Summary of the benchmark study

The following makes use of the packages data.table, dplyr, memisc, and rbenchmark. You may need to install these packages from CRAN by calling install.packages(c("data.table","dplyr","memisc","rbenchmark")) if you want to run this on your computer. (The packages are already installed on the notebook container, however.)

library(data.table)
library(dplyr)
library(memisc)
library(rbenchmark)
# Convert a benchmark result table into a matrix whose rows are labelled
# by the table's `test` column.
#
# x: a data-frame-like object with a `test` column; the first column is
#    dropped by position (presumably it is the `test` column itself --
#    TODO confirm against the saved benchmark objects).
# Returns the remaining columns as a matrix with `x$test` as row names.
bench_matrix <- function(x){
    timings <- as.matrix(x[,-1])
    labels <- x$test
    rownames(timings) <- labels
    timings
}
# Ask the Jupyter R kernel for rich (HTML) output.
options(jupyter.rich_display = TRUE)

# Load the stored results; the file is expected to provide
# grouped_summary_benchmark_1 and grouped_summary_benchmark_2.
load("grouped-summary-benchmark.RData")

# Turn each result table into a matrix with the test labels as row names.
grouped_summary_benchmark_1 <- bench_matrix(grouped_summary_benchmark_1)
grouped_summary_benchmark_2 <- bench_matrix(grouped_summary_benchmark_2)

# Combine the two matrices; the argument names label the two data sets.
grouped_summary_benchmark <- memisc::collect(
    "`Big data'"    = grouped_summary_benchmark_1,
    "`Survey data'" = grouped_summary_benchmark_2
)

# Drop the fifth entry along the first (method) dimension.
grouped_summary_benchmark <- grouped_summary_benchmark[-5, , ]

# Label the timing columns and name the three dimensions.
colnames(grouped_summary_benchmark) <- c("abs.", "rel.")
names(dimnames(grouped_summary_benchmark)) <- c("Method", "Timing", "Data")

# Flat table with Data and Timing as column variables, shown as HTML.
ftable(grouped_summary_benchmark, col.vars = 3:2) %>%
    memisc::show_html(digits = 2)
Data:                           `Big data'       `Survey data'
Method                Timing:   abs.    rel.     abs.    rel.
aggregate                       54.46   11.96    0.55    4.55
with + tapply                    4.55    1.00    0.12    1.00
data.table                      17.04    3.74    0.89    7.29
group_by + summarize            14.00    3.08    0.36    2.97
withGroups                      22.70    4.99    1.41   11.56
# Load the stored results; the file is expected to provide
# grouped_modification_benchmark_1 and grouped_modification_benchmark_2.
load("grouped-modification-benchmark.RData")

# Turn each result table into a matrix with the test labels as row names.
grouped_modification_benchmark_1 <- bench_matrix(grouped_modification_benchmark_1)
grouped_modification_benchmark_2 <- bench_matrix(grouped_modification_benchmark_2)

# Combine the two matrices; the argument names label the two data sets.
# The call is namespace-qualified because dplyr (also attached) exports its
# own collect(); an unqualified call would depend on package attach order.
grouped_modification_benchmark <- memisc::collect(
    "`Big data'"    = grouped_modification_benchmark_1,
    "`Survey data'" = grouped_modification_benchmark_2)

# Label the timing columns and name the three dimensions.
colnames(grouped_modification_benchmark) <- c("abs.","rel.")
names(dimnames(grouped_modification_benchmark)) <- c("Method","Timing","Data")

# Flat table with Data and Timing as column variables, shown as HTML.
ftable(grouped_modification_benchmark,col.vars=3:2) %>% memisc::show_html(digits=2)
Data:                        `Big data'       `Survey data'
Method             Timing:   abs.    rel.     abs.    rel.
within                       26.91    1.08    2.37    1.58
data.table                   24.85    1.00    2.66    1.77
group_by + mutate            27.18    1.09    3.26    2.17
withinGroups                 33.94    1.37    1.50    1.00