Summary of the benchmark study
The following makes use of the packages data.table, dplyr, memisc, and rbenchmark. If you want to run this on your own computer, you may first need to install these packages from CRAN by calling `install.packages(c("data.table","dplyr","memisc","rbenchmark"))`.
(The packages are already installed in the notebook container, however.)
library(data.table)
library(dplyr)
library(memisc)
library(rbenchmark)
# Turn a benchmark result table into a matrix.
#
# The `test` column of `x` supplies the row names; the first column of `x`
# is dropped and the remaining columns are coerced with as.matrix().
# NOTE(review): this presumes `test` is the first column of `x` -- confirm
# against the objects stored in the .RData files.
bench_matrix <- function(x) {
  method_labels <- x$test
  timings <- as.matrix(x[, -1])
  rownames(timings) <- method_labels
  timings
}
# Grouped-summary benchmark: read the saved results and tabulate them.
# NOTE(review): grouped_summary_benchmark_1 and _2 are presumably the objects
# restored by this load() call -- confirm against the .RData file.
load("grouped-summary-benchmark.RData")

# Convert both result tables to matrices whose row names are the test labels.
grouped_summary_benchmark_1 <- bench_matrix(grouped_summary_benchmark_1)
grouped_summary_benchmark_2 <- bench_matrix(grouped_summary_benchmark_2)

# Combine the two matrices into a single three-way array, one layer per
# data set.
grouped_summary_benchmark <- memisc::collect(
  "`Big data'" = grouped_summary_benchmark_1,
  "`Survey data'" = grouped_summary_benchmark_2
)

# Remove the fifth row (method) from the comparison.
# NOTE(review): which method sits at index 5 cannot be told from this file;
# selecting by name would be clearer once that is confirmed.
grouped_summary_benchmark <- grouped_summary_benchmark[-5, , ]

# Label the timing columns and name the three array dimensions.
colnames(grouped_summary_benchmark) <- c("abs.", "rel.")
names(dimnames(grouped_summary_benchmark)) <- c("Method", "Timing", "Data")

# Set the jupyter.rich_display option before rendering the flat table as HTML,
# with Data and Timing (dimensions 3 and 2) as column variables.
options(jupyter.rich_display = TRUE)
ftable(grouped_summary_benchmark, col.vars = 3:2) %>%
  memisc::show_html(digits = 2)
| Data:                | `Big data' |       | `Survey data' |       |
| Method / Timing:     | abs.       | rel.  | abs.          | rel.  |
|----------------------|-----------:|------:|--------------:|------:|
| aggregate            |      54.46 | 11.96 |          0.55 |  4.55 |
| with + tapply        |       4.55 |  1.00 |          0.12 |  1.00 |
| data.table           |      17.04 |  3.74 |          0.89 |  7.29 |
| group_by + summarize |      14.00 |  3.08 |          0.36 |  2.97 |
| withGroups           |      22.70 |  4.99 |          1.41 | 11.56 |
# Grouped-modification benchmark: read the saved results and tabulate them.
# NOTE(review): grouped_modification_benchmark_1 and _2 are presumably the
# objects restored by this load() call -- confirm against the .RData file.
load("grouped-modification-benchmark.RData")

# Convert both result tables to matrices whose row names are the test labels.
grouped_modification_benchmark_1 <-
  bench_matrix(grouped_modification_benchmark_1)
grouped_modification_benchmark_2 <-
  bench_matrix(grouped_modification_benchmark_2)

# Combine the two matrices into a single three-way array, one layer per
# data set. collect() is called through the memisc namespace, as in the
# grouped-summary cell: dplyr is attached in this file as well and exports
# its own collect(), so the unqualified name resolves to memisc only as long
# as memisc is attached after dplyr.
grouped_modification_benchmark <- memisc::collect(
  "`Big data'" = grouped_modification_benchmark_1,
  "`Survey data'" = grouped_modification_benchmark_2
)

# Label the timing columns and name the three array dimensions.
colnames(grouped_modification_benchmark) <- c("abs.", "rel.")
names(dimnames(grouped_modification_benchmark)) <-
  c("Method", "Timing", "Data")

# Render the flat table as HTML, with Data and Timing (dimensions 3 and 2)
# as column variables.
ftable(grouped_modification_benchmark, col.vars = 3:2) %>%
  memisc::show_html(digits = 2)
| Data:             | `Big data' |      | `Survey data' |      |
| Method / Timing:  | abs.       | rel. | abs.          | rel. |
|-------------------|-----------:|-----:|--------------:|-----:|
| within            |      26.91 | 1.08 |          2.37 | 1.58 |
| data.table        |      24.85 | 1.00 |          2.66 | 1.77 |
| group_by + mutate |      27.18 | 1.09 |          3.26 | 2.17 |
| withinGroups      |      33.94 | 1.37 |          1.50 | 1.00 |
- R file: comparison-summary-tables.R
- Rmarkdown file: comparison-summary-tables.Rmd
- Jupyter notebook file: comparison-summary-tables.ipynb
- Interactive version of the Jupyter notebook (shuts down after 60s):
- Interactive version of the Jupyter notebook (sign in required):