Benchmark study: Modifying data within groups
The following makes use of the packages data.table, dplyr, memisc, and rbenchmark. You may need to
install them from CRAN using the code:
install.packages(c("data.table","dplyr","memisc","rbenchmark"))
if you want to run this on your computer. (The packages are already installed on the notebook container, however.)
library(data.table)
library(memisc)
library(dplyr)
library(rbenchmark)
# Load the pre-generated data sets used by the benchmarks into the workspace.
load("BData.RData")
load("SData.RData")

# Fail fast if any object the benchmarks rely on is unavailable. Without this
# check a missing object would only surface after earlier benchmark
# expressions (100 replications each) had already been run.
required_objects <- c(
  "BDataF", "BDataT", "BDTbl",
  "SDataF", "SDataT", "SDTbl"
)
is_available <- vapply(
  required_objects,
  exists,
  logical(1),
  envir = environment()
)
if (!all(is_available)) {
  stop(
    "Objects not found after loading BData.RData and SData.RData: ",
    paste(required_objects[!is_available], collapse = ", "),
    call. = FALSE
  )
}
# Benchmark 1: group-wise modification of the "B" data objects
# (BDataF, BDataT, BDTbl -- presumably the same data as a data frame, a
# data.table and a tibble; confirm against the script that created them).
#
# Each approach adds the same derived columns within the groups defined by
# `a` and `b`:
#   X1c  - X1 minus its group mean
#   X2c2 - squared deviation of X2 from its group mean
#   X3ca - absolute deviation of X3 from its group median
#   X4cm - X4 minus its group maximum
grouped_modification_benchmark_1 <- benchmark(
  # Base R: within() on the whole data set; ave() supplies the group-wise
  # statistic for every row.
  within = within(BDataF, {
    X1c  <- X1 - ave(X1, a, b, FUN = mean)
    X2c2 <- (X2 - ave(X2, a, b, FUN = mean))^2
    X3ca <- abs(X3 - ave(X3, a, b, FUN = median))
    X4cm <- X4 - ave(X4, a, b, FUN = max)
  }),

  # data.table: `:=` adds the columns by reference, so BDataT itself is
  # modified; from the second replication on the columns are overwritten.
  data.table = BDataT[
    ,
    `:=`(
      X1c  = X1 - mean(X1),
      X2c2 = (X2 - mean(X2))^2,
      X3ca = abs(X3 - median(X3)),
      X4cm = X4 - max(X4)
    ),
    by = .(a, b)
  ],

  # dplyr: grouped mutate; the result is left grouped, as nothing is done
  # with it beyond timing.
  `group_by + mutate` = BDTbl %>%
    group_by(a, b) %>%
    mutate(
      X1c  = X1 - mean(X1),
      X2c2 = (X2 - mean(X2))^2,
      X3ca = abs(X3 - median(X3)),
      X4cm = X4 - max(X4)
    ),

  # memisc: withinGroups() with the grouping given as a one-sided formula.
  withinGroups = withinGroups(BDataF, ~ a + b, {
    X1c  <- X1 - mean(X1)
    X2c2 <- (X2 - mean(X2))^2
    X3ca <- abs(X3 - median(X3))
    X4cm <- X4 - max(X4)
  }),

  # Benchmark settings: 100 runs per expression, report user CPU time and
  # its ratio relative to the fastest entry, keep the order given above.
  replications = 100,
  columns = c("test", "user.self", "relative"),
  relative = "user.self",
  order = NULL
)
# Benchmark 2: the same group-wise modification, run on the "S" data objects
# (SDataF, SDataT, SDTbl -- presumably the same data as a data frame, a
# data.table and a tibble; confirm against the script that created them).
#
# Each approach adds the same derived columns within the groups defined by
# `a` and `b`:
#   X1c  - X1 minus its group mean
#   X2c2 - squared deviation of X2 from its group mean
#   X3ca - absolute deviation of X3 from its group median
#   X4cm - X4 minus its group maximum
grouped_modification_benchmark_2 <- benchmark(
  # Base R: within() on the whole data set; ave() supplies the group-wise
  # statistic for every row.
  within = within(SDataF, {
    X1c  <- X1 - ave(X1, a, b, FUN = mean)
    X2c2 <- (X2 - ave(X2, a, b, FUN = mean))^2
    X3ca <- abs(X3 - ave(X3, a, b, FUN = median))
    X4cm <- X4 - ave(X4, a, b, FUN = max)
  }),

  # data.table: `:=` adds the columns by reference, so SDataT itself is
  # modified; from the second replication on the columns are overwritten.
  data.table = SDataT[
    ,
    `:=`(
      X1c  = X1 - mean(X1),
      X2c2 = (X2 - mean(X2))^2,
      X3ca = abs(X3 - median(X3)),
      X4cm = X4 - max(X4)
    ),
    by = .(a, b)
  ],

  # dplyr: grouped mutate; the result is left grouped, as nothing is done
  # with it beyond timing.
  `group_by + mutate` = SDTbl %>%
    group_by(a, b) %>%
    mutate(
      X1c  = X1 - mean(X1),
      X2c2 = (X2 - mean(X2))^2,
      X3ca = abs(X3 - median(X3)),
      X4cm = X4 - max(X4)
    ),

  # memisc: withinGroups() with the grouping given as a one-sided formula.
  withinGroups = withinGroups(SDataF, ~ a + b, {
    X1c  <- X1 - mean(X1)
    X2c2 <- (X2 - mean(X2))^2
    X3ca <- abs(X3 - median(X3))
    X4cm <- X4 - max(X4)
  }),

  # Benchmark settings: 100 runs per expression, report user CPU time and
  # its ratio relative to the fastest entry, keep the order given above.
  replications = 100,
  columns = c("test", "user.self", "relative"),
  relative = "user.self",
  order = NULL
)
# Store both benchmark result tables in one file for later use.
save(
  list = c(
    "grouped_modification_benchmark_1",
    "grouped_modification_benchmark_2"
  ),
  file = "grouped-modification-benchmark.RData"
)
- R file: benchmark-group-data-modification.R
- Rmarkdown file: benchmark-group-data-modification.Rmd
- Jupyter notebook file: benchmark-group-data-modification.ipynb
- Interactive version of the Jupyter notebook (shuts down after 60s):
- Interactive version of the Jupyter notebook (sign in required):