Benchmarking future.apply

The Three Contenders
#

Standard for loop: Manual iteration (pre-allocated).
lapply: The functional, sequential R standard.
future_lapply: The parallelized version.

Experiment 1: The “Cheap” Task
#

In this scenario, we do something very fast: calculating the mean of 1,000 numbers, once for each of 200 vectors.

# Inputs for the cheap task: a plain list holding `n` freshly drawn
# standard-normal vectors, each of length 1,000.
n <- 200
data_list <- lapply(seq_len(n), function(i) rnorm(1000))

# Time the three contenders on the cheap task; `times = 10` asks
# microbenchmark() for ten timed evaluations of each expression.
bench_cheap <- microbenchmark(
  for_loop = {
    # Pre-allocate the result list so the loop never has to grow it.
    res_for <- vector("list", n)
    # seq_len(n) is empty when n == 0, whereas 1:n would iterate over c(1, 0).
    for (i in seq_len(n)) res_for[[i]] <- mean(data_list[[i]])
  },
  standard_apply = lapply(data_list, mean),
  future_apply = future_lapply(data_list, mean),
  times = 10
)

# Generate Table
# Pin the reporting unit explicitly: when `unit` is not given, summary() picks
# a time unit to suit the timings, so a hard-coded caption can end up
# disagreeing with the numbers in the table.
kable(
  summary(bench_cheap, unit = "us"),
  caption = "Cheap Task Results (microseconds)"
)

expr	min	lq	mean	median	uq	max	neval
for_loop	1608.359	1670.975	2190.1166	1903.875	2669.886	3709.897	10
standard_apply	587.176	595.874	947.8083	867.347	1267.916	1548.279	10
future_apply	43543.992	46387.785	84049.1765	99176.992	102294.771	133827.555	10

Cheap Task Results (microseconds)


# Generate Figure
# Build the base benchmark plot first, then attach the title; the final
# expression is left unassigned so the finished plot is still displayed.
cheap_plot <- autoplot(bench_cheap)
cheap_plot + labs(title = "Cheap Task: Parallel Overhead is Visible")
#> Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
#> ℹ Please use tidy evaluation idioms with `aes()`.
#> ℹ See also `vignette("ggplot2-in-packages")` for more information.
#> ℹ The deprecated feature was likely used in the microbenchmark package.
#>   Please report the issue at
#>   <https://github.com/joshuaulrich/microbenchmark/issues/>.
#> This warning is displayed once per session.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

Experiment 2: The “Expensive” Task
#

In this scenario, we simulate “heavy” work by adding a 0.1-second delay (Sys.sleep) to every call. This mimics slow per-item work such as complex statistical modeling or web scraping.

# Inputs for the expensive task: a plain list holding `n_heavy` freshly drawn
# standard-normal vectors, each of length 10.
n_heavy <- 20
data_heavy <- lapply(seq_len(n_heavy), function(i) rnorm(10))

# Simulate an expensive computation: pause, then return the mean of `x`.
#
# Arguments:
#   x     Numeric vector to average.
#   delay Seconds to sleep before computing the mean. Defaults to 0.1, so a
#         call with only `x` takes about 0.1 seconds, as before.
#
# Returns the value of mean(x).
heavy_func <- function(x, delay = 0.1) {
  # The sleep stands in for real work; it is the only cost that matters here.
  Sys.sleep(delay)
  mean(x)
}

# Time the three contenders on the expensive task, where each call to
# heavy_func() is dominated by its built-in sleep.
bench_expensive <- microbenchmark(
  for_loop = {
    # Pre-allocate the result list so the loop never has to grow it.
    res_for <- vector("list", n_heavy)
    # seq_len(n_heavy) is empty when n_heavy == 0, whereas 1:n_heavy would
    # iterate over c(1, 0).
    for (i in seq_len(n_heavy)) res_for[[i]] <- heavy_func(data_heavy[[i]])
  },
  standard_apply = lapply(data_heavy, heavy_func),
  future_apply = future_lapply(data_heavy, heavy_func),
  times = 2 # Low iterations because it's slow!
)

# Generate Table
# Pin the reporting unit explicitly: when `unit` is not given, summary() picks
# a time unit to suit the timings, so a hard-coded caption can end up
# disagreeing with the numbers in the table.
kable(
  summary(bench_expensive, unit = "ms"),
  caption = "Expensive Task Results (milliseconds)"
)

expr	min	lq	mean	median	uq	max	neval
for_loop	2010.7866	2010.7866	2014.4950	2014.4950	2018.2033	2018.2033	2
standard_apply	2008.7650	2008.7650	2011.7931	2011.7931	2014.8213	2014.8213	2
future_apply	279.5349	279.5349	292.0067	292.0067	304.4786	304.4786	2

Expensive Task Results (milliseconds)


# Generate Figure
# Build the base benchmark plot first, then attach the title; the final
# expression is left unassigned so the finished plot is still displayed.
expensive_plot <- autoplot(bench_expensive)
expensive_plot + labs(title = "Expensive Task: Future Wins Big")

The Three Contenders#

Experiment 1: The “Cheap” Task#

Experiment 2: The “Expensive” Task#

The Three Contenders
#

Experiment 1: The “Cheap” Task
#

Experiment 2: The “Expensive” Task
#