Fast Deviance: Microbenchmarks

This vignette compares the fast deviance evaluator (loglik_gamlss_newdata_fast) with the generic route for several common families. Results will vary by machine/BLAS and by R package versions.

Recent releases added optimized shortcuts for additional families (LOGITNO, GEOM, LO, BE) and automatic routing through native gamlss.dist densities for zero-adjusted/zero-inflated variants (e.g., ZAGA, ZIPF, BETA4). Pair this benchmark with check_fast_vs_generic() for numerical validation and fast_vs_generic_ll() for micro-timing comparisons.

What you’ll learn

How to loop over gamlss.dist families and time fast_vs_generic_ll() results.
How to extend the benchmark generator list with custom families or parameter grids.
How to interpret the summary tibble returned by fast_vs_generic_ll() (mean/median ratios, speedups).

library(gamlss)
#> Loading required package: splines
#> Loading required package: gamlss.data
#> 
#> Attaching package: 'gamlss.data'
#> The following object is masked from 'package:datasets':
#> 
#>     sleep
#> Loading required package: gamlss.dist
#> Loading required package: nlme
#> Loading required package: parallel
#>  **********   GAMLSS Version 5.5-0  **********
#> For more on GAMLSS look at https://www.gamlss.com/
#> Type gamlssNews() to see new features/changes/bug fixes.
library(SelectBoost.gamlss)

set.seed(2025)
n <- 5000  # larger n makes the differences clearer

families <- list(
  NO = gamlss.dist::NO(),
  PO = gamlss.dist::PO(),
  LOGNO = gamlss.dist::LOGNO(),
  GA = gamlss.dist::GA(),
  IG = gamlss.dist::IG(),
  LO = gamlss.dist::LO(),
  LOGITNO = gamlss.dist::LOGITNO(),
  GEOM = gamlss.dist::GEOM(),
  BE = gamlss.dist::BE()
)

gen_fun <- list(
  NO = function(n) gamlss.dist::rNO(n, mu = 0, sigma = 1),
  PO = function(n) gamlss.dist::rPO(n, mu = 2.5),
  LOGNO = function(n) gamlss.dist::rLOGNO(n, mu = 0, sigma = 0.6),
  GA = function(n) gamlss.dist::rGA(n, mu = 2, sigma = 0.5),
  IG = function(n) gamlss.dist::rIG(n, mu = 2, sigma = 0.5),
  LO = function(n) gamlss.dist::rLO(n, mu = 0, sigma = 1),
  LOGITNO = function(n) gamlss.dist::rLOGITNO(n, mu = 0, sigma = 1),
  GEOM = function(n) gamlss.dist::rGEOM(n, mu = 3),
  BE = function(n) gamlss.dist::rBE(n, mu = 0.4, sigma = 0.2)
)

bench_one <- function(fname) {
  fam <- families[[fname]]
  gen <- gen_fun[[fname]]
  if (is.null(fam) || is.null(gen)) return(NULL)

  y <- gen(n)
  dat <- data.frame(y = y)

  fit <- try(gamlss::gamlss(y ~ 1, data = dat, family = fam), silent = TRUE)
  if (inherits(fit, "try-error")) return(NULL)

  # ensure predictions are available on newdata (same data is fine)
  fast_vs_generic_ll(fit, newdata = dat, reps = 50)
}

res_list <- lapply(names(families), bench_one)
#> GAMLSS-RS iteration 1: Global Deviance = 14284.31 
#> GAMLSS-RS iteration 2: Global Deviance = 14284.31
#> Warning in microbenchmark::microbenchmark(fast = f_fast(), generic = f_gen(), :
#> less accurate nanosecond times to avoid potential integer overflows
#> GAMLSS-RS iteration 1: Global Deviance = 18412.08 
#> GAMLSS-RS iteration 2: Global Deviance = 18412.08 
#> GAMLSS-RS iteration 1: Global Deviance = 9135.418 
#> GAMLSS-RS iteration 2: Global Deviance = 9135.418 
#> GAMLSS-RS iteration 1: Global Deviance = 13230.48 
#> GAMLSS-RS iteration 2: Global Deviance = 13230.48 
#> GAMLSS-RS iteration 1: Global Deviance = 14685.78 
#> GAMLSS-RS iteration 2: Global Deviance = 14685.78 
#> GAMLSS-RS iteration 1: Global Deviance = 20001.95 
#> GAMLSS-RS iteration 2: Global Deviance = 20001.95 
#> GAMLSS-RS iteration 1: Global Deviance = 22420.06 
#> GAMLSS-RS iteration 1: Global Deviance = -8775.955 
#> GAMLSS-RS iteration 2: Global Deviance = -9082.524 
#> GAMLSS-RS iteration 3: Global Deviance = -9082.579 
#> GAMLSS-RS iteration 4: Global Deviance = -9082.579
names(res_list) <- names(families)
res_list <- Filter(Negate(is.null), res_list)

# Present results
res <- do.call(rbind, lapply(names(res_list), function(nm) {
  df <- res_list[[nm]]
  df$family <- nm
  df
}))

res
#>     method    median unit rel_speed family
#> 1     fast  831.5620   us 0.5549009     NO
#> 2  generic  461.4345   us 1.8021236     NO
#> 3     fast  754.9945   us 1.5799506     PO
#> 4  generic 1192.8540   us 0.6329312     PO
#> 5     fast  890.0690   us 1.1034133  LOGNO
#> 6  generic  982.1140   us 0.9062787  LOGNO
#> 7     fast  888.7775   us 1.0822281     GA
#> 8  generic  961.8600   us 0.9240196     GA
#> 9     fast  874.4480   us 1.0833646     IG
#> 10 generic  947.3460   us 0.9230503     IG
#> 11    fast  858.3965   us 0.5833353     LO
#> 12 generic  500.7330   us 1.7142799     LO
#> 13    fast  549.9125   us 1.7757689   GEOM
#> 14 generic  976.5175   us 0.5631363   GEOM
#> 15    fast 1135.8845   us 0.6736631     BE
#> 16 generic  765.2035   us 1.4844215     BE
if (nrow(res)) {
  # Plot median times by family (if microbenchmark unit)
  if (!is.null(attr(res_list[[1]], "unit"))) {
    # simple barplot: generic vs fast for each family
    op <- par(mfrow=c(1,1), mar=c(8,4,2,1))
    fams <- unique(res$family)
    med_fast <- tapply(res$median[res$method=="fast"], res$family[res$method=="fast"], median)
    med_gen  <- tapply(res$median[res$method=="generic"], res$family[res$method=="generic"], median)
    mat <- rbind(med_gen[fams], med_fast[fams])
    barplot(mat, beside=TRUE, las=2, legend.text = c("generic","fast"),
            main="Median time (microbenchmark units)", ylab=attr(res_list[[1]], "unit"))
    par(op)
  }
}

Frédéric Bertrand

2025-10-27

What you’ll learn