Fast Deviance: Microbenchmarks
Frédéric Bertrand
Cedric, Cnam, Parisfrederic.bertrand@lecnam.net
2025-10-27
Source:vignettes/fast-deviance-benchmark.Rmd
fast-deviance-benchmark.RmdThis vignette compares the fast deviance evaluator
(loglik_gamlss_newdata_fast) with the generic route for
several common families. Results will vary by machine/BLAS and by R
package versions.
Recent releases added optimized shortcuts for additional families
(LOGITNO, GEOM, LO,
BE) and automatic routing through native
gamlss.dist densities for zero-adjusted/zero-inflated
variants (e.g., ZAGA, ZIPF,
BETA4). Pair this benchmark with
check_fast_vs_generic() for numerical validation and
fast_vs_generic_ll() for micro-timing comparisons.
What you’ll learn
- How to loop over
gamlss.distfamilies and timefast_vs_generic_ll()results. - How to extend the benchmark generator list with custom families or parameter grids.
- How to interpret the summary tibble returned by
fast_vs_generic_ll()(mean/median ratios, speedups).
library(gamlss)
#> Loading required package: splines
#> Loading required package: gamlss.data
#>
#> Attaching package: 'gamlss.data'
#> The following object is masked from 'package:datasets':
#>
#> sleep
#> Loading required package: gamlss.dist
#> Loading required package: nlme
#> Loading required package: parallel
#> ********** GAMLSS Version 5.5-0 **********
#> For more on GAMLSS look at https://www.gamlss.com/
#> Type gamlssNews() to see new features/changes/bug fixes.
library(SelectBoost.gamlss)
set.seed(2025)
n <- 5000 # larger n makes the differences clearer
families <- list(
NO = gamlss.dist::NO(),
PO = gamlss.dist::PO(),
LOGNO = gamlss.dist::LOGNO(),
GA = gamlss.dist::GA(),
IG = gamlss.dist::IG(),
LO = gamlss.dist::LO(),
LOGITNO = gamlss.dist::LOGITNO(),
GEOM = gamlss.dist::GEOM(),
BE = gamlss.dist::BE()
)
gen_fun <- list(
NO = function(n) gamlss.dist::rNO(n, mu = 0, sigma = 1),
PO = function(n) gamlss.dist::rPO(n, mu = 2.5),
LOGNO = function(n) gamlss.dist::rLOGNO(n, mu = 0, sigma = 0.6),
GA = function(n) gamlss.dist::rGA(n, mu = 2, sigma = 0.5),
IG = function(n) gamlss.dist::rIG(n, mu = 2, sigma = 0.5),
LO = function(n) gamlss.dist::rLO(n, mu = 0, sigma = 1),
LOGITNO = function(n) gamlss.dist::rLOGITNO(n, mu = 0, sigma = 1),
GEOM = function(n) gamlss.dist::rGEOM(n, mu = 3),
BE = function(n) gamlss.dist::rBE(n, mu = 0.4, sigma = 0.2)
)
bench_one <- function(fname) {
fam <- families[[fname]]
gen <- gen_fun[[fname]]
if (is.null(fam) || is.null(gen)) return(NULL)
y <- gen(n)
dat <- data.frame(y = y)
fit <- try(gamlss::gamlss(y ~ 1, data = dat, family = fam), silent = TRUE)
if (inherits(fit, "try-error")) return(NULL)
# ensure predictions are available on newdata (same data is fine)
fast_vs_generic_ll(fit, newdata = dat, reps = 50)
}
res_list <- lapply(names(families), bench_one)
#> GAMLSS-RS iteration 1: Global Deviance = 14284.31
#> GAMLSS-RS iteration 2: Global Deviance = 14284.31
#> Warning in microbenchmark::microbenchmark(fast = f_fast(), generic = f_gen(), :
#> less accurate nanosecond times to avoid potential integer overflows
#> GAMLSS-RS iteration 1: Global Deviance = 18412.08
#> GAMLSS-RS iteration 2: Global Deviance = 18412.08
#> GAMLSS-RS iteration 1: Global Deviance = 9135.418
#> GAMLSS-RS iteration 2: Global Deviance = 9135.418
#> GAMLSS-RS iteration 1: Global Deviance = 13230.48
#> GAMLSS-RS iteration 2: Global Deviance = 13230.48
#> GAMLSS-RS iteration 1: Global Deviance = 14685.78
#> GAMLSS-RS iteration 2: Global Deviance = 14685.78
#> GAMLSS-RS iteration 1: Global Deviance = 20001.95
#> GAMLSS-RS iteration 2: Global Deviance = 20001.95
#> GAMLSS-RS iteration 1: Global Deviance = 22420.06
#> GAMLSS-RS iteration 1: Global Deviance = -8775.955
#> GAMLSS-RS iteration 2: Global Deviance = -9082.524
#> GAMLSS-RS iteration 3: Global Deviance = -9082.579
#> GAMLSS-RS iteration 4: Global Deviance = -9082.579
names(res_list) <- names(families)
res_list <- Filter(Negate(is.null), res_list)
# Present results
res <- do.call(rbind, lapply(names(res_list), function(nm) {
df <- res_list[[nm]]
df$family <- nm
df
}))
res
#> method median unit rel_speed family
#> 1 fast 831.5620 us 0.5549009 NO
#> 2 generic 461.4345 us 1.8021236 NO
#> 3 fast 754.9945 us 1.5799506 PO
#> 4 generic 1192.8540 us 0.6329312 PO
#> 5 fast 890.0690 us 1.1034133 LOGNO
#> 6 generic 982.1140 us 0.9062787 LOGNO
#> 7 fast 888.7775 us 1.0822281 GA
#> 8 generic 961.8600 us 0.9240196 GA
#> 9 fast 874.4480 us 1.0833646 IG
#> 10 generic 947.3460 us 0.9230503 IG
#> 11 fast 858.3965 us 0.5833353 LO
#> 12 generic 500.7330 us 1.7142799 LO
#> 13 fast 549.9125 us 1.7757689 GEOM
#> 14 generic 976.5175 us 0.5631363 GEOM
#> 15 fast 1135.8845 us 0.6736631 BE
#> 16 generic 765.2035 us 1.4844215 BE
if (nrow(res)) {
# Plot median times by family (if microbenchmark unit)
if (!is.null(attr(res_list[[1]], "unit"))) {
# simple barplot: generic vs fast for each family
op <- par(mfrow=c(1,1), mar=c(8,4,2,1))
fams <- unique(res$family)
med_fast <- tapply(res$median[res$method=="fast"], res$family[res$method=="fast"], median)
med_gen <- tapply(res$median[res$method=="generic"], res$family[res$method=="generic"], median)
mat <- rbind(med_gen[fams], med_fast[fams])
barplot(mat, beside=TRUE, las=2, legend.text = c("generic","fast"),
main="Median time (microbenchmark units)", ylab=attr(res_list[[1]], "unit"))
par(op)
}
}