Overview

This vignette covers essential diagnostics and sensitivity analyses for ML-UMR models:

  1. MCMC Diagnostics: Ensuring reliable posterior samples
  2. Model Fit Assessment: DIC and posterior predictive checks
  3. Prior Sensitivity: Testing robustness to prior specifications
  4. Integration Point Sensitivity: Assessing QMC approximation quality
  5. Covariate Selection: Impact of included covariates

MCMC Diagnostics

Key Diagnostic Metrics

ML-UMR fits models with Stan’s NUTS sampler. Monitor three essential diagnostics:

  1. R-hat (Gelman-Rubin statistic): Chain convergence
  2. Effective Sample Size (ESS): Number of independent samples
  3. Divergent Transitions: Sampling pathologies
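
For intuition, R-hat and ESS can be computed directly with the posterior package (a general-purpose package, separate from mlumr); a minimal sketch on simulated draws:

library(posterior)

# 1000 iterations from each of 4 well-mixed chains (columns = chains)
draws <- matrix(rnorm(4000), nrow = 1000, ncol = 4)

rhat(draws)      # close to 1 for converged chains
ess_bulk(draws)  # near the total number of draws for independent samples
ess_tail(draws)  # same idea, focused on the distribution's tails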

Setup Example Data

set.seed(123)

data <- generate_simulation_data(
  n_index = 200,
  n_comparator = 150,
  n_covariates = 2,
  effect_modification = "none",
  seed = 123
)

ipd <- set_ipd_unanchored(
  data = data$ipd,
  treatment = "treatment",
  outcome = "outcome",
  covariates = c("x1", "x2")
)

agd <- set_agd_unanchored(
  data = data$agd,
  treatment = "treatment",
  outcome_n = "n_total",
  outcome_r = "n_events",
  cov_means = c("x1_mean", "x2_mean"),
  cov_sds = c("x1_sd", "x2_sd")
)

network <- combine_unanchored(ipd, agd)
network <- add_integration_unanchored(
  network,
  n_int = 64,
  x1 = distr(qnorm, mean = x1_mean, sd = x1_sd),
  x2 = distr(qnorm, mean = x2_mean, sd = x2_sd)
)
#> Computing correlation matrix from IPD...
#> Added 64 integration points for AgD

Checking Convergence

# Fit model
fit <- mlumr(
  data = network,
  spfa = TRUE,
  iter_warmup = 1000,
  iter_sampling = 2000,
  chains = 4,
  seed = 123
)

# View diagnostics summary
summary(fit)

# Key checks:
# 1. R-hat < 1.05 for all parameters
# 2. ESS_bulk > 400 for all parameters
# 3. ESS_tail > 400 for all parameters
# 4. No divergent transitions

R-hat Interpretation

R-hat         Interpretation
< 1.01        Excellent convergence
1.01 - 1.05   Acceptable
1.05 - 1.1    Marginal (run longer chains)
> 1.1         Poor convergence (do not use results)

# Extract parameters with high R-hat
params <- extract_parameters(fit)
problematic <- params[params$rhat > 1.05, ]

if (nrow(problematic) > 0) {
  warning("Parameters with R-hat > 1.05:")
  print(problematic)
}

ESS Interpretation

ESS           Interpretation
> 400         Adequate for reliable inference
100 - 400     Marginal (estimates may be noisy)
< 100         Insufficient (run longer chains)

# Check ESS
low_ess <- params[params$ess_bulk < 400, ]

if (nrow(low_ess) > 0) {
  warning("Parameters with ESS < 400:")
  print(low_ess)
}

Divergent Transitions

Divergences indicate the sampler struggled with posterior geometry:

# Check for divergences
fit$diagnostics$num_divergent

# If divergences > 0, try:
# 1. Increase adapt_delta (e.g., 0.99)
# 2. Use more informative priors
# 3. Check data for issues

fit_safe <- mlumr(
  data = network,
  spfa = TRUE,
  adapt_delta = 0.99,  # More careful sampling
  max_treedepth = 15,
  seed = 123
)

Visual Diagnostics

# Trace plots - should look like "fuzzy caterpillars"
plot_diagnostics(fit, pars = c("mu_index", "mu_comparator"), type = "trace")

# Density plots - chains should overlap
plot_diagnostics(fit, pars = c("mu_index", "mu_comparator"), type = "density")

# R-hat visualization
plot_diagnostics(fit, pars = c("mu_index", "mu_comparator", "beta[1]", "beta[2]"),
                 type = "rhat")

# ESS visualization
plot_diagnostics(fit, pars = c("mu_index", "mu_comparator", "beta[1]", "beta[2]"),
                 type = "ess")

Model Fit Assessment

Deviance Information Criterion (DIC)

DIC balances model fit and complexity:

\text{DIC} = \bar{D} + p_D

Where:

  • \bar{D} = posterior mean deviance (fit)
  • p_D = effective number of parameters (complexity)
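
The same quantities can be computed by hand from posterior deviance draws. A minimal sketch using Spiegelhalter's definition p_D = \bar{D} - D(\hat{\theta}); note that how calculate_dic() computes p_D internally may differ (e.g. Gelman's var(D)/2), and the inputs here are hypothetical:

# dev_draws:   posterior draws of the deviance
# dev_at_mean: deviance evaluated at the posterior mean of the parameters
dic_by_hand <- function(dev_draws, dev_at_mean) {
  d_bar <- mean(dev_draws)      # posterior mean deviance (fit)
  p_d   <- d_bar - dev_at_mean  # effective number of parameters (complexity)
  c(DIC = d_bar + p_d, p_D = p_d, D_bar = d_bar)
}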

# Calculate DIC
dic <- calculate_dic(fit)
print(dic)

# Interpretation:
# - Lower DIC = better model
# - Delta DIC > 5 = meaningful difference
# - pD indicates model complexity

Model Comparison

fit_spfa <- mlumr(network, spfa = TRUE, seed = 123)
fit_relaxed <- mlumr(network, spfa = FALSE, seed = 123)

# Compare models
comparison <- compare_models(fit_spfa, fit_relaxed)

# Look for:
# - Which model has lower DIC
# - Size of Delta_DIC
# - Difference in pD

Posterior Predictive Checks

Check if model predictions match observed data:

# Get predictions
preds <- predict(fit, population = "index", treatment = "index")

# Compare to observed
observed_rate <- sum(data$ipd$outcome) / nrow(data$ipd)
predicted_rate <- preds$mean[1]

cat("Observed event rate:", round(observed_rate, 3), "\n")
cat("Predicted event rate:", round(predicted_rate, 3), "\n")
cat("Difference:", round(observed_rate - predicted_rate, 3), "\n")

Prior Sensitivity Analysis

Why Prior Sensitivity Matters

In Bayesian analysis, priors can influence results, especially with:

  • Small sample sizes
  • Weak data signal
  • Many parameters (relaxed model)

Default Priors

ML-UMR uses weakly informative defaults:

  • Intercepts: N(0, 10)
  • Regression coefficients: N(0, 10)

Testing Prior Sensitivity

# 1. Default (weakly informative)
fit_default <- mlumr(
  network,
  spfa = TRUE,
  prior_intercept = prior_normal(0, 10),
  prior_beta = prior_normal(0, 10),
  seed = 123
)

# 2. More informative
fit_narrow <- mlumr(
  network,
  spfa = TRUE,
  prior_intercept = prior_normal(0, 2.5),
  prior_beta = prior_normal(0, 2.5),
  seed = 123
)

# 3. Less informative
fit_wide <- mlumr(
  network,
  spfa = TRUE,
  prior_intercept = prior_normal(0, 20),
  prior_beta = prior_normal(0, 20),
  seed = 123
)

# Compare results
results <- data.frame(
  Prior = c("Narrow (SD=2.5)", "Default (SD=10)", "Wide (SD=20)"),
  LOR_index_mean = c(
    marginal_effects(fit_narrow, population = "index")$mean[1],
    marginal_effects(fit_default, population = "index")$mean[1],
    marginal_effects(fit_wide, population = "index")$mean[1]
  ),
  LOR_index_sd = c(
    marginal_effects(fit_narrow, population = "index")$sd[1],
    marginal_effects(fit_default, population = "index")$sd[1],
    marginal_effects(fit_wide, population = "index")$sd[1]
  )
)

print(results)

# Results should be similar if data is informative
# Large differences suggest prior sensitivity
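
One possible yardstick (an assumption of this vignette, not a package convention): flag prior sensitivity when an alternative prior shifts the posterior mean by more than about 0.1 posterior SD relative to the default:

# Standardized shift of each prior's posterior mean from the default
default_mean <- results$LOR_index_mean[results$Prior == "Default (SD=10)"]
default_sd   <- results$LOR_index_sd[results$Prior == "Default (SD=10)"]
results$std_shift <- (results$LOR_index_mean - default_mean) / default_sd
print(results)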

Prior Recommendations

Scenario                 Intercept Prior   Beta Prior
Default                  N(0, 10)          N(0, 10)
Strong prior knowledge   N(μ, 2)           N(0, 1)
Regulatory/HTA           N(0, 10)          N(0, 2.5)
Exploratory              N(0, 20)          N(0, 10)

Integration Point Sensitivity

Why Number of Integration Points Matters

QMC integration approximates the AgD likelihood. More points give a better approximation, but:

  • Computation is slower
  • Returns diminish beyond a certain point
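
For intuition, QMC places low-discrepancy points on the unit cube and maps them through inverse CDFs, mirroring the distr(qnorm, ...) calls above. A standalone sketch with the randtoolbox package (the scheme mlumr uses internally may differ):

library(randtoolbox)

u  <- sobol(64, dim = 2)               # 64 Sobol points on [0, 1]^2
x1 <- qnorm(u[, 1], mean = 0, sd = 1)  # inverse-CDF map to the covariate scale
x2 <- qnorm(u[, 2], mean = 0, sd = 1)
plot(x1, x2)                           # evenly spread, unlike random draws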

Testing Integration Sensitivity

# Test with different numbers of integration points
n_int_values <- c(32, 64, 128, 256)

results_int <- lapply(n_int_values, function(n) {
  # Add integration points
  network_temp <- combine_unanchored(ipd, agd)
  network_temp <- add_integration_unanchored(
    network_temp,
    n_int = n,
    x1 = distr(qnorm, mean = x1_mean, sd = x1_sd),
    x2 = distr(qnorm, mean = x2_mean, sd = x2_sd)
  )

  # Fit model
  fit_temp <- mlumr(network_temp, spfa = TRUE, seed = 123,
                    iter_warmup = 500, iter_sampling = 1000)

  # Extract LOR
  eff <- marginal_effects(fit_temp, population = "comparator", effect_type = "lor")

  data.frame(
    n_int = n,
    lor_mean = eff$mean[1],
    lor_sd = eff$sd[1]
  )
})

results_int_df <- do.call(rbind, results_int)
print(results_int_df)

# Results should stabilize as n_int increases
# If results change substantially, use more integration points
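
A quick base-R visual of that stabilization, building on results_int_df from above:

# The posterior mean LOR should flatten as n_int grows; vertical bars
# show +/- one posterior SD
plot(results_int_df$n_int, results_int_df$lor_mean, type = "b", log = "x",
     xlab = "Number of integration points (log scale)",
     ylab = "Posterior mean LOR")
segments(results_int_df$n_int, results_int_df$lor_mean - results_int_df$lor_sd,
         results_int_df$n_int, results_int_df$lor_mean + results_int_df$lor_sd)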

Integration Point Recommendations

Number of Covariates   Recommended n_int
1-2                    64
3-4                    100-128
5+                     256+

Covariate Selection Sensitivity

Impact of Covariate Choice

Including or excluding covariates affects:

  • Bias adjustment
  • Precision of estimates
  • SPFA assumption implications

Testing Covariate Sensitivity

# Generate data with 3 covariates
data_3cov <- generate_simulation_data(
  n_index = 200,
  n_comparator = 150,
  n_covariates = 3,
  effect_modification = "none",
  seed = 123
)

# Scenario 1: All covariates
ipd_full <- set_ipd_unanchored(data_3cov$ipd, "treatment", "outcome",
                               c("x1", "x2", "x3"))
agd_full <- set_agd_unanchored(data_3cov$agd, "treatment",
                               outcome_n = "n_total", outcome_r = "n_events",
                               cov_means = c("x1_mean", "x2_mean", "x3_mean"),
                               cov_sds = c("x1_sd", "x2_sd", "x3_sd"))
network_full <- combine_unanchored(ipd_full, agd_full)
# ... add integration and fit

# Scenario 2: Subset of covariates
ipd_sub <- set_ipd_unanchored(data_3cov$ipd, "treatment", "outcome",
                              c("x1", "x2"))  # Omit x3
agd_sub <- set_agd_unanchored(data_3cov$agd, "treatment",
                              outcome_n = "n_total", outcome_r = "n_events",
                              cov_means = c("x1_mean", "x2_mean"),
                              cov_sds = c("x1_sd", "x2_sd"))
network_sub <- combine_unanchored(ipd_sub, agd_sub)
# ... add integration and fit

# Compare results
# If omitting x3 substantially changes estimates, it's an important covariate

Covariate Selection Guidelines

  1. Include covariates that:
    • Are prognostic for the outcome
    • Differ between populations
    • Have biological plausibility
    • Are available in both IPD and AgD
  2. Exclude covariates that:
    • Have too much missing data
    • Are highly collinear with others (a quick screen is sketched after this list)
    • Are not reported in AgD
  3. Report sensitivity to covariate choice
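
The collinearity screen referenced in point 2 can be as simple as a pairwise correlation matrix on the IPD covariates (plain base R, reusing data_3cov from above):

# Pairwise correlations; |r| above roughly 0.8 is a common rule-of-thumb
# flag for collinearity
round(cor(data_3cov$ipd[, c("x1", "x2", "x3")]), 2)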

Minimum Checks

# 1. Fit model
fit <- mlumr(network, spfa = TRUE, seed = 123)

# 2. Check convergence
summary(fit)  # R-hat < 1.05, ESS > 400

# 3. Check for divergences
fit$diagnostics$num_divergent  # Should be 0

# 4. Visual check
plot_diagnostics(fit, pars = c("mu_index", "mu_comparator"), type = "trace")

Comprehensive Checks

# 1. Basic convergence checks (above)

# 2. Prior sensitivity
fit_narrow <- mlumr(network, spfa = TRUE, prior_beta = prior_normal(0, 2.5))
fit_wide <- mlumr(network, spfa = TRUE, prior_beta = prior_normal(0, 20))
# Compare results

# 3. SPFA testing
fit_spfa <- mlumr(network, spfa = TRUE)
fit_relaxed <- mlumr(network, spfa = FALSE)
compare_models(fit_spfa, fit_relaxed)

# 4. Integration sensitivity (if uncertain)
# Test n_int = 64, 128, 256

# 5. Document and report
cat("Diagnostic Summary:\n")
cat("- Convergence: All R-hat <", max(fit$summary$rhat), "\n")
cat("- ESS: Minimum bulk ESS =", min(fit$summary$ess_bulk), "\n")
cat("- Divergences:", fit$diagnostics$num_divergent, "\n")
cat("- Prior sensitivity: [stable/moderate/high]\n")
cat("- SPFA: [supported/uncertain/rejected] based on DIC\n")

Troubleshooting Common Issues

Issue 1: High R-hat

Symptoms: R-hat > 1.1

Solutions:

# 1. Run longer chains
fit <- mlumr(..., iter_warmup = 2000, iter_sampling = 4000)

# 2. Use more chains
fit <- mlumr(..., chains = 8)

# 3. Check for multimodality (multiple peaks in posterior)

Issue 2: Low ESS

Symptoms: ESS < 100

Solutions:

# 1. Run longer chains
# 2. Increase adapt_delta
fit <- mlumr(..., adapt_delta = 0.95)

# 3. Check for highly correlated parameters

Issue 3: Divergences

Symptoms: num_divergent > 0

Solutions:

# 1. Increase adapt_delta
fit <- mlumr(..., adapt_delta = 0.99)

# 2. Use more informative priors
fit <- mlumr(..., prior_beta = prior_normal(0, 2.5))

# 3. Reparameterize or simplify model

Issue 4: Prior Sensitivity

Symptoms: Results change substantially with different priors

Solutions:

  • Report results under multiple prior specifications
  • Use external information to justify the prior choice
  • Increase the sample size if possible
  • Acknowledge the limitation in the discussion

Summary Checklist

Before reporting ML-UMR results, verify:

  • R-hat < 1.05 (ideally < 1.01) for all parameters
  • Bulk and tail ESS > 400 for all parameters
  • No divergent transitions
  • Trace plots show well-mixed chains
  • Results stable across prior specifications
  • SPFA vs. relaxed model compared via DIC
  • Results stable across integration point counts
  • Sensitivity to covariate selection assessed

Session Information

sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ggplot2_4.0.2 dplyr_1.2.0   mlumr_0.1.0  
#> 
#> loaded via a namespace (and not attached):
#>  [1] Matrix_1.7-4        gtable_0.3.6        jsonlite_2.0.0     
#>  [4] compiler_4.5.2      tidyselect_1.2.1    pspline_1.0-21     
#>  [7] jquerylib_0.1.4     systemfonts_1.3.1   scales_1.4.0       
#> [10] textshaping_1.0.4   yaml_2.3.12         fastmap_1.2.0      
#> [13] lattice_0.22-7      R6_2.6.1            generics_0.1.4     
#> [16] pcaPP_2.0-5         knitr_1.51          rngWELL_0.10-10    
#> [19] randtoolbox_2.0.5   tibble_3.3.1        desc_1.4.3         
#> [22] bslib_0.10.0        pillar_1.11.1       RColorBrewer_1.1-3 
#> [25] rlang_1.1.7         stabledist_0.7-2    ADGofTest_0.3      
#> [28] gsl_2.1-9           cachem_1.1.0        xfun_0.56          
#> [31] fs_1.6.6            sass_0.4.10         S7_0.2.1           
#> [34] cli_3.6.5           pkgdown_2.2.0       withr_3.0.2        
#> [37] magrittr_2.0.4      digest_0.6.39       grid_4.5.2         
#> [40] mvtnorm_1.3-3       copula_1.1-6        lifecycle_1.0.5    
#> [43] vctrs_0.7.1         evaluate_1.0.5      glue_1.8.0         
#> [46] numDeriv_2016.8-1.1 farver_2.1.2        ragg_1.5.0         
#> [49] stats4_4.5.2        rmarkdown_2.30      purrr_1.2.1        
#> [52] tools_4.5.2         pkgconfig_2.0.3     htmltools_0.5.9