diff --git a/R/data.R b/R/data.R
index 2c11ee2..9631b79 100644
--- a/R/data.R
+++ b/R/data.R
@@ -171,3 +171,38 @@
 #' hist(prior_predictive$chain_size)
 #' }
 "prior_predictive"
+
+
+#' High-level performance results in simulations
+#'
+#' Analyses of datasets simulated over a grid of R, k, and number of chains.
+#' Point (posterior median, maximum likelihood) and interval (95% credible interval,
+#' 95% confidence interval) estimates from both Bayesian and maximum likelihood analyses.
+#'
+#' @docType data
+#' @usage
+#' data(sim_based_testing)
+#' @format data.frame with true values and estimates.
+#' \describe{
+#' \item{index}{Simulation index for replicate simulations with shared (R, k, n_chains).}
+#' \item{n_chains}{The number of chains in the simulated dataset.}
+#' \item{r_true}{The true simulating value of R.}
+#' \item{k_true}{The true simulating value of k.}
+#' \item{r_point}{The posterior median or maximum likelihood value of R.}
+#' \item{k_point}{The posterior median or maximum likelihood value of k.}
+#' \item{r_low}{The lower bound of a 95% credible or confidence interval for R.}
+#' \item{r_high}{The upper bound of a 95% credible or confidence interval for R.}
+#' \item{k_low}{The lower bound of a 95% credible or confidence interval for k.}
+#' \item{k_high}{The upper bound of a 95% credible or confidence interval for k.}
+#' \item{estimator}{Type of analysis, bayes (Bayesian) or maxlik (maximum likelihood).}
+#' }
+#' @keywords datasets
+#' @source Generated in R using nbbp.
+#'
+#' @examples
+#' \dontrun{
+#' data(sim_based_testing)
+#' plot(sim_based_testing$r_true, sim_based_testing$r_point)
+#' abline(a = 0, b = 1)
+#' }
+"sim_based_testing"
diff --git a/data-raw/README.md b/data-raw/README.md
new file mode 100644
index 0000000..72cc265
--- /dev/null
+++ b/data-raw/README.md
@@ -0,0 +1,10 @@
+# Generation of package data
+
+## `sim-study`
+⚠️ Multi-step procedure; requires `make` and `timeout`.
+
+Simulates 100 datasets for each point on a predefined grid of R, k, and number of chains per dataset.
+Analyzes each with `nbbp::fit_nbbp_homogenous_bayes` and `nbbp::fit_nbbp_homogenous_ml`, and stores point and interval estimates in the package data `sim_based_testing`.
+
+To run the `make`-based workflow, `cd data-raw/sim-study` and run `make all` to perform all simulations and fits.
+After `make` finishes, run `data-raw/sim-study/src/gather_results.R` from the top level of the repo to produce the package data.
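+
+For example, an end-to-end run might look like the following (a sketch; it assumes GNU `make` and the coreutils `timeout` command are on the `PATH`, and the grid involves thousands of fits, so expect a long runtime):
+
+```sh
+cd data-raw/sim-study
+make all        # simulate, then run the Bayesian and maximum likelihood fits
+cd ../..
+Rscript data-raw/sim-study/src/gather_results.R   # rebuilds data/sim_based_testing.rda
+```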
diff --git a/data-raw/sim-study/.gitignore b/data-raw/sim-study/.gitignore
new file mode 100644
index 0000000..2211df6
--- /dev/null
+++ b/data-raw/sim-study/.gitignore
@@ -0,0 +1 @@
+*.txt
diff --git a/data-raw/sim-study/Makefile b/data-raw/sim-study/Makefile
new file mode 100644
index 0000000..9ce0b6f
--- /dev/null
+++ b/data-raw/sim-study/Makefile
@@ -0,0 +1,54 @@
+rvec := 0.1 0.25 0.5 0.75 0.95 1.0 1.05 1.25 2.0
+kvec := 0.1111111 0.3333333 1.0000000 3.0000000
+nvec := 10 20 40
+ivec := 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
+
+.SILENT: par/k.txt par/r.txt par/n.txt par/i.txt clean
+.PHONY: simulate bayes maxlik check_timeout
+
+all: bayes maxlik
+
+maxlik: par/i.txt par/n.txt par/r.txt par/k.txt simulate
+	src/maxlik.sh
+
+bayes: par/i.txt par/n.txt par/r.txt par/k.txt simulate
+	src/bayes.sh
+
+# timeout is a downstream dependency but there's no point simulating if we can't analyze
+simulate: par/i.txt par/n.txt par/r.txt par/k.txt check_timeout
+	src/simulate.sh
+
+par/i.txt:
+	printf "%s\n" $(ivec) > par/i.txt
+
+par/n.txt:
+	printf "%s\n" $(nvec) > par/n.txt
+
+par/r.txt:
+	printf "%s\n" $(rvec) > par/r.txt
+
+par/k.txt:
+	printf "%s\n" $(kvec) > par/k.txt
+
+# Makes sure the timeout command exists
+check_timeout:
+	@which timeout > which_timeout.txt
+
+clean:
+	rm -f which_timeout.txt
+	rm -f par/i.txt
+	rm -f par/n.txt
+	rm -f par/r.txt
+	rm -f par/k.txt
+	rm -rf par
+	rm -rf data
+	rm -rf bayes
+	rm -rf maxlik
+	mkdir par
+	mkdir data
+	mkdir bayes
+	mkdir maxlik
+	touch par/.placeholder
+	touch data/.placeholder
+	touch bayes/.placeholder
+	touch maxlik/.placeholder
diff --git a/data-raw/sim-study/bayes/.placeholder b/data-raw/sim-study/bayes/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/data/.placeholder b/data-raw/sim-study/data/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/maxlik/.placeholder b/data-raw/sim-study/maxlik/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/par/.placeholder b/data-raw/sim-study/par/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/src/bayes.sh b/data-raw/sim-study/src/bayes.sh
new file mode 100755
index 0000000..b08601c
--- /dev/null
+++ b/data-raw/sim-study/src/bayes.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+IFS=$'\n' read -d '' -r -a ivec < par/i.txt
+IFS=$'\n' read -d '' -r -a nvec < par/n.txt
+IFS=$'\n' read -d '' -r -a rvec < par/r.txt
+IFS=$'\n' read -d '' -r -a kvec < par/k.txt
+
+seed=0
+for r in "${rvec[@]}"; do
+  for k in "${kvec[@]}"; do
+    offset=0
+    for i in "${ivec[@]}"; do
+      for n in "${nvec[@]}"; do
+        timeout 10s Rscript --vanilla -e "source(\"src/run_analysis.R\"); fit_bayes(${offset}, ${n}, \"data/r${r}_k${k}.txt\", \"bayes/i${i}_n${n}_r${r}_k${k}.txt\", ${seed})"
+        seed=$((seed+1))
+        offset=$((offset+n))
+      done
+    done
+  done
+done
diff --git a/data-raw/sim-study/src/gather_results.R b/data-raw/sim-study/src/gather_results.R
new file mode 100644
index 0000000..f0ba742
--- /dev/null
+++ b/data-raw/sim-study/src/gather_results.R
@@ -0,0 +1,40 @@
+parse_par <- function(fp) {
+  true_par <- gsub(".txt", "", basename(fp), fixed = TRUE)
+  true_par <- strsplit(true_par, "_", fixed = TRUE)[[1]]
+  true_par <- gsub("[[:alpha:]]", "", true_par)
+  return(data.frame(
+    index = as.integer(true_par[1]),
+    n_chains = as.integer(true_par[2]),
+    r_true = as.numeric(true_par[3]),
+    k_true = as.numeric(true_par[4])
+  ))
+}
+
+bayes <- lapply(
+  list.files("data-raw/sim-study/bayes", full.names = TRUE),
+  function(fp) {
+    true_par <- parse_par(fp)
+    est <- read.table(fp, header = FALSE, stringsAsFactors = FALSE, row.names = 1) |> t()
+    res <- cbind(true_par, est, data.frame(estimator = "bayes"))
+    row.names(res) <- NULL
+    return(res)
+  }
+)
+
+ml <- lapply(
+  list.files("data-raw/sim-study/maxlik", full.names = TRUE),
+  function(fp) {
+    true_par <- parse_par(fp)
+    est <- read.table(fp, header = FALSE, stringsAsFactors = FALSE, row.names = 1) |> t()
+    res <- cbind(true_par, est, data.frame(estimator = "maxlik"))
+    row.names(res) <- NULL
+    return(res)
+  }
+)
+
+sim_based_testing <- rbind(do.call(rbind, bayes), do.call(rbind, ml))
+
+sim_unique <- sim_based_testing |> dplyr::distinct()
+stopifnot("Simulation duplicates found" = dim(sim_unique) == dim(sim_based_testing))
+
+usethis::use_data(sim_based_testing, overwrite = TRUE)
diff --git a/data-raw/sim-study/src/maxlik.sh b/data-raw/sim-study/src/maxlik.sh
new file mode 100755
index 0000000..99fc13e
--- /dev/null
+++ b/data-raw/sim-study/src/maxlik.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+IFS=$'\n' read -d '' -r -a ivec < par/i.txt
+IFS=$'\n' read -d '' -r -a nvec < par/n.txt
+IFS=$'\n' read -d '' -r -a rvec < par/r.txt
+IFS=$'\n' read -d '' -r -a kvec < par/k.txt
+
+seed=0
+for r in "${rvec[@]}"; do
+  for k in "${kvec[@]}"; do
+    offset=0
+    for i in "${ivec[@]}"; do
+      for n in "${nvec[@]}"; do
+        timeout 30s Rscript --vanilla -e "source(\"src/run_analysis.R\"); fit_ml(${offset}, ${n}, \"data/r${r}_k${k}.txt\", \"maxlik/i${i}_n${n}_r${r}_k${k}.txt\", ${seed})"
+        seed=$((seed+1))
+        offset=$((offset+n))
+      done
+    done
+  done
+done
diff --git a/data-raw/sim-study/src/run_analysis.R b/data-raw/sim-study/src/run_analysis.R
new file mode 100644
index 0000000..f2d6a46
--- /dev/null
+++ b/data-raw/sim-study/src/run_analysis.R
@@ -0,0 +1,84 @@
+parse_chains <- function(datapath, offset, nchains) {
+  all_chains <- scan(datapath, what = numeric())
+  stopifnot(offset >= 0)
+  stopifnot(offset + nchains <= length(all_chains))
+  return(all_chains[offset + (1:nchains)])
+}
+
+fit_bayes <- function(
+    offset, nchains,
+    datapath, outpath,
+    seed, iter = 5000, alpha = 0.05, ess_thresh = 1000, rhat_thresh = 1.005) {
+  q_low <- alpha / 2
+  q_high <- 1 - q_low
+
+  chains <- parse_chains(datapath, offset, nchains)
+  stopifnot("Got unexpected number of chains" = (length(chains) == nchains))
+  stopifnot("Got bad chains" = (all(is.numeric(chains)) && all(chains > 0)))
+
+  bayes_ests <- c(
+    r_point = NA, k_point = NA,
+    r_low = NA, r_high = NA,
+    k_low = NA, k_high = NA
+  )
+
+  bayes <- nbbp::fit_nbbp_homogenous_bayes(
+    all_outbreaks = chains,
+    iter = iter,
+    seed = seed
+  )
+
+  par <- rstan::extract(bayes, c("r_eff", "dispersion", "inv_sqrt_dispersion"), permuted = FALSE)
+  min_ess <- min(sapply(1:3, function(i) {
+    rstan::ess_bulk(par[, , i])
+  }))
+  max_rhat <- max(sapply(1:3, function(i) {
+    rstan::Rhat(par[, , i])
+  }))
+
+  if (min_ess > ess_thresh && max_rhat < rhat_thresh) {
+    par <- rstan::extract(bayes, c("r_eff", "dispersion", "inv_sqrt_dispersion"), permuted = TRUE)
+    bayes_ests <- c(
+      r_point = unname(median(par$r_eff)),
+      k_point = unname(median(par$dispersion)),
+      r_low = unname(quantile(par$r_eff, q_low)),
+      r_high = unname(quantile(par$r_eff, q_high)),
+      k_low = unname(quantile(par$dispersion, q_low)),
+      k_high = unname(quantile(par$dispersion, q_high))
+    )
+  }
+
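+  # If the convergence checks above fail, bayes_ests stays all NA, so every attempted fit still writes an output file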
write.table(as.data.frame(bayes_ests), file = outpath, col.names = FALSE, quote = FALSE) +} + +fit_ml <- function(offset, nchains, datapath, outpath, seed, nboot = 1000, alpha = 0.05) { + q_low <- alpha / 2 + q_high <- 1 - q_low + + chains <- parse_chains(datapath, offset, nchains) + stopifnot("Got bad chains" = (all(is.numeric(chains)) && all(chains > 0))) + stopifnot("Got unexpected number of chains" = (length(chains) == nchains)) + + maxlik_ests <- c( + r_point = NA, k_point = NA, + r_low = NA, r_high = NA, + k_low = NA, k_high = NA + ) + maxlik <- nbbp::fit_nbbp_homogenous_ml( + all_outbreaks = chains, + nboot = nboot, + seed = seed + ) + if (maxlik$return_code == 0) { + maxlik_ests <- c( + r_point = unname(maxlik$par["r_eff"]), + k_point = unname(maxlik$par["dispersion"]), + r_low = unname(maxlik$ci["r_eff", 1]), + r_high = unname(maxlik$ci["r_eff", 2]), + k_low = unname(maxlik$ci["dispersion", 1]), + k_high = unname(maxlik$ci["dispersion", 2]) + ) + } + + write.table(as.data.frame(maxlik_ests), file = outpath, col.names = FALSE, quote = FALSE) +} diff --git a/data-raw/sim-study/src/simulate.sh b/data-raw/sim-study/src/simulate.sh new file mode 100755 index 0000000..84ebd0f --- /dev/null +++ b/data-raw/sim-study/src/simulate.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +IFS1=$'\n' read -d '' -r -a ivec < par/i.txt +IFS2=$'\n' read -d '' -r -a nvec < par/n.txt +IFS3=$'\n' read -d '' -r -a rvec < par/r.txt +IFS4=$'\n' read -d '' -r -a kvec < par/k.txt + +nchains=0 +for i in "${ivec[@]}"; do + for n in "${nvec[@]}"; do + nchains=$((nchains+n)) + done +done + +seed=0 +for r in "${rvec[@]}"; do + for k in "${kvec[@]}"; do + Rscript --vanilla -e "set.seed(${seed}); nbbp::rnbbp(${nchains}, $r, $k) |> cat(file=\"data/r${r}_k${k}.txt\")" + seed=$((seed+1)) + done +done diff --git a/data/sim_based_testing.rda b/data/sim_based_testing.rda new file mode 100644 index 0000000..7c0ed3a Binary files /dev/null and b/data/sim_based_testing.rda differ diff --git a/man/sim_based_testing.Rd b/man/sim_based_testing.Rd new file mode 100644 index 0000000..80134db --- /dev/null +++ b/man/sim_based_testing.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{sim_based_testing} +\alias{sim_based_testing} +\title{High-level performance results in simulations} +\format{ +data.frame with true values and estimates. +\describe{ +\item{index}{Simulation index for replicate simulations with shared (R, k, n_chains).} +\item{n_chains}{The number of chains in the simulated dataset.} +\item{r_true}{The true simulating value of R.} +\item{k_true}{The true simulating value of k.} +\item{r_point}{The posterior median or maximum likelihood value of R.} +\item{k_point}{The posterior median or maximum likelihood value of k.} +\item{r_low}{The lower bound of a 95\% credible of confidence interval for R.} +\item{r_high}{The upper bound of a 95\% credible of confidence interval for R.} +\item{k_low}{The lower bound of a 95\% credible of confidence interval for k.} +\item{k_high}{The upper bound of a 95\% credible of confidence interval for k.} +\item{estimator}{Type of analysis, bayes (Bayesian) or maxlik (maximum likelihood).} +} +} +\source{ +Generated in R using nbbp. +} +\usage{ +data(sim_based_testing) +} +\description{ +Analyses of a grid (over R, k, and number of chains) of datasets. +Point (posterior median, maximum likelihood) and interval (95\% credible interval, +95\% confidence interval) estimates from both Bayesian and maximum likelihood analyses. 
+}
+\examples{
+\dontrun{
+data(sim_based_testing)
+plot(sim_based_testing$r_true, sim_based_testing$r_point)
+abline(a = 0, b = 1)
+}
+}
+\keyword{datasets}
diff --git a/vignettes/estimator_performance.Rmd b/vignettes/estimator_performance.Rmd
new file mode 100644
index 0000000..f3e3292
--- /dev/null
+++ b/vignettes/estimator_performance.Rmd
@@ -0,0 +1,254 @@
+---
+title: "Estimator performance"
+output:
+  rmarkdown::html_vignette:
+    fig_width: 7
+    fig_height: 5
+    code_folding: "hide"
+vignette: >
+  %\VignetteIndexEntry{Estimator performance}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, echo = FALSE}
+library(knitr)
+knitr::opts_chunk$set(
+  out.extra = 'style="display:block; margin:auto;"'
+)
+```
+
+```{r, message=FALSE, warning=FALSE}
+library(nbbp)
+library(dplyr)
+library(ggplot2)
+library(scales)
+library(tidyr)
+
+data(sim_based_testing)
+```
+
+This vignette is primarily a series of plots in which one can examine the quality of point and interval estimates from Bayesian and maximum likelihood inference with `nbbp` under default settings.
+The underlying data come from simulating 100 datasets at each point of a grid of $R$ and $k$ values, for several dataset sizes (10, 20, and 40 observed chains).
+For each simulated dataset, `fit_nbbp_homogenous_bayes` and `fit_nbbp_homogenous_ml` were used to infer parameters, recording point estimates (posterior median, maximum likelihood) and 95% uncertainty (credible, confidence) intervals.
+These results are available as the package data `sim_based_testing`.
+
+# Point estimation performance
+
+The following code defines a plotting function to compare Bayesian and maximum likelihood point estimates side-by-side, while allowing us to control for and examine the impact of an additional variable on performance (namely, the other parameter in the model, and the amount of data).
+
+```{r}
+est_labeler <- as_labeller(c(
+  "bayes" = "Bayesian",
+  "maxlik" = "Maximum likelihood"
+))
+
+plot_point_ests <- function(param, covar) {
+  truth <- ifelse(param == "r", "r_true", "k_true")
+  estimated <- ifelse(param == "r", "r_point", "k_point")
+  titlename <- ifelse(param == "r", "Estimation of R", "Estimation of k")
+  sim_based_testing$factor_truth <- factor(
+    sim_based_testing[[truth]],
+    levels = sort(unique(sim_based_testing[[truth]]))
+  )
+  sim_based_testing$factor_covar <- factor(
+    sim_based_testing[[covar]],
+    levels = sort(unique(sim_based_testing[[covar]]))
+  )
+  boxes <- ggplot(data = sim_based_testing, aes(
+    x = .data[["factor_truth"]],
+    y = .data[[estimated]],
+    fill = .data[["factor_covar"]]
+  )) +
+    geom_boxplot(outliers = FALSE, alpha = 0.5) +
+    scale_fill_viridis_d("viridis", name = covar) +
+    facet_wrap(vars(get("estimator")), labeller = est_labeler) +
+    theme_minimal() +
+    xlab("True value") +
+    ylab("Estimated value") +
+    ggtitle(titlename)
+
+  true_vals <- sort(unique(sim_based_testing[[truth]]))
+  for (i in seq_along(true_vals)) {
+    segment <- local({
+      i <- i
+      geom_segment(
+        aes(x = i - 0.5, xend = i + 0.5, y = true_vals[i], yend = true_vals[i]),
+        col = "red",
+        lty = 2
+      )
+    })
+    boxes <- boxes + segment
+  }
+
+  return(boxes)
+}
+```
+
+## $R$
+
+As we can see below, maximum likelihood inference generally does a decent job estimating small $R$, but starts having trouble near 1, while Bayesian inference does best around 1.
+Increasing amounts of data are helpful, as expected, though the effects are much more pronounced for maximum likelihood inference.
+Larger true values of $k$ (regimes with less overdispersion) make inference better.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("r", "n_chains")
+```
+
+### By $k$
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("r", "k_true")
+```
+
+## $k$
+
+Maximum likelihood inference appears to provide significantly more variable estimates of $k$ than Bayesian inference, and is notably worse at larger values of $k$ than at smaller ones.
+Both do worse when $R$ is also small, though maximum likelihood does best at middling values while Bayesian inference seems to plateau.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("k", "n_chains") + scale_y_continuous(trans = "log")
+```
+
+### By $R$
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("k", "r_true") + scale_y_continuous(trans = "log")
+```
+
+# Coverage of 95% CIs
+
+The following code defines a plotting function to compare the coverage of Bayesian credible intervals with that of confidence intervals from maximum likelihood inference side-by-side, while allowing us to control for and examine the impact of an additional variable on performance (namely, the other parameter in the model, and the amount of data).
+
+Note that "good" performance can be counterintuitive here.
+The simulations record the 95% CIs for both approaches, which means that the ideal coverage is 95%.[^1]
+That is, a "good" 95% CI will be wrong 5% of the time.
+(Otherwise it isn't a 95% CI, it's something else.)
+All things being equal, we'd rather it covered a bit too much than not quite enough.
+So 96% coverage would be preferable to 94%, but 94% is still better than 100%.
+
+[^1]: That is, assuming we want good frequentist properties from our Bayesian credible intervals. For the purposes of this comparison, we do. We leave philosophical matters aside as out of scope.
+
+```{r}
+plot_coverage <- function(param, covar) {
+  truth <- ifelse(param == "r", "r_true", "k_true")
+  low <- ifelse(param == "r", "r_low", "k_low")
+  high <- ifelse(param == "r", "r_high", "k_high")
+  titlename <- ifelse(param == "r", "Coverage of R", "Coverage of k")
+  sim_based_testing$color_col <- col_numeric("viridis", domain = NULL)(sim_based_testing[[covar]])
+  legend_labels <- unique(sim_based_testing[[covar]])
+  legend_breaks <- unique(sim_based_testing[["color_col"]])
+  sim_based_testing |>
+    # an interval covers when the true value falls inside it
+    mutate(covered = .data[[truth]] > .data[[low]] & .data[[truth]] < .data[[high]]) |>
+    group_by_at(c("estimator", covar, truth, "color_col")) |>
+    summarize(coverage = mean(get("covered"), na.rm = TRUE)) |>
+    ggplot(aes(x = .data[[truth]], y = .data[["coverage"]], col = .data[["color_col"]])) +
+    scale_color_identity(
+      name = covar, labels = legend_labels, breaks = legend_breaks, guide = "legend"
+    ) +
+    geom_point() +
+    theme_minimal() +
+    ylim(c(0, 1)) +
+    geom_hline(yintercept = 0.95, lty = 2, col = "red") +
+    facet_wrap(vars(get("estimator")), labeller = est_labeler) +
+    xlab("True value") +
+    ylab("Coverage") +
+    ggtitle(titlename)
+}
+```
+
+## $R$
+
+Coverage of $R$ by Bayesian credible intervals is generally good except for smaller $R$, where it is abysmal.
+Confidence intervals are also generally close to the nominal coverage, though they have some difficulties around $R \approx 1$ (especially for large $k$).
+For both approaches, more data is helpful.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("r", "n_chains")
+```
+
+### By $k$
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("r", "k_true")
+```
+
+## $k$
+
+Confidence intervals for $k$ display somewhat less parameter sensitivity than credible intervals, the latter of which have very poor coverage for small $R$.
+Outside the small $R$ regime, coverage for $k$ is generally good for both CIs, with credible intervals appearing to be somewhat closer to the nominal 95% coverage.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("k", "n_chains")
+```
+
+### By $R$
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("k", "r_true")
+```
+
+# CI widths
+
+Coverage is not the only thing we care about with our CIs.
+We also care about how narrow or wide the intervals are: all else equal, we'd prefer a narrower interval.
+
+Here, all else is not always equal (coverage is sometimes meaningfully different), but credible intervals are often much, much narrower than confidence intervals.
+
+Below are plots showing the ratio of the widths of confidence to credible intervals for $R$ and $k$, grouped by the true (simulating) values.
+We suppress $R = 2$ in order to see the rest of the values, as at $R = 2$ a good proportion of confidence intervals for $R$ are essentially infinite.
+
+```{r, warning=FALSE}
+bayes <- sim_based_testing |>
+  mutate(width = r_high - r_low) |>
+  filter(estimator == "bayes")
+
+maxlik <- sim_based_testing |>
+  mutate(width = r_high - r_low) |>
+  filter(estimator == "maxlik")
+
+bayes |>
+  filter(r_true < 2) |>
+  inner_join(
+    maxlik,
+    by = c("index", "n_chains", "r_true", "k_true"),
+    suffix = c("_bayes", "_maxlik")
+  ) |>
+  mutate(width_ratio = width_maxlik / width_bayes) |>
+  ggplot(aes(x = factor(r_true), y = width_ratio)) +
+  geom_boxplot(outliers = FALSE) +
+  geom_hline(yintercept = 1.0, lty = 2, lwd = 1.25, col = "red") +
+  scale_y_continuous(trans = "log") +
+  theme_minimal() +
+  xlab("True R") +
+  ylab("CI width ratio for R")
+```
+
+```{r, warning=FALSE}
+bayes <- sim_based_testing |>
+  mutate(width = k_high - k_low) |>
+  filter(estimator == "bayes")
+
+maxlik <- sim_based_testing |>
+  mutate(width = k_high - k_low) |>
+  filter(estimator == "maxlik")
+
+bayes |>
+  inner_join(
+    maxlik,
+    by = c("index", "n_chains", "r_true", "k_true"),
+    suffix = c("_bayes", "_maxlik")
+  ) |>
+  mutate(width_ratio = width_maxlik / width_bayes) |>
+  ggplot(aes(x = factor(k_true), y = width_ratio)) +
+  geom_boxplot(outliers = FALSE) +
+  geom_hline(yintercept = 1.0, lty = 2, lwd = 1.25, col = "red") +
+  scale_y_continuous(trans = "log") +
+  theme_minimal() +
+  xlab("True k") +
+  ylab("CI width ratio for k")
+```
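+
+# Tabular summary of coverage
+
+As a numeric companion to the coverage plots above, empirical coverage can also be tabulated directly from `sim_based_testing`.
+This is a quick sketch, and the grouping (estimator by number of chains) is only illustrative; fits that timed out never produce a row, while fits that ran but failed their checks contribute `NA` estimates, which `na.rm = TRUE` drops.
+
+```{r, message=FALSE, warning=FALSE}
+sim_based_testing |>
+  mutate(
+    r_covered = r_true > r_low & r_true < r_high,
+    k_covered = k_true > k_low & k_true < k_high
+  ) |>
+  group_by(estimator, n_chains) |>
+  summarize(
+    r_coverage = mean(r_covered, na.rm = TRUE),
+    k_coverage = mean(k_covered, na.rm = TRUE),
+    .groups = "drop"
+  ) |>
+  kable(digits = 2)
+```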