diff --git a/R/data.R b/R/data.R
index 2c11ee2..9631b79 100644
--- a/R/data.R
+++ b/R/data.R
@@ -171,3 +171,38 @@
 #' hist(prior_predictive$chain_size)
 #' }
 "prior_predictive"
+
+
+#' High-level performance results in simulations
+#'
+#' Analyses of datasets simulated over a grid of R, k, and number of chains.
+#' Point (posterior median, maximum likelihood) and interval (95% credible interval,
+#' 95% confidence interval) estimates from both Bayesian and maximum likelihood analyses.
+#'
+#' @docType data
+#' @usage
+#' data(sim_based_testing)
+#' @format data.frame with true values and estimates.
+#' \describe{
+#' \item{index}{Simulation index for replicate simulations with shared (R, k, n_chains).}
+#' \item{n_chains}{The number of chains in the simulated dataset.}
+#' \item{r_true}{The true simulating value of R.}
+#' \item{k_true}{The true simulating value of k.}
+#' \item{r_point}{The posterior median or maximum likelihood value of R.}
+#' \item{k_point}{The posterior median or maximum likelihood value of k.}
+#' \item{r_low}{The lower bound of a 95% credible or confidence interval for R.}
+#' \item{r_high}{The upper bound of a 95% credible or confidence interval for R.}
+#' \item{k_low}{The lower bound of a 95% credible or confidence interval for k.}
+#' \item{k_high}{The upper bound of a 95% credible or confidence interval for k.}
+#' \item{estimator}{Type of analysis, bayes (Bayesian) or maxlik (maximum likelihood).}
+#' }
+#' @keywords datasets
+#' @source Generated in R using nbbp.
+#'
+#' @examples
+#' \dontrun{
+#' data(sim_based_testing)
+#' plot(sim_based_testing$r_true, sim_based_testing$r_point)
+#' abline(a = 0, b = 1)
+#' }
+"sim_based_testing"
diff --git a/data-raw/README.md b/data-raw/README.md
new file mode 100644
index 0000000..72cc265
--- /dev/null
+++ b/data-raw/README.md
@@ -0,0 +1,10 @@
+# Generation of package data
+
+## `sim-study`
+⚠️ Multi-step procedure; requires `make` and `timeout`.
+
+Simulates 100 datasets for each point on a predefined grid of R, k, and number of chains per dataset.
+Analyzes each with `nbbp::fit_nbbp_homogenous_bayes` and `nbbp::fit_nbbp_homogenous_ml`, and stores point and interval estimates in the package data `sim_based_testing`.
+
+To run the `make`-based workflow, `cd data-raw/sim-study` and run `make all` to perform all simulations and fits.
+After `make` finishes, run `data-raw/sim-study/src/gather_results.R` from the top level of the repo to produce the package data.
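+
+For example, an end-to-end run might look like the following (a sketch; it assumes GNU `make` and the coreutils `timeout` command are on the `PATH`, and the grid involves thousands of fits, so expect a long runtime):
+
+```sh
+cd data-raw/sim-study
+make all        # simulate, then run the Bayesian and maximum likelihood fits
+cd ../..
+Rscript data-raw/sim-study/src/gather_results.R   # rebuilds data/sim_based_testing.rda
+```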
diff --git a/data-raw/sim-study/.gitignore b/data-raw/sim-study/.gitignore
new file mode 100644
index 0000000..2211df6
--- /dev/null
+++ b/data-raw/sim-study/.gitignore
@@ -0,0 +1 @@
+*.txt
diff --git a/data-raw/sim-study/Makefile b/data-raw/sim-study/Makefile
new file mode 100644
index 0000000..9ce0b6f
--- /dev/null
+++ b/data-raw/sim-study/Makefile
@@ -0,0 +1,54 @@
+rvec := 0.1 0.25 0.5 0.75 0.95 1.0 1.05 1.25 2.0
+kvec := 0.1111111 0.3333333 1.0000000 3.0000000
+nvec := 10 20 40
+ivec := 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
+
+.SILENT: par/k.txt par/r.txt par/n.txt par/i.txt clean
+.PHONY: simulate bayes maxlik check_timeout
+
+all: bayes maxlik
+
+maxlik: par/i.txt par/n.txt par/r.txt par/k.txt simulate
+	src/maxlik.sh
+
+bayes: par/i.txt par/n.txt par/r.txt par/k.txt simulate
+	src/bayes.sh
+
+# timeout is a downstream dependency but there's no point simulating if we can't analyze
+simulate: par/i.txt par/n.txt par/r.txt par/k.txt check_timeout
+	src/simulate.sh
+
+par/i.txt:
+	printf "%s\n" $(ivec) > par/i.txt
+
+par/n.txt:
+	printf "%s\n" $(nvec) > par/n.txt
+
+par/r.txt:
+	printf "%s\n" $(rvec) > par/r.txt
+
+par/k.txt:
+	printf "%s\n" $(kvec) > par/k.txt
+
+# Makes sure the timeout command exists
+check_timeout:
+	@which timeout > which_timeout.txt
+
+clean:
+	rm -f which_timeout.txt
+	rm -f par/i.txt
+	rm -f par/n.txt
+	rm -f par/r.txt
+	rm -f par/k.txt
+	rm -rf par
+	rm -rf data
+	rm -rf bayes
+	rm -rf maxlik
+	mkdir par
+	mkdir data
+	mkdir bayes
+	mkdir maxlik
+	touch par/.placeholder
+	touch data/.placeholder
+	touch bayes/.placeholder
+	touch maxlik/.placeholder
diff --git a/data-raw/sim-study/bayes/.placeholder b/data-raw/sim-study/bayes/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/data/.placeholder b/data-raw/sim-study/data/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/maxlik/.placeholder b/data-raw/sim-study/maxlik/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/par/.placeholder b/data-raw/sim-study/par/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/data-raw/sim-study/src/bayes.sh b/data-raw/sim-study/src/bayes.sh
new file mode 100755
index 0000000..b08601c
--- /dev/null
+++ b/data-raw/sim-study/src/bayes.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+IFS=$'\n' read -d '' -r -a ivec < par/i.txt
+IFS=$'\n' read -d '' -r -a nvec < par/n.txt
+IFS=$'\n' read -d '' -r -a rvec < par/r.txt
+IFS=$'\n' read -d '' -r -a kvec < par/k.txt
+
+seed=0
+for r in "${rvec[@]}"; do
+  for k in "${kvec[@]}"; do
+    offset=0
+    for i in "${ivec[@]}"; do
+      for n in "${nvec[@]}"; do
+        timeout 10s Rscript --vanilla -e "source(\"src/run_analysis.R\"); fit_bayes(${offset}, ${n}, \"data/r${r}_k${k}.txt\", \"bayes/i${i}_n${n}_r${r}_k${k}.txt\", ${seed})"
+        seed=$((seed+1))
+        offset=$((offset+n))
+      done
+    done
+  done
+done
diff --git a/data-raw/sim-study/src/gather_results.R b/data-raw/sim-study/src/gather_results.R
new file mode 100644
index 0000000..f0ba742
--- /dev/null
+++ b/data-raw/sim-study/src/gather_results.R
@@ -0,0 +1,40 @@
+parse_par <- function(fp) {
+  true_par <- gsub(".txt", "", basename(fp), fixed = TRUE)
+  true_par <- strsplit(true_par, "_", fixed = TRUE)[[1]]
+  true_par <- gsub("[[:alpha:]]", "", true_par)
+  return(data.frame(
+    index = as.integer(true_par[1]),
+    n_chains = as.integer(true_par[2]),
+    r_true = as.numeric(true_par[3]),
+    k_true = as.numeric(true_par[4])
+  ))
+}
+
+bayes <- lapply(
+  list.files("data-raw/sim-study/bayes", full.names = TRUE),
+  function(fp) {
+    true_par <- parse_par(fp)
+    est <- read.table(fp, header = FALSE, stringsAsFactors = FALSE, row.names = 1) |> t()
+    res <- cbind(true_par, est, data.frame(estimator = "bayes"))
+    row.names(res) <- NULL
+    return(res)
+  }
+)
+
+ml <- lapply(
+  list.files("data-raw/sim-study/maxlik", full.names = TRUE),
+  function(fp) {
+    true_par <- parse_par(fp)
+    est <- read.table(fp, header = FALSE, stringsAsFactors = FALSE, row.names = 1) |> t()
+    res <- cbind(true_par, est, data.frame(estimator = "maxlik"))
+    row.names(res) <- NULL
+    return(res)
+  }
+)
+
+sim_based_testing <- rbind(do.call(rbind, bayes), do.call(rbind, ml))
+
+sim_unique <- sim_based_testing |> dplyr::distinct()
+stopifnot("Simulation duplicates found" = dim(sim_unique) == dim(sim_based_testing))
+
+usethis::use_data(sim_based_testing, overwrite = TRUE)
diff --git a/data-raw/sim-study/src/maxlik.sh b/data-raw/sim-study/src/maxlik.sh
new file mode 100755
index 0000000..99fc13e
--- /dev/null
+++ b/data-raw/sim-study/src/maxlik.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+IFS=$'\n' read -d '' -r -a ivec < par/i.txt
+IFS=$'\n' read -d '' -r -a nvec < par/n.txt
+IFS=$'\n' read -d '' -r -a rvec < par/r.txt
+IFS=$'\n' read -d '' -r -a kvec < par/k.txt
+
+seed=0
+for r in "${rvec[@]}"; do
+  for k in "${kvec[@]}"; do
+    offset=0
+    for i in "${ivec[@]}"; do
+      for n in "${nvec[@]}"; do
+        timeout 30s Rscript --vanilla -e "source(\"src/run_analysis.R\"); fit_ml(${offset}, ${n}, \"data/r${r}_k${k}.txt\", \"maxlik/i${i}_n${n}_r${r}_k${k}.txt\", ${seed})"
+        seed=$((seed+1))
+        offset=$((offset+n))
+      done
+    done
+  done
+done
diff --git a/data-raw/sim-study/src/run_analysis.R b/data-raw/sim-study/src/run_analysis.R
new file mode 100644
index 0000000..f2d6a46
--- /dev/null
+++ b/data-raw/sim-study/src/run_analysis.R
@@ -0,0 +1,84 @@
+parse_chains <- function(datapath, offset, nchains) {
+  all_chains <- scan(datapath, what = numeric())
+  stopifnot(offset >= 0)
+  stopifnot(offset + nchains <= length(all_chains))
+  return(all_chains[offset + (1:nchains)])
+}
+
+fit_bayes <- function(
+    offset, nchains,
+    datapath, outpath,
+    seed, iter = 5000, alpha = 0.05, ess_thresh = 1000, rhat_thresh = 1.005) {
+  q_low <- alpha / 2
+  q_high <- 1 - q_low
+
+  chains <- parse_chains(datapath, offset, nchains)
+  stopifnot("Got unexpected number of chains" = (length(chains) == nchains))
+  stopifnot("Got bad chains" = (all(is.numeric(chains)) && all(chains > 0)))
+
+  bayes_ests <- c(
+    r_point = NA, k_point = NA,
+    r_low = NA, r_high = NA,
+    k_low = NA, k_high = NA
+  )
+
+  bayes <- nbbp::fit_nbbp_homogenous_bayes(
+    all_outbreaks = chains,
+    iter = iter,
+    seed = seed
+  )
+
+  par <- rstan::extract(bayes, c("r_eff", "dispersion", "inv_sqrt_dispersion"), permuted = FALSE)
+  min_ess <- min(sapply(1:3, function(i) {
+    rstan::ess_bulk(par[, , i])
+  }))
+  max_rhat <- max(sapply(1:3, function(i) {
+    rstan::Rhat(par[, , i])
+  }))
+
+  if (min_ess > ess_thresh && max_rhat < rhat_thresh) {
+    par <- rstan::extract(bayes, c("r_eff", "dispersion", "inv_sqrt_dispersion"), permuted = TRUE)
+    bayes_ests <- c(
+      r_point = unname(median(par$r_eff)),
+      k_point = unname(median(par$dispersion)),
+      r_low = unname(quantile(par$r_eff, q_low)),
+      r_high = unname(quantile(par$r_eff, q_high)),
+      k_low = unname(quantile(par$dispersion, q_low)),
+      k_high = unname(quantile(par$dispersion, q_high))
+    )
+  }
+
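+  # If the convergence checks above fail, bayes_ests stays all NA, so every attempted fit still writes an output file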
write.table(as.data.frame(bayes_ests), file = outpath, col.names = FALSE, quote = FALSE) +} + +fit_ml <- function(offset, nchains, datapath, outpath, seed, nboot = 1000, alpha = 0.05) { + q_low <- alpha / 2 + q_high <- 1 - q_low + + chains <- parse_chains(datapath, offset, nchains) + stopifnot("Got bad chains" = (all(is.numeric(chains)) && all(chains > 0))) + stopifnot("Got unexpected number of chains" = (length(chains) == nchains)) + + maxlik_ests <- c( + r_point = NA, k_point = NA, + r_low = NA, r_high = NA, + k_low = NA, k_high = NA + ) + maxlik <- nbbp::fit_nbbp_homogenous_ml( + all_outbreaks = chains, + nboot = nboot, + seed = seed + ) + if (maxlik$return_code == 0) { + maxlik_ests <- c( + r_point = unname(maxlik$par["r_eff"]), + k_point = unname(maxlik$par["dispersion"]), + r_low = unname(maxlik$ci["r_eff", 1]), + r_high = unname(maxlik$ci["r_eff", 2]), + k_low = unname(maxlik$ci["dispersion", 1]), + k_high = unname(maxlik$ci["dispersion", 2]) + ) + } + + write.table(as.data.frame(maxlik_ests), file = outpath, col.names = FALSE, quote = FALSE) +} diff --git a/data-raw/sim-study/src/simulate.sh b/data-raw/sim-study/src/simulate.sh new file mode 100755 index 0000000..84ebd0f --- /dev/null +++ b/data-raw/sim-study/src/simulate.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +IFS1=$'\n' read -d '' -r -a ivec < par/i.txt +IFS2=$'\n' read -d '' -r -a nvec < par/n.txt +IFS3=$'\n' read -d '' -r -a rvec < par/r.txt +IFS4=$'\n' read -d '' -r -a kvec < par/k.txt + +nchains=0 +for i in "${ivec[@]}"; do + for n in "${nvec[@]}"; do + nchains=$((nchains+n)) + done +done + +seed=0 +for r in "${rvec[@]}"; do + for k in "${kvec[@]}"; do + Rscript --vanilla -e "set.seed(${seed}); nbbp::rnbbp(${nchains}, $r, $k) |> cat(file=\"data/r${r}_k${k}.txt\")" + seed=$((seed+1)) + done +done diff --git a/data/sim_based_testing.rda b/data/sim_based_testing.rda new file mode 100644 index 0000000..7c0ed3a Binary files /dev/null and b/data/sim_based_testing.rda differ diff --git a/man/sim_based_testing.Rd b/man/sim_based_testing.Rd new file mode 100644 index 0000000..80134db --- /dev/null +++ b/man/sim_based_testing.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{sim_based_testing} +\alias{sim_based_testing} +\title{High-level performance results in simulations} +\format{ +data.frame with true values and estimates. +\describe{ +\item{index}{Simulation index for replicate simulations with shared (R, k, n_chains).} +\item{n_chains}{The number of chains in the simulated dataset.} +\item{r_true}{The true simulating value of R.} +\item{k_true}{The true simulating value of k.} +\item{r_point}{The posterior median or maximum likelihood value of R.} +\item{k_point}{The posterior median or maximum likelihood value of k.} +\item{r_low}{The lower bound of a 95\% credible of confidence interval for R.} +\item{r_high}{The upper bound of a 95\% credible of confidence interval for R.} +\item{k_low}{The lower bound of a 95\% credible of confidence interval for k.} +\item{k_high}{The upper bound of a 95\% credible of confidence interval for k.} +\item{estimator}{Type of analysis, bayes (Bayesian) or maxlik (maximum likelihood).} +} +} +\source{ +Generated in R using nbbp. +} +\usage{ +data(sim_based_testing) +} +\description{ +Analyses of a grid (over R, k, and number of chains) of datasets. +Point (posterior median, maximum likelihood) and interval (95\% credible interval, +95\% confidence interval) estimates from both Bayesian and maximum likelihood analyses. 
+}
+\examples{
+\dontrun{
+data(sim_based_testing)
+plot(sim_based_testing$r_true, sim_based_testing$r_point)
+abline(a = 0, b = 1)
+}
+}
+\keyword{datasets}
diff --git a/vignettes/estimator_performance.Rmd b/vignettes/estimator_performance.Rmd
new file mode 100644
index 0000000..f3e3292
--- /dev/null
+++ b/vignettes/estimator_performance.Rmd
@@ -0,0 +1,254 @@
+---
+title: "Estimator performance"
+output:
+  rmarkdown::html_vignette:
+    fig_width: 7
+    fig_height: 5
+    code_folding: "hide"
+vignette: >
+  %\VignetteIndexEntry{Estimator performance}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, echo = FALSE}
+library(knitr)
+knitr::opts_chunk$set(
+  out.extra = 'style="display:block; margin:auto;"'
+)
+```
+
+```{r, message=FALSE, warning=FALSE}
+library(nbbp)
+library(dplyr)
+library(ggplot2)
+library(scales)
+library(tidyr)
+
+data(sim_based_testing)
+```
+
+This vignette is primarily a series of plots in which one can examine the quality of point and interval estimates from Bayesian and maximum likelihood inference with `nbbp` under default settings.
+The underlying data come from simulating 100 datasets at each point of a grid of $R$ and $k$ values, for several dataset sizes (10, 20, and 40 observed chains).
+For each simulated dataset, `fit_nbbp_homogenous_bayes` and `fit_nbbp_homogenous_ml` were used to infer parameters, recording point estimates (posterior median, maximum likelihood) and 95% uncertainty (credible, confidence) intervals.
+These results are available as the package data `sim_based_testing`.
+
+# Point estimation performance
+
+The following code defines a plotting function to compare Bayesian and maximum likelihood point estimates side-by-side, while allowing us to control for and examine the impact of an additional variable on performance (namely, the other parameter in the model, and the amount of data).
+
+```{r}
+est_labeler <- as_labeller(c(
+  "bayes" = "Bayesian",
+  "maxlik" = "Maximum likelihood"
+))
+
+plot_point_ests <- function(param, covar) {
+  truth <- ifelse(param == "r", "r_true", "k_true")
+  estimated <- ifelse(param == "r", "r_point", "k_point")
+  titlename <- ifelse(param == "r", "Estimation of R", "Estimation of k")
+  sim_based_testing$factor_truth <- factor(
+    sim_based_testing[[truth]],
+    levels = sort(unique(sim_based_testing[[truth]]))
+  )
+  sim_based_testing$factor_covar <- factor(
+    sim_based_testing[[covar]],
+    levels = sort(unique(sim_based_testing[[covar]]))
+  )
+  boxes <- ggplot(data = sim_based_testing, aes(
+    x = .data[["factor_truth"]],
+    y = .data[[estimated]],
+    fill = .data[["factor_covar"]]
+  )) +
+    geom_boxplot(outliers = FALSE, alpha = 0.5) +
+    scale_fill_viridis_d("viridis", name = covar) +
+    facet_wrap(vars(get("estimator")), labeller = est_labeler) +
+    theme_minimal() +
+    xlab("True value") +
+    ylab("Estimated value") +
+    ggtitle(titlename)
+
+  true_vals <- sort(unique(sim_based_testing[[truth]]))
+  for (i in seq_along(true_vals)) {
+    segment <- local({
+      i <- i
+      geom_segment(
+        aes(x = i - 0.5, xend = i + 0.5, y = true_vals[i], yend = true_vals[i]),
+        col = "red",
+        lty = 2
+      )
+    })
+    boxes <- boxes + segment
+  }
+
+  return(boxes)
+}
+```
+
+## $R$
+
+As we can see below, maximum likelihood inference generally does a decent job estimating small $R$, but starts having trouble near 1, while Bayesian inference does best around 1.
+Increasing amounts of data are helpful, as expected, though the effects are much more pronounced for maximum likelihood inference.
+Larger true values of $k$ (regimes with less overdispersion) make inference better.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("r", "n_chains")
+```
+
+### By $k$
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("r", "k_true")
+```
+
+## $k$
+
+Maximum likelihood inference appears to provide significantly more variable estimates of $k$ than Bayesian inference, and is notably worse at larger values of $k$ than at smaller ones.
+Both do worse when $R$ is also small, though maximum likelihood does best at middling values while Bayesian inference seems to plateau.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("k", "n_chains") + scale_y_continuous(trans = "log")
+```
+
+### By $R$
+
+```{r, message=FALSE, warning=FALSE}
+plot_point_ests("k", "r_true") + scale_y_continuous(trans = "log")
+```
+
+# Coverage of 95% CIs
+
+The following code defines a plotting function to compare the coverage of Bayesian credible intervals with that of confidence intervals from maximum likelihood inference side-by-side, while allowing us to control for and examine the impact of an additional variable on performance (namely, the other parameter in the model, and the amount of data).
+
+Note that "good" performance can be counterintuitive here.
+The simulations record the 95% CIs for both approaches, which means that the ideal coverage is 95%.[^1]
+That is, a "good" 95% CI will be wrong 5% of the time.
+(Otherwise it isn't a 95% CI, it's something else.)
+All things being equal, we'd rather it covered a bit too much than not quite enough.
+So 96% coverage would be preferable to 94%, but 94% is still better than 100%.
+
+[^1]: That is, assuming we want good frequentist properties from our Bayesian credible intervals. For the purposes of this comparison, we do. We leave philosophical matters aside as out of scope.
+
+```{r}
+plot_coverage <- function(param, covar) {
+  truth <- ifelse(param == "r", "r_true", "k_true")
+  low <- ifelse(param == "r", "r_low", "k_low")
+  high <- ifelse(param == "r", "r_high", "k_high")
+  titlename <- ifelse(param == "r", "Coverage of R", "Coverage of k")
+  sim_based_testing$color_col <- col_numeric("viridis", domain = NULL)(sim_based_testing[[covar]])
+  legend_labels <- unique(sim_based_testing[[covar]])
+  legend_breaks <- unique(sim_based_testing[["color_col"]])
+  sim_based_testing |>
+    # an interval covers when the true value falls inside it
+    mutate(covered = .data[[truth]] > .data[[low]] & .data[[truth]] < .data[[high]]) |>
+    group_by_at(c("estimator", covar, truth, "color_col")) |>
+    summarize(coverage = mean(get("covered"), na.rm = TRUE)) |>
+    ggplot(aes(x = .data[[truth]], y = .data[["coverage"]], col = .data[["color_col"]])) +
+    scale_color_identity(
+      name = covar, labels = legend_labels, breaks = legend_breaks, guide = "legend"
+    ) +
+    geom_point() +
+    theme_minimal() +
+    ylim(c(0, 1)) +
+    geom_hline(yintercept = 0.95, lty = 2, col = "red") +
+    facet_wrap(vars(get("estimator")), labeller = est_labeler) +
+    xlab("True value") +
+    ylab("Coverage") +
+    ggtitle(titlename)
+}
+```
+
+## $R$
+
+Coverage of $R$ by Bayesian credible intervals is generally good except for smaller $R$, where it is abysmal.
+Confidence intervals are also generally close to the nominal coverage, though they have some difficulties around $R \approx 1$ (especially for large $k$).
+For both approaches, more data is helpful.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("r", "n_chains")
+```
+
+### By $k$
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("r", "k_true")
+```
+
+## $k$
+
+Confidence intervals for $k$ display somewhat less parameter sensitivity than credible intervals, the latter of which have very poor coverage for small $R$.
+Outside the small $R$ regime, coverage for $k$ is generally good for both CIs, with credible intervals appearing to be somewhat closer to the nominal 95% coverage.
+
+### By number of chains in dataset
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("k", "n_chains")
+```
+
+### By $R$
+
+```{r, message=FALSE, warning=FALSE}
+plot_coverage("k", "r_true")
+```
+
+# CI widths
+
+Coverage is not the only thing we care about with our CIs.
+We also care about how narrow or wide the intervals are: all else equal, we'd prefer a narrower interval.
+
+Here, all else is not always equal (coverage is sometimes meaningfully different), but credible intervals are often much, much narrower than confidence intervals.
+
+Below are plots showing the ratio of the widths of confidence to credible intervals for $R$ and $k$, grouped by the true (simulating) values.
+We suppress $R = 2$ in order to see the rest of the values, as at $R = 2$ a good proportion of confidence intervals for $R$ are essentially infinite.
+
+```{r, warning=FALSE}
+bayes <- sim_based_testing |>
+  mutate(width = r_high - r_low) |>
+  filter(estimator == "bayes")
+
+maxlik <- sim_based_testing |>
+  mutate(width = r_high - r_low) |>
+  filter(estimator == "maxlik")
+
+bayes |>
+  filter(r_true < 2) |>
+  inner_join(
+    maxlik,
+    by = c("index", "n_chains", "r_true", "k_true"),
+    suffix = c("_bayes", "_maxlik")
+  ) |>
+  mutate(width_ratio = width_maxlik / width_bayes) |>
+  ggplot(aes(x = factor(r_true), y = width_ratio)) +
+  geom_boxplot(outliers = FALSE) +
+  geom_hline(yintercept = 1.0, lty = 2, lwd = 1.25, col = "red") +
+  scale_y_continuous(trans = "log") +
+  theme_minimal() +
+  xlab("True R") +
+  ylab("CI width ratio for R")
+```
+
+```{r, warning=FALSE}
+bayes <- sim_based_testing |>
+  mutate(width = k_high - k_low) |>
+  filter(estimator == "bayes")
+
+maxlik <- sim_based_testing |>
+  mutate(width = k_high - k_low) |>
+  filter(estimator == "maxlik")
+
+bayes |>
+  inner_join(
+    maxlik,
+    by = c("index", "n_chains", "r_true", "k_true"),
+    suffix = c("_bayes", "_maxlik")
+  ) |>
+  mutate(width_ratio = width_maxlik / width_bayes) |>
+  ggplot(aes(x = factor(k_true), y = width_ratio)) +
+  geom_boxplot(outliers = FALSE) +
+  geom_hline(yintercept = 1.0, lty = 2, lwd = 1.25, col = "red") +
+  scale_y_continuous(trans = "log") +
+  theme_minimal() +
+  xlab("True k") +
+  ylab("CI width ratio for k")
+```
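+
+# Tabular summary of coverage
+
+As a numeric companion to the coverage plots above, empirical coverage can also be tabulated directly from `sim_based_testing`.
+This is a quick sketch, and the grouping (estimator by number of chains) is only illustrative; fits that timed out never produce a row, while fits that ran but failed their checks contribute `NA` estimates, which `na.rm = TRUE` drops.
+
+```{r, message=FALSE, warning=FALSE}
+sim_based_testing |>
+  mutate(
+    r_covered = r_true > r_low & r_true < r_high,
+    k_covered = k_true > k_low & k_true < k_high
+  ) |>
+  group_by(estimator, n_chains) |>
+  summarize(
+    r_coverage = mean(r_covered, na.rm = TRUE),
+    k_coverage = mean(k_covered, na.rm = TRUE),
+    .groups = "drop"
+  ) |>
+  kable(digits = 2)
+```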