## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, echo = FALSE, message = FALSE, warning=FALSE----------------------
# List all suggested packages needed for this vignette
required_pkgs <- c(
  "imuGAP", "data.table", "bayesplot", "ggplot2",
  "tidyr", "dplyr"
)

# Check if they are available
pkgs_available <- all(sapply(required_pkgs, requireNamespace, quietly = TRUE))

# If not available, hide the rest of the document or knit dynamically
if (!pkgs_available) {
  knitr::opts_chunk$set(eval = FALSE)
  message("Some suggested packages are missing. Vignette code will not be executed.")
} else {
  # Assign the result so the chunk does not auto-print the logical vector
  # sapply() returns (the setup block should render no output).
  chk <- suppressPackageStartupMessages(
    sapply(required_pkgs, require, character.only = TRUE)
  )
}

## ----locations----------------------------------------------------------------
data("locations_sim", package = "imuGAP")
head(locations_sim)

# Canonicalize and validate
canonical_locations <- canonicalize_locations(locations_sim)
head(canonical_locations)

## ----observations-------------------------------------------------------------
data("observations_sim", package = "imuGAP")
head(observations_sim[, .(obs_id, loc_id, positive, sample_n, censored)])

# Canonicalize and validate
canonical_observations <- canonicalize_observations(observations_sim)
head(canonical_observations)

## ----populations--------------------------------------------------------------
data("populations_sim", package = "imuGAP")
head(populations_sim)

# Canonicalize and validate
canonical_populations <- canonicalize_populations(
  populations_sim, observations_sim, locations_sim
)
head(canonical_populations)

## ----validation-failure-obs---------------------------------------------------
# Create a copy with an invalid observation (positive > sample_n)
invalid_obs <- copy(observations_sim[, .(obs_id, loc_id, positive, sample_n, censored)])
invalid_obs[1, positive := sample_n + 10]

# This will fail validation and throw an error:
tryCatch(
  canonicalize_observations(invalid_obs),
  error = function(e) message("Caught expected error: ", e$message)
)

## ----validation-failure-locs--------------------------------------------------
# Create a copy with a duplicate location ID
invalid_locs <- rbind(
  locations_sim,
  data.frame(loc_id = "Scruggs", parent_id = "State")
)

# This will fail validation:
tryCatch(
  canonicalize_locations(invalid_locs),
  error = function(e) message("Caught expected error: ", e$message)
)

## ----sampling-code, eval = FALSE----------------------------------------------
# fit_sim <- sampling(
#   observations_sim, populations_sim, locations_sim,
#   stan_opts = stan_options(
#     iter = 2000, chains = 4, refresh = 0, seed = 1L
#   )
# )

## ----load-fit-----------------------------------------------------------------
data("fit_sim", package = "imuGAP")

## ----extract-params-----------------------------------------------------------
beta_draws <- extract_imugap(fit_sim, pars = "beta_bs")
str(beta_draws)

## ----trace-plot---------------------------------------------------------------
bayesplot::mcmc_trace(fit_sim$stanfit,
  pars = c(
    "beta_bs[1]", "beta_bs[2]", "beta_bs[3]",
    "beta_bs[4]", "beta_bs[5]"
  )
) + theme_bw() +
  theme(legend.position = c(.9, .1), legend.justification = c(1, 0))

## ----target-------------------------------------------------------------------
target_sim <- create_target(
  fit = fit_sim, location = unique(locations_sim$loc_id), age = 1:18,
  cohort = max(populations_sim$cohort) - 18, dose = c(1, 2), mode = "snapshot"
)
head(target_sim)

## ----predict-code, eval = FALSE-----------------------------------------------
# predict_sim <- predict(object = fit_sim, target = target_sim, posterior_size = 100)

## ----load-predictions---------------------------------------------------------
data("predict_sim", package = "imuGAP")

## ----summarize-predictions----------------------------------------------------
# Calculate the posterior mean coverage probability for each location and dose at age 5
summary_predict <- summary(predict_sim)
head(summary_predict)

## ----state-viz----------------------------------------------------------------
summary_predict |>
  subset(loc_id == "State" & dose == 2 & age > 4) |>
  ggplot() +
  aes(x = age) +
  geom_line(aes(y = q50)) +
  geom_ribbon(aes(ymin = q2_5, ymax = q97_5), alpha = 0.5) +
  theme_bw() +
  scale_x_continuous(breaks = 5:18, minor_breaks = NULL) +
  labs(x = "Age", y = "Coverage", title = "State-level two dose coverage")

## ----county-viz---------------------------------------------------------------
summary_predict |>
  subset(loc_id %in% c("Scruggs", "Simone", "Watson") & dose == 2 & age > 4) |>
  ggplot() +
  aes(x = age) +
  geom_line(aes(y = q50, color = loc_id)) +
  geom_ribbon(aes(ymin = q2_5, ymax = q97_5, fill = loc_id), alpha = 0.2) +
  theme_bw() +
  theme(
    legend.position = "inside", legend.position.inside = c(.2, 0.05),
    legend.justification.inside = c(0, 0)
  ) +
  scale_x_continuous(breaks = 5:18, minor_breaks = NULL) +
  scale_color_discrete(NULL, aesthetics = c("color", "fill")) +
  labs(
    x = "Age", y = "Coverage", title = "County-level two dose coverage"
  )

## ----grade-viz----------------------------------------------------------------
scruggs_schools <- locations_sim[parent_id == "Scruggs", loc_id]
summary_predict |>
  subset(
    loc_id %in% scruggs_schools & dose == 2 & age > 4
  ) |>
  ggplot() +
  geom_boxplot(aes(x = factor(age), y = q50)) +
  theme_bw() +
  labs(
    x = "Age", y = "Coverage",
    title = "Distribution of School-Level Coverage For Scruggs County"
  )

## -----------------------------------------------------------------------------
schools <- c(
  "Towhee Children's Academy", # ~380 per grade
  "Flycatcher Elementary", # ~110 per grade
  "Sparrow School" # ~60 per grade
)

# Subset to targets of interest (all retained posterior draws)
predict_sub <- predict_sim |>
  subset(loc_id %in% schools & dose == 2 & age > 4)

# Get the pre-computed background coverage matching the subsetted target
latent_ref <- copy(predict_sub$target)
latent_ref$coverage <- latent_params_sim$coverage[predict_sub$target$obs_id]


# Convert predictions to a long-format data.frame
draws_df <- as.data.frame(predict_sub)

# Now plot it all
ggplot() +
  aes(age, coverage, color = loc_id) +
  geom_point(
    data = draws_df,
    alpha = 1 / 256, shape = 16,
    position = position_jitterdodge(
      dodge.width = 0.5,
      jitter.width = 0.15
    )
  ) +
  geom_point(
    data = latent_ref,
    mapping = aes(shape = "True value"),
    fill = NA
  ) +
  theme_bw() +
  scale_shape_manual(
    name = "",
    values = c("True value" = 24)
  ) +
  scale_color_discrete(NULL, aesthetics = c("color", "fill")) +
  scale_x_continuous(breaks = 5:18, minor_breaks = NULL) +
  theme(legend.position = "bottom") +
  labs(color = "School", x = "Age", y = "Coverage")