## ----setup, include = FALSE---------------------------------------------------
# Vignette-wide knitr chunk defaults:
#   collapse = TRUE  -> source and its printed output share one block
#   comment  = "#>"  -> prefix placed in front of each line of printed output
#   eval     = FALSE -> chunks are displayed but not executed
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)

## ----install------------------------------------------------------------------
# # Install from CRAN
# install.packages("llmimpute")

## ----quickstart---------------------------------------------------------------
# library(llmimpute)
# 
# # Example dataset with missing values
# df <- data.frame(
#   age    = c(45L, NA, 38L, 62L, 29L),
#   bp     = c(130, 140, 120, 155, NA),
#   smoker = c("No", "Yes", "No", NA, "No"),
#   stringsAsFactors = FALSE
# )
# 
# # 1. Diagnose missingness (no API call)
# lmi_diagnose(df)
# 
# # 2. Impute — offline fallback used automatically when no API key is set
# result <- lmi_impute(df)
# 
# # 3. Access results
# result$data          # imputed data frame
# result$imputations   # audit trail with confidence scores and reasoning
# summary(result)      # per-column statistics
# 
# # 4. Export to disk
# lmi_export(result, path = tempdir(), prefix = "my_study")

## ----methods------------------------------------------------------------------
# # List all 19 available offline methods
# lmi_methods()
# 
# # Use a specific method
# result_rf  <- lmi_impute(df, offline = TRUE, offline_method = "random_forest")
# result_si  <- lmi_impute(df, offline = TRUE, offline_method = "softimpute")
# result_br  <- lmi_impute(df, offline = TRUE, offline_method = "bayesian_ridge")
# 
# # Let the package choose per column (default)
# result_auto <- lmi_impute(df)

## ----llm----------------------------------------------------------------------
# library(llmimpute)
# 
# # Set key for this session (reads ANTHROPIC_API_KEY from environment)
# lmi_set_api_key()
# 
# # Impute with domain context
# result <- lmi_impute(df, domain = "healthcare")
# 
# # Flag anomalous existing values in addition to imputing
# result2 <- lmi_impute(df, domain = "healthcare", flag_suspicious = TRUE)
# result2$suspicious   # data.frame of flagged cells

## ----model--------------------------------------------------------------------
# # See available models
# lmi_models()
# 
# # Higher capability (slower, more expensive)
# lmi_set_model("claude-opus-4-20250514")
# 
# # Faster and cheaper
# lmi_set_model("claude-haiku-4-5-20251001")

## ----audit--------------------------------------------------------------------
# head(result$imputations)
# #   row    col original imputed confidence reasoning
# # 1   2    age       NA      45         72  knn ...
# # 2   5     bp       NA     130         68  mean ...

## ----filter-------------------------------------------------------------------
# high_conf <- result$imputations[result$imputations$confidence >= 70, ]

## ----chunks-------------------------------------------------------------------
# result <- lmi_impute(big_df, domain = "financial", max_rows = 30L,
#                      verbose = TRUE)