---
title: "3) Example of DBV-SBV models for species mixtures in a tester-based design"
author: "J. Salomon, J. Enjalbert, T. Flutre"
abstract: "This document aims at simulating data with a DBV-SBV model for species mixtures (tester design), fitting it, and checking the estimates."
date: "`r format(Sys.time(), '%d/%m/%Y')`"
output:
  html_document:
    toc: true
    toc_depth: 5
    toc_float: true
    number_sections: true
    code_folding: show
colorlinks: true
urlcolor: blue
editor_options: 
  chunk_output_type: console
vignette: >
  %\VignetteIndexEntry{3) Example of DBV-SBV models for species mixtures in a tester-based design}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, echo=FALSE}
suppressPackageStartupMessages(library(knitr))
opts_chunk$set(
  echo = TRUE, warning = TRUE, message = TRUE, cache = FALSE,
  fig.align = "center", collapse = TRUE
)
opts_knit$set(progress = TRUE, verbose = TRUE)
```



# Preamble

Dependencies:
```{r}
suppressPackageStartupMessages(library(plantmix))
suppressPackageStartupMessages(library(emmeans))
suppressPackageStartupMessages(library(ggplot2))
```

```{r time_0, echo=FALSE}
## Execution time (see the appendix):
t0 <- proc.time()
```


# Simulate data

```{r}
set.seed(12345)
```

## Simulate the genotypes

### Set the dimensions

```{r}
levSpecies <- c("S1", "S2")

nbGenos <- c("S1" = 500, "S2" = 500)
levGenos <- list(
  "S1" = sprintf(
    fmt = paste0("gS1_%0", floor(log10(nbGenos["S1"])) + 1, "i"),
    1:nbGenos["S1"]
  ),
  "S2" = sprintf(
    fmt = paste0("gS2_%0", floor(log10(nbGenos["S2"])) + 1, "i"),
    1:nbGenos["S2"]
  )
)

nbSnps <- c("S1" = 1000, "S2" = 1000)
levSnps <- list(
  "S1" = sprintf(
    fmt = paste0("sS1_%0", floor(log10(nbSnps["S1"])) + 1, "i"),
    1:nbSnps["S1"]
  ),
  "S2" = sprintf(
    fmt = paste0("sS2_%0", floor(log10(nbSnps["S2"])) + 1, "i"),
    1:nbSnps["S2"]
  )
)
```

### SNP genotypes

```{r}
nb_pops <- 10
weak_div_pops <- diag(nb_pops)
weak_div_pops[upper.tri(weak_div_pops)] <- 0.9
weak_div_pops[lower.tri(weak_div_pops)] <- weak_div_pops[upper.tri(weak_div_pops)]
snpGenos <- lapply(levSpecies, function(species) {
  tmp <- rep(nbGenos[species] / nb_pops, nb_pops - 1)
  tmp <- c(tmp, nbGenos[species] - sum(tmp))
  simulGenosDoseStruct(
    nb_genos = tmp,
    nb_snps = nbSnps[species],
    div_pops = weak_div_pops,
    geno_IDs = levGenos[[species]],
    snp_IDs = levSnps[[species]]
  )
})
names(snpGenos) <- levSpecies
sapply(snpGenos, dim)
snpGenos$S1[1:3, 1:4]
table(snpGenos$S1)
```

### Additive genetic relationships

For simplicity, the first estimator of VanRaden (2008) is used, that assumes linkage equilibrium and Hard-Weinberg equilibrium.

```{r}
snpAFs <- lapply(snpGenos, function(M) {
  colSums(M) / (2 * nrow(M))
})
GRMs_vr <- lapply(levSpecies, function(species) {
  GRM <- estimGRM(snpGenos[[species]], snpAFs[[species]])
  as.matrix(Matrix::nearPD(GRM)$mat)
})
names(GRMs_vr) <- levSpecies
```

```{r, class.source='fold-hide'}
species <- "S1"
GRM <- GRMs_vr[[species]]
image(Matrix(GRM), main = paste0("GRM for ", species))
hist(diag(GRM), main = paste0("GRM for ", species))
hist(GRM[upper.tri(GRM)], main = paste0("GRM for ", species))
```

## Simulate the phenotypes

As in Salomon et al (2026), the design will be incomplete (sparse) but balanced, and tester-based, involving:

* many genotypes of the first species, the focal one, whose breeding values will be statistically modeled as random, with kinship matrix $K$;

* and a small number of genotypes of the second species, the tester one, whose breeding values will be statistically modeled as fixed.

The yield data are generated according to the following equations:

* intercrops: $Y_{IC} = X_{IC} B_{IC} + Z_{DS_f} BV_f + Z_{D{\times}S} \, DBV{\times}SBV + E_{IC}$

* sole crops:

    * focal species: $y_{SC_f} = X_{SC_f} \beta_{SC_f} + Z_{D_f} DBV_f + Z_{D_f} SIGV_f + e_{SC_f}$ where $\beta_f$ only includes the contrasts for the "block" explanatory factor

    * tester species: $y_{SC_t} = X_{SC_t} \beta_{SC_t} + e_{SC_t}$ where $\beta_t$ includes the contrasts for the "block" and "DBV" explanatory factors (the "SIGV" explanatory factor for the tester species is ignored)

The parameter values correspond to cereal-legume mixtures such as winter wheat and pea, inspired from the papers of Moutier et al (2022) and Haug et al (2023).

```{r}
nbGenosTrial <- c("S1" = 300, "S2" = 2)
levGenosTrial <- lapply(levSpecies, function(species) {
  sample(levGenos[[species]], nbGenosTrial[species])
})
names(levGenosTrial) <- levSpecies

GRMs_vr_trial <- list()
GRMs_vr_trial$S1 <- GRMs_vr$S1[levGenosTrial$S1, levGenosTrial$S1]
GRMs_vr_trial$S2 <- diag(nbGenosTrial["S2"]) # diag because modeled as fixed
dimnames(GRMs_vr_trial$S2) <- list(levGenosTrial$S2, levGenosTrial$S2)

set.seed(12345)
out <- simulDBVSBVinter(GRMs_vr_trial)
names(out)
tmp <- list2env(out, envir = environment())
```

### Design sparsity

```{r, fig.width=10}
plantmix:::plotAllocSchemeInterMixDesign(datW)
```

### Indices of plant mixtures

Several indices can be used to compare sole crops and intercrops.
Below some of them are computed using the true breeding values, i.e., with neither block effects nor experimental errors, to give an idea of what the simulated data correspond to.

#### RYT (LER) and RYP

```{r, class.source='fold-hide'}
## Reformat and compute
is_mix <- which(datW$type == "IC")
true_RYTs <- list()
true_RYPs <- list()
for (idx in is_mix) {
  true_y1_IC <- as.vector(truth$mu[["S1"]]["IC"]) + datW$true_gen_yield_S1[idx]
  true_y2_IC <- as.vector(truth$mu[["S2"]]["IC"]) + datW$true_gen_yield_S2[idx]
  g1 <- as.character(datW$geno_S1[idx])
  g2 <- as.character(datW$geno_S2[idx])
  true_y1_SC <- as.vector(truth$mu[["S1"]]["SC"]) +
    datW$true_gen_yield_S1[datW$geno_S1 == g1 &
      is.na(datW$geno_S2)]
  true_y2_SC <- as.vector(truth$mu[["S2"]]["SC"]) +
    datW$true_gen_yield_S2[datW$geno_S2 == g2 &
      is.na(datW$geno_S1)]
  true_y2_SC <- unique(true_y2_SC)
  yields <- data.frame(
    SCcrop = c(true_y1_SC, true_y2_SC),
    intercrop = c(true_y1_IC, true_y2_IC),
    row.names = c(g1, g2)
  )
  tmp <- LER(yields)
  mixId <- paste0(g1, "-", g2)
  true_RYTs[[mixId]] <- c(
    "RY_S1" = as.numeric(tmp$pLER[1]),
    "RY_S2" = as.numeric(tmp$pLER[2]),
    "RYT" = tmp$LER
  )
  true_RYPs[[mixId]] <- c(
    "RYP_S1" = true_y1_IC /
      (props[["S1"]] * true_y1_SC),
    "RYP_S2" = true_y2_IC /
      (props[["S2"]] * true_y2_SC)
  )
}
true_RYTs <- data.frame(
  mix = names(true_RYTs),
  geno_S1 = sapply(strsplit(names(true_RYTs), "-"), `[`, 1),
  geno_S2 = sapply(strsplit(names(true_RYTs), "-"), `[`, 2),
  as.data.frame(do.call(rbind, true_RYTs)),
  stringsAsFactors = TRUE
)
str(true_RYTs)
summary(true_RYTs[, grep("RY_", colnames(true_RYTs))])
summary(true_RYTs[, grep("RYT", colnames(true_RYTs))])
true_RYPs <- data.frame(
  mix = names(true_RYPs),
  geno_S1 = sapply(strsplit(names(true_RYPs), "-"), `[`, 1),
  geno_S2 = sapply(strsplit(names(true_RYPs), "-"), `[`, 2),
  as.data.frame(do.call(rbind, true_RYPs)),
  stringsAsFactors = TRUE
)
str(true_RYPs)
summary(true_RYPs[, grep("RYP", colnames(true_RYPs))])

if (FALSE) {
  ## using the RYT() function
  keys <- unique(paste0(datL$focal, " in ", datL$standID))
  tmp <- do.call(rbind, strsplit(keys, " in "))
  datLavg <- data.frame(
    key = keys,
    focal = tmp[, 1],
    standID = tmp[, 2],
    stringsAsFactors = TRUE
  )
  datLavg$type <- "IC"
  datLavg$type[as.character(datLavg$focal) == as.character(datLavg$standID)] <- "SC"
  datLavg$focal_species <- "S1"
  datLavg$focal_species[grep("^gS2_", datLavg$focal)] <- "S2"
  datLavg$prop <- 1
  datLavg$prop[datLavg$type == "IC" & datLavg$focal_species == "S1"] <- props["S1"]
  datLavg$prop[datLavg$type == "IC" & datLavg$focal_species == "S2"] <- props["S2"]
  for (i in 1:nrow(datLavg)) {
    idx <- which(datL$focal == datLavg$focal[i] &
      datL$standID == datLavg$standID[i])
    datLavg$focal_yield[i] <- mean(datL$focal_yield[idx])
  }
  true_RYTs2 <- RYT(datLavg, "standID", "focal", "prop", "focal_yield")
  true_RYTs2 <- droplevels(true_RYTs2[!is.na(true_RYTs2$RYT), ])
  true_RYTs2 <- droplevels(true_RYTs2[!duplicated(true_RYTs2$standID), ])
}

## Plot
ggplot(true_RYTs) +
  aes(x = RY_S1) +
  geom_histogram(color = "white", bins = 30) +
  geom_vline(
    xintercept = sowingDensities$S1["IC"] /
      sowingDensities$S1["SC"],
    col = "red", linewidth = 2
  ) +
  labs(
    title = "True relative yields (partial land-equivalent ratios) of species 1 for all mixtures",
    x = "RY (partial LER) of species 1"
  ) +
  theme_bw()

ggplot(true_RYTs) +
  aes(x = RY_S2) +
  geom_histogram(color = "white", bins = 30) +
  geom_vline(
    xintercept = sowingDensities$S2["IC"] /
      sowingDensities$S2["SC"],
    col = "red", linewidth = 2
  ) +
  labs(
    title = "True partial land-equivalent ratio of species 2 for all mixtures",
    x = "RY (partial LER) of species 2"
  ) +
  theme_bw()

ggplot(true_RYTs) +
  aes(x = geno_S2, y = RYT) +
  geom_violin(aes(fill = geno_S2), trim = FALSE, show.legend = FALSE) +
  geom_boxplot(width = 0.2) +
  labs(
    title = "True land-equivalent ratio for all mixtures"
  ) +
  theme_bw()

p <- ggplot(true_RYTs) +
  aes(x = RY_S1, y = RY_S2, color = geno_S2)
for (i in seq(0.75, 2, by = 0.25)) {
  if (i == 1) {
    p <- p + geom_abline(intercept = i, slope = -1, linetype = "solid", color = "black")
  } else {
    p <- p + geom_abline(intercept = i, slope = -1, linetype = "dotted", color = "black")
  }
}
p + geom_abline(intercept = 0, slope = 1, linetype = "dotted", color = "black") +
  geom_point(size = 2) +
  labs(
    title = "True relative yields (RYs, a.k.a. partial LERs)",
    x = "relative yield (partial LER) of species 1",
    y = "relative yiedl (partial LER) of species 2",
    color = "Tester"
  ) +
  ## guides(color="none") +
  scale_x_continuous(breaks = seq(0, 1.4, by = 0.1)) +
  scale_y_continuous(breaks = seq(0, 1.4, by = 0.1)) +
  coord_cartesian(xlim = c(0, 1.4), ylim = c(0, 1.4)) +
  theme(aspect.ratio = 1) +
  theme_bw()

ggplot(true_RYPs) +
  aes(x = RYP_S1, y = RYP_S2, color = geno_S2) +
  geom_abline(intercept = 0, slope = 1, linetype = "solid", color = "black") +
  geom_hline(yintercept = 1) +
  geom_vline(xintercept = 1) +
  geom_point(size = 2) +
  labs(
    title = "True relative yields per plant (RYPs)",
    x = "RYP of species 1",
    y = "RYP of species 2",
    color = "Tester"
  ) +
  ## guides(color="none") +
  scale_x_continuous(breaks = seq(0, 6.5, by = 1)) +
  scale_y_continuous(breaks = seq(0, 6.5, by = 1)) +
  coord_cartesian(xlim = c(0, 6.5), ylim = c(0, 6.5)) +
  theme(aspect.ratio = 1) +
  theme_bw()
```

#### RYM

```{r, class.source='fold-hide', fig.width=10}
## Reformat and compute
tmp <- datW[, c("geno_S1", "geno_S2", "true_yield_S1", "true_yield_S2")]
tmp$ID <- NA
tmp$props <- NA
tmp$true_yield <- NA
## sole crop of species 1:
idx <- which(!is.na(tmp$geno_S1) & is.na(tmp$geno_S2))
tmp$ID[idx] <- as.character(tmp$geno_S1[idx])
tmp$props[idx] <- "1"
tmp$true_yield[idx] <- tmp$true_yield_S1[idx]
## sole crop of species 2:
idx <- which(is.na(tmp$geno_S1) & !is.na(tmp$geno_S2))
tmp$ID[idx] <- as.character(tmp$geno_S2[idx])
tmp$props[idx] <- "1"
tmp$true_yield[idx] <- tmp$true_yield_S2[idx]
## intercrops of species 1 and 2:
idx <- which(!is.na(tmp$geno_S1) & !is.na(tmp$geno_S2))
sep <- "|"
tmp$ID[idx] <- as.character(paste0(
  tmp$geno_S1[idx], sep,
  tmp$geno_S2[idx]
))
prop1 <- props["S1"]
prop2 <- props["S2"]
stopifnot(prop1 + prop2 == 1)
prop1 <- round(prop1, 2)
prop2 <- 1 - prop1
tmp$props[idx] <- paste0(prop1, sep, prop2)
tmp$true_yield[idx] <- tmp$true_yield_S1[idx] + tmp$true_yield_S2[idx]
stopifnot(all(!is.na(tmp$ID)))
tmp$ID <- factor(tmp$ID)
tmp$props <- factor(tmp$props)
## keep only one yield (the true one) per modality
dupIDs <- table(as.character(tmp$ID))
(dupIDs <- names(dupIDs)[dupIDs > 1])
for (dupID in dupIDs) {
  idx <- which(as.character(tmp$ID) == dupID)
  tmp <- droplevels(tmp[-idx[2:length(idx)], ])
}
rm(dupIDs)

tmp <- RYM(tmp,
  colIDstand = "ID", colIDcomps = "ID", colProps = "props",
  colY = "true_yield", sep = "|"
)
summary(tmp$RYM)

## Plot
ggplot(tmp) +
  aes(x = RYM) +
  geom_histogram(na.rm = TRUE, bins = 30, color = "white") +
  geom_vline(xintercept = 1, linewidth = 2) +
  geom_vline(xintercept = mean(tmp$RYM, na.rm = TRUE), linewidth = 2, color = "red") +
  labs(title = "True relative yields of mixtures (RYMs)") +
  theme_bw()
```




# Explore the data

In this section, an exploratory data analysis is done on the data including block effects and experimental errors, so that it can be easily applied on real data, too.

```{r}
str(datW)
tapply(datW$type, datW$block, table)
```

```{r, class.source='fold-hide'}
ggplot(datL) +
  aes(x = block, y = focal_yield) +
  geom_violin(aes(fill = block)) +
  geom_boxplot(fill = "white", width = 0.2) +
  theme_bw() +
  facet_grid(focal_species ~ type)

is_mix <- datW$type == "IC"
subDatW <- droplevels(datW[is_mix, ])
ggplot(subDatW) +
  aes(x = yield_S1, y = yield_S2, color = geno_S1, shape = geno_S2) +
  geom_abline(intercept = seq(0, 200, by = 10), slope = -1, linetype = "dotted", color = "black") +
  geom_point(size = 2) +
  labs(
    title = "Partial yields in intercrop",
    x = "species 1 (in qt.ha-1)", y = "species 2 (in qt.ha-1)",
    shape = "Tester (species S2)"
  ) +
  guides(color = "none") +
  theme_bw()
```

## Yield map

```{r, class.source='fold-hide', fig.width=10}
## Add the empty micro-plots:
coords <- data.frame(
  x = rep(sort(unique(datW$x)), each = length(unique(datW$y))),
  y = sort(unique(datW$y)),
  block = "A",
  plot = NA
)
coords$block[coords$x >= min(datW$x[datW$block != "A"])] <- "B"
coords$plot <- paste0(coords$x, coords$block, coords$y)
length(idx <- which(!coords$plot %in% as.character(datW$plot)))
tmp <- as.data.frame(matrix(nrow = length(idx), ncol = ncol(datW)))
colnames(tmp) <- colnames(datW)
tmp[, c("x", "y", "block", "plot")] <- coords[idx, ]
datWSupp <- rbind(
  datW,
  as.data.frame(tmp)
)

## Plot
xranges <- do.call(rbind, tapply(datW$x, datW$block, range))
p <- ggplot(datWSupp) +
  aes(x = x, y = y) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  ) +
  scale_x_continuous(breaks = sort(unique(datW$x))) +
  scale_y_continuous(
    breaks = sort(unique(datW$y)),
    sec.axis = dup_axis()
  ) +
  guides(x = guide_axis(angle = 90)) +
  geom_tile(na.rm = TRUE) +
  geom_rect(aes(xmin = x - 0.5, xmax = x + 0.5, ymin = y - 0.5, ymax = y + 0.5),
    color = "white"
  ) +
  geom_text(
    x = sum(xranges[1, ]) / 2, y = 10.7, label = "Block A",
    hjust = 0, color = "black"
  ) +
  geom_text(
    x = sum(xranges[2, ]) / 2, y = 10.7, label = "Block B",
    hjust = 0, color = "black"
  ) +
  geom_vline(
    xintercept = max(datW$x[datW$block == "A"]),
    color = "black", linetype = "dashed", linewidth = 1
  )

p + aes(fill = type) +
  labs(title = "Types of microplots") +
  scale_fill_discrete()

scaleCols <- c("#CB2027", "#ffec1b", "#b3e93e", "#60BD68", "#059748")
scaleLim <- range(datW$tot_yield)
p + aes(fill = tot_yield) +
  labs(title = "Total yield for each microplot") +
  scale_fill_continuous(type = "viridis")
```



# Infer the parameters

## Using intercrops only

### Prepare the inputs

```{r}
idxIC <- which(!is.na(datW$geno_S1) & !is.na(datW$geno_S2))
datW_IC <- droplevels(datW[idxIC, ])

listY <- list(Y_IC = datW_IC[, c("yield_S1", "yield_S2")])
listX <- list(X_IC = model.matrix(~ 1 + block + geno_S2, datW_IC,
  contrasts.arg = list(
    "block" = "contr.sum",
    "geno_S2" = "contr.sum"
  )
))
listZ <- list(Z_DS_f = model.matrix(~ 0 + geno_S1, datW_IC))
colnames(listZ$Z_DS_f) <- gsub("^geno_S1", "", colnames(listZ$Z_DS_f))
listVCov <- list(K = GRMs_vr_trial$S1[
  levels(datW_IC$geno_S1),
  levels(datW_IC$geno_S1)
])
```

```{r, echo=FALSE, eval=FALSE}
if (FALSE) {
  listZ$Z_DxS_f <- model.matrix(~ 0 + standID, datW_IC)
  colnames(listZ$Z_DxS_f) <- gsub("^standID", "", colnames(listZ$Z_DxS_f))
  listVCov$Kmixpair <- diag(nlevels(datW_IC$standID))
  dimnames(listVCov$Kmixpair) <- list(
    colnames(listZ$Z_DxS_f), colnames(listZ$Z_DxS_f)
  )
  ## TODO: listVCov$invKmixpair <- getInvKmixpairDxS(dat, "focal", "neighbor", sep="+")
}
```

### Fit the model

```{r}
fitsTmb <- list()
i <- 1
for (REML in c(TRUE, FALSE)) {
  print(paste0("fit model with ", ifelse(REML, "REML", "ML"), "..."))
  st <- system.time(
    fitTmb <- fitDBVSBVinter(listY, listX, listZ, listVCov,
      lOptions = list(iter.max = 20),
      REML = REML, verbose = 0
    )
  )
  print(st)
  fitsTmb[[i]] <- fitTmb
  i <- i + 1
  break # skip ML to speed-up
}
```

```{r, class.source='fold-hide'}
for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  p <- ggplot(fitTmb$trace) +
    aes(x = iter, y = objfn) +
    geom_point() +
    geom_line() +
    labs(
      title = "Optimization convergence",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw()
  print(p)
}
```

### Checks

```{r, fig.width=9}
for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  print(paste0("REML=", fitTmb$REML))

  print("Check fixed effects:")
  checks <- data.frame(
    species = rep(c("S1", "S2"), each = nrow(truth$B_IC)),
    truth = c(truth$B_IC),
    estim = c(fitTmb$report$B_IC)
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)

  print("Check (co)variances of random genetic effects:")
  checks <- data.frame(
    ID = c("var(DBV)_S1", "var(SBV)_S1", "cor(DBVxSBV)_S1"),
    truth = c(
      truth$var_DBV["S1"],
      truth$var_SBV["S1"],
      truth$cor_DBV_SBV["S1"]
    ),
    estim = c(
      fitTmb$report$vars_BV_f,
      fitTmb$report$Cor_BV[1, 2]
    )
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)

  print("Check (co)variances of residual errors:")
  checks <- data.frame(
    ID = c("var(err)_IC_S1", "var(err)_IC_S2", "cor(err)"),
    truth = c(truth$var_err_IC, truth$cor_err_IC),
    estim = c(fitTmb$report$vars_E_IC, fitTmb$report$Cor_E_IC[1, 2])
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)

  print(fitTmb$sry_sdr[grep("^log_sd|^unconstr_cor", rownames(fitTmb$sry_sdr)), ])
  if (FALSE) {
    print(paste0(
      "AIC = ", round(fitTmb$AIC),
      " (k = ", attr(fitTmb$AIC, "k"), ")"
    ))
  }

  print("Check random genetic effects of the focal species:")
  checks <- data.frame(
    type = c(
      rep(c("DBV", "SBV"), each = nrow(truth$BV$S1)),
      rep("BV_IC", length(truth$BV_IC$S1))
    ),
    truth = c(
      truth$BV$S1[levels(datW_IC$geno_S1), ],
      truth$BV_IC$S1
    ),
    estim = c(
      fitTmb$sry_sdr[grep("^BV_f$", rownames(fitTmb$sry_sdr)), "Estimate"],
      fitTmb$report$BV_IC_f[names(truth$BV_IC$S1)]
    )
  )
  checks$type <- factor(checks$type,
                        levels = c("BV_IC", "DBV", "SBV"))
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(tapply(1:nrow(checks), checks$type, function(idx) {
    cor(checks$truth[idx], checks$esti[idx])
  }))
  p <- ggplot(checks) +
    aes(x = estim, y = truth) +
    geom_hline(yintercept = 0, linetype = "dotted") +
    geom_vline(xintercept = 0, linetype = "dotted") +
    geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
    geom_point() +
    labs(
      title = "Results with intercrop-only data",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw() +
    facet_wrap(~type)
  print(p)
}
```

### Parametric bootstrap

```{r}
if (FALSE) { # slow
  system.time(
    pmTmb <- paramBoot4TMB(fitTmb, nb_boot = 5)
  )
  summary(do.call(rbind, pmTmb))
  fitTmb$sry_sdr[names(pmTmb[[1]]), ]
}
```

## Using both sole crops and intercrops

### Prepare the inputs

```{r}
idxIC <- which(!is.na(datW$geno_S1) & !is.na(datW$geno_S2))
datW_IC <- droplevels(datW[idxIC, ])
idxSCf <- which(datL$type == "SC" & datL$focal_species == "S1")
datL_SC_f <- droplevels(datL[idxSCf, ])
idxSCt <- which(datL$type == "SC" & datL$focal_species == "S2")
datL_SC_t <- droplevels(datL[idxSCt, ])

listY <- list()
listY$Y_IC <- datW_IC[, c("yield_S1", "yield_S2")]
listY$y_SC_f <- datL_SC_f$focal_yield
listY$y_SC_t <- datL_SC_t$focal_yield
sapply(listY[-1], length)

listX <- list()
listX$X_IC <- model.matrix(~ 1 + block + geno_S2,
  data = datW_IC,
  contrasts.arg = list(
    "block" = "contr.sum",
    "geno_S2" = "contr.sum"
  )
)
listX$X_SC_f <- model.matrix(~ 1 + block, datL_SC_f,
  contrasts.arg = list(block = "contr.sum")
)
listX$X_SC_t <- model.matrix(~ 1 + block + focal, datL_SC_t,
  contrasts.arg = list(
    block = "contr.sum",
    focal = "contr.sum"
  )
)

listZ <- list()
listZ$Z_DS_f <- model.matrix(~ 0 + geno_S1, datW_IC)
colnames(listZ$Z_DS_f) <- gsub("^geno_S1", "", colnames(listZ$Z_DS_f))
listZ$Z_D_f <- model.matrix(~ 0 + focal, datL_SC_f)
colnames(listZ$Z_D_f) <- gsub("^focal", "", colnames(listZ$Z_D_f))

listVCov <- list(K = GRMs_vr_trial$S1[
  levels(datW_IC$geno_S1),
  levels(datW_IC$geno_S1)
])
```

```{r, echo=FALSE, eval=FALSE}
if (FALSE) {
  listZ$Z_DxS_f <- model.matrix(~ 0 + standID, datW_IC)
  colnames(listZ$Z_DxS_f) <- gsub("^standID", "", colnames(listZ$Z_DxS_f))
  listVCov$Kmixpair <- diag(nlevels(datW_IC$standID))
  dimnames(listVCov$Kmixpair) <- list(
    colnames(listZ$Z_DxS_f), colnames(listZ$Z_DxS_f)
  )
  ## TODO: listVCov$invKmixpair <- getInvKmixpairDxS(dat, "focal", "neighbor", sep="+")
}
```

### Fit the model

```{r}
fitsTmb <- list()
i <- 1
for (REML in c(TRUE, FALSE)) {
  print(paste0("fit model with ", ifelse(REML, "REML", "ML"), "..."))
  st <- system.time(
    fitTmb <- fitDBVSBVinter(listY, listX, listZ, listVCov,
      lOptions = list(iter.max = 20),
      REML = REML, verbose = 0
    )
  )
  print(st)
  fitsTmb[[i]] <- fitTmb
  i <- i + 1
  break # skip ML to speed-up
}
```

```{r, class.source='fold-hide'}
for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  p <- ggplot(fitTmb$trace) +
    aes(x = iter, y = objfn) +
    geom_point() +
    geom_line() +
    labs(
      title = "Optimization convergence",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw()
  print(p)
}
```

### Checks

```{r, fig.width=12, fig.height=9}
for (i in seq_along(fitsTmb)) {
  fitTmb <- fitsTmb[[i]]
  print(paste0("REML=", fitTmb$REML))

  print("Check fixed effects:")
  checks <- data.frame(
    species = rep(c("S1", "S2"), each = nrow(truth$B_IC)),
    truth = c(truth$B_IC),
    estim = c(fitTmb$report$B_IC)
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)
  checks <- data.frame(
    species = "S1",
    truth = obsMC$blObsContrs$S1[, "SC"],
    estim = fitTmb$report$beta_SC_f
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)
  checks <- data.frame(
    species = "S2",
    truth = c(
      obsMC$blObsContrs$S2[, "SC"],
      obsMC$BVObsContrs$S2[-1, "SC", "DBV"]
    ),
    estim = fitTmb$report$beta_SC_t
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)

  print("Check (co)variances of random genetic effects:")
  checks <- data.frame(
    ID = c("var(DBV)", "var(SBV)", "var(SIGV)", "cor(DBVxSBV)"),
    truth = c(
      truth$var_DBV["S1"],
      truth$var_SBV["S1"],
      truth$var_SIGV["S1"],
      truth$cor_DBV_SBV["S1"]
    ),
    estim = c(
      fitTmb$report$vars_BV_f,
      fitTmb$report$var_SIGV_f,
      fitTmb$report$Cor_BV[1, 2]
    )
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)

  print("Check (co)variances of residual errors:")
  checks <- data.frame(
    ID = c(
      "var(err)_S1", "var(err)_S2", "cor(err)",
      "var(err)_SC_S1", "var(err)_SC_S2"
    ),
    truth = c(
      truth$var_err_IC, truth$cor_err_IC,
      truth$var_err_SC
    ),
    estim = c(
      fitTmb$report$vars_E_IC, fitTmb$report$Cor_E_IC[1, 2],
      fitTmb$report$var_err_SC_f, fitTmb$report$var_err_SC_t
    )
  )
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(checks)

  print(fitTmb$sry_sdr[grep("^log_sd|^unconstr_cor", rownames(fitTmb$sry_sdr)), ])
  if (FALSE) {
    print(paste0(
      "AIC = ", round(fitTmb$AIC),
      " (k = ", attr(fitTmb$AIC, "k"), ")"
    ))
  }

  print("Check random genetic effects of the focal species:")
  checks <- data.frame(
    type = c(
      rep(c("DBV", "SBV", "SIGV"), each = nrow(truth$BV$S1)),
      rep(c("BV_IC", "BV_SC"), each = length(truth$BV_IC$S1))
    ),
    truth = c(
      truth$BV$S1[levels(datW_IC$geno_S1), ],
      truth$SIGVs$S1[levels(datW_IC$geno_S1)],
      truth$BV_IC$S1,
      truth$BV_SC$S1
    ),
    estim = c(
      fitTmb$sry_sdr[grep("^BV_f$", rownames(fitTmb$sry_sdr)), "Estimate"],
      fitTmb$sry_sdr[grep("^SIGV_f$", rownames(fitTmb$sry_sdr)), "Estimate"],
      fitTmb$report$BV_IC_f[names(truth$BV_IC$S1)],
      fitTmb$report$BV_SC_f[names(truth$BV_SC$S1)]
    )
  )
  checks$type <- factor(checks$type,
                        levels = c("BV_SC", "BV_IC", "DBV", "SBV", "SIGV"))
  checks$nBE <- normBiasError(checks$estim, checks$truth)
  print(tapply(1:nrow(checks), checks$type, function(idx) {
    cor(checks$truth[idx], checks$esti[idx])
  }))
  p <- ggplot(checks) +
    aes(x = estim, y = truth) +
    geom_hline(yintercept = 0, linetype = "dotted") +
    geom_vline(xintercept = 0, linetype = "dotted") +
    geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
    geom_point() +
    labs(
      title = "Results with both sole-crop and intercrop data",
      subtitle = paste0("REML=", fitTmb$REML)
    ) +
    theme_bw() +
    facet_wrap(~type)
  print(p)
}
```



# Reference

See the article for more details:

* Salomon J, Enjalbert J, Flutre T. 2026. Joint modeling of social genetic effects in mono- and pluri-specific groups: case study in intercrops. https://doi.org/10.64898/2026.03.27.714849




# Appendix

```{r info}
t1 <- proc.time()
t1 - t0
print(sessionInfo(), locale = FALSE)
```
