Abstract
This document aims at simulating data with a DBV-SBV model for species mixtures (tester design), fitting it, and checking the estimates.Dependencies:
suppressPackageStartupMessages(library(plantmix))
suppressPackageStartupMessages(library(emmeans))
suppressPackageStartupMessages(library(ggplot2))
set.seed(12345)
levSpecies <- c("S1", "S2")
nbGenos <- c("S1" = 500, "S2" = 500)
levGenos <- list(
"S1" = sprintf(
fmt = paste0("gS1_%0", floor(log10(nbGenos["S1"])) + 1, "i"),
1:nbGenos["S1"]
),
"S2" = sprintf(
fmt = paste0("gS2_%0", floor(log10(nbGenos["S2"])) + 1, "i"),
1:nbGenos["S2"]
)
)
nbSnps <- c("S1" = 1000, "S2" = 1000)
levSnps <- list(
"S1" = sprintf(
fmt = paste0("sS1_%0", floor(log10(nbSnps["S1"])) + 1, "i"),
1:nbSnps["S1"]
),
"S2" = sprintf(
fmt = paste0("sS2_%0", floor(log10(nbSnps["S2"])) + 1, "i"),
1:nbSnps["S2"]
)
)
nb_pops <- 10
weak_div_pops <- diag(nb_pops)
weak_div_pops[upper.tri(weak_div_pops)] <- 0.9
weak_div_pops[lower.tri(weak_div_pops)] <- weak_div_pops[upper.tri(weak_div_pops)]
snpGenos <- lapply(levSpecies, function(species) {
tmp <- rep(nbGenos[species] / nb_pops, nb_pops - 1)
tmp <- c(tmp, nbGenos[species] - sum(tmp))
simulGenosDoseStruct(
nb_genos = tmp,
nb_snps = nbSnps[species],
div_pops = weak_div_pops,
geno_IDs = levGenos[[species]],
snp_IDs = levSnps[[species]]
)
})
names(snpGenos) <- levSpecies
sapply(snpGenos, dim)
## S1 S2
## [1,] 500 500
## [2,] 1000 1000
snpGenos$S1[1:3, 1:4]
## sS1_0001 sS1_0002 sS1_0003 sS1_0004
## gS1_001 0 1 1 1
## gS1_002 1 1 2 1
## gS1_003 0 0 1 0
table(snpGenos$S1)
##
## 0 1 2
## 154021 201021 144958
For simplicity, the first estimator of VanRaden (2008) is used, that assumes linkage equilibrium and Hard-Weinberg equilibrium.
snpAFs <- lapply(snpGenos, function(M) {
colSums(M) / (2 * nrow(M))
})
GRMs_vr <- lapply(levSpecies, function(species) {
GRM <- estimGRM(snpGenos[[species]], snpAFs[[species]])
as.matrix(Matrix::nearPD(GRM)$mat)
})
names(GRMs_vr) <- levSpecies
species <- "S1"
GRM <- GRMs_vr[[species]]
image(Matrix(GRM), main = paste0("GRM for ", species))
hist(diag(GRM), main = paste0("GRM for ", species))
hist(GRM[upper.tri(GRM)], main = paste0("GRM for ", species))
As in Salomon et al (2026), the design will be incomplete (sparse) but balanced, and tester-based, involving:
many genotypes of the first species, the focal one, whose breeding values will be statistically modeled as random, with kinship matrix \(K\);
and a small number of genotypes of the second species, the tester one, whose breeding values will be statistically modeled as fixed.
The yield data are generated according to the following equations:
intercrops: \(Y_{IC} = X_{IC} B_{IC} + Z_{DS_f} BV_f + Z_{D{\times}S} \, DBV{\times}SBV + E_{IC}\)
sole crops:
focal species: \(y_{SC_f} = X_{SC_f} \beta_{SC_f} + Z_{D_f} DBV_f + Z_{D_f} SIGV_f + e_{SC_f}\) where \(\beta_f\) only includes the contrasts for the “block” explanatory factor
tester species: \(y_{SC_t} = X_{SC_t} \beta_{SC_t} + e_{SC_t}\) where \(\beta_t\) includes the contrasts for the “block” and “DBV” explanatory factors (the “SIGV” explanatory factor for the tester species is ignored)
The parameter values correspond to cereal-legume mixtures such as winter wheat and pea, inspired from the papers of Moutier et al (2022) and Haug et al (2023).
nbGenosTrial <- c("S1" = 300, "S2" = 2)
levGenosTrial <- lapply(levSpecies, function(species) {
sample(levGenos[[species]], nbGenosTrial[species])
})
names(levGenosTrial) <- levSpecies
GRMs_vr_trial <- list()
GRMs_vr_trial$S1 <- GRMs_vr$S1[levGenosTrial$S1, levGenosTrial$S1]
GRMs_vr_trial$S2 <- diag(nbGenosTrial["S2"]) # diag because modeled as fixed
dimnames(GRMs_vr_trial$S2) <- list(levGenosTrial$S2, levGenosTrial$S2)
set.seed(12345)
out <- simulDBVSBVinter(GRMs_vr_trial)
names(out)
## [1] "truth" "datW" "datL" "obsMC"
## [5] "sowingDensities" "props"
tmp <- list2env(out, envir = environment())
plantmix:::plotAllocSchemeInterMixDesign(datW)
Several indices can be used to compare sole crops and intercrops. Below some of them are computed using the true breeding values, i.e., with neither block effects nor experimental errors, to give an idea of what the simulated data correspond to.
## Reformat and compute
is_mix <- which(datW$type == "IC")
true_RYTs <- list()
true_RYPs <- list()
for (idx in is_mix) {
true_y1_IC <- as.vector(truth$mu[["S1"]]["IC"]) + datW$true_gen_yield_S1[idx]
true_y2_IC <- as.vector(truth$mu[["S2"]]["IC"]) + datW$true_gen_yield_S2[idx]
g1 <- as.character(datW$geno_S1[idx])
g2 <- as.character(datW$geno_S2[idx])
true_y1_SC <- as.vector(truth$mu[["S1"]]["SC"]) +
datW$true_gen_yield_S1[datW$geno_S1 == g1 &
is.na(datW$geno_S2)]
true_y2_SC <- as.vector(truth$mu[["S2"]]["SC"]) +
datW$true_gen_yield_S2[datW$geno_S2 == g2 &
is.na(datW$geno_S1)]
true_y2_SC <- unique(true_y2_SC)
yields <- data.frame(
SCcrop = c(true_y1_SC, true_y2_SC),
intercrop = c(true_y1_IC, true_y2_IC),
row.names = c(g1, g2)
)
tmp <- LER(yields)
mixId <- paste0(g1, "-", g2)
true_RYTs[[mixId]] <- c(
"RY_S1" = as.numeric(tmp$pLER[1]),
"RY_S2" = as.numeric(tmp$pLER[2]),
"RYT" = tmp$LER
)
true_RYPs[[mixId]] <- c(
"RYP_S1" = true_y1_IC /
(props[["S1"]] * true_y1_SC),
"RYP_S2" = true_y2_IC /
(props[["S2"]] * true_y2_SC)
)
}
true_RYTs <- data.frame(
mix = names(true_RYTs),
geno_S1 = sapply(strsplit(names(true_RYTs), "-"), `[`, 1),
geno_S2 = sapply(strsplit(names(true_RYTs), "-"), `[`, 2),
as.data.frame(do.call(rbind, true_RYTs)),
stringsAsFactors = TRUE
)
str(true_RYTs)
## 'data.frame': 300 obs. of 6 variables:
## $ mix : Factor w/ 300 levels "gS1_001-gS2_301",..: 24 210 257 230 285 118 224 267 158 205 ...
## $ geno_S1: Factor w/ 300 levels "gS1_001","gS1_002",..: 24 210 257 230 285 118 224 267 158 205 ...
## $ geno_S2: Factor w/ 2 levels "gS2_191","gS2_301": 1 1 1 1 1 1 1 1 1 1 ...
## $ RY_S1 : num 0.389 0.591 0.48 0.476 0.522 ...
## $ RY_S2 : num 1.003 0.697 1.086 0.977 0.859 ...
## $ RYT : num 1.39 1.29 1.57 1.45 1.38 ...
summary(true_RYTs[, grep("RY_", colnames(true_RYTs))])
## RY_S1 RY_S2
## Min. :0.3219 Min. :0.6613
## 1st Qu.:0.4592 1st Qu.:0.8462
## Median :0.4925 Median :0.8918
## Mean :0.4910 Mean :0.8959
## 3rd Qu.:0.5218 3rd Qu.:0.9500
## Max. :0.6148 Max. :1.1151
summary(true_RYTs[, grep("RYT", colnames(true_RYTs))])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.245 1.346 1.387 1.387 1.426 1.566
true_RYPs <- data.frame(
mix = names(true_RYPs),
geno_S1 = sapply(strsplit(names(true_RYPs), "-"), `[`, 1),
geno_S2 = sapply(strsplit(names(true_RYPs), "-"), `[`, 2),
as.data.frame(do.call(rbind, true_RYPs)),
stringsAsFactors = TRUE
)
str(true_RYPs)
## 'data.frame': 300 obs. of 5 variables:
## $ mix : Factor w/ 300 levels "gS1_001-gS2_301",..: 24 210 257 230 285 118 224 267 158 205 ...
## $ geno_S1: Factor w/ 300 levels "gS1_001","gS1_002",..: 24 210 257 230 285 118 224 267 158 205 ...
## $ geno_S2: Factor w/ 2 levels "gS2_191","gS2_301": 1 1 1 1 1 1 1 1 1 1 ...
## $ RYP_S1 : num 0.493 0.748 0.608 0.602 0.662 ...
## $ RYP_S2 : num 4.76 3.31 5.16 4.64 4.08 ...
summary(true_RYPs[, grep("RYP", colnames(true_RYPs))])
## RYP_S1 RYP_S2
## Min. :0.4077 Min. :3.141
## 1st Qu.:0.5817 1st Qu.:4.020
## Median :0.6238 Median :4.236
## Mean :0.6219 Mean :4.255
## 3rd Qu.:0.6610 3rd Qu.:4.512
## Max. :0.7788 Max. :5.297
if (FALSE) {
## using the RYT() function
keys <- unique(paste0(datL$focal, " in ", datL$standID))
tmp <- do.call(rbind, strsplit(keys, " in "))
datLavg <- data.frame(
key = keys,
focal = tmp[, 1],
standID = tmp[, 2],
stringsAsFactors = TRUE
)
datLavg$type <- "IC"
datLavg$type[as.character(datLavg$focal) == as.character(datLavg$standID)] <- "SC"
datLavg$focal_species <- "S1"
datLavg$focal_species[grep("^gS2_", datLavg$focal)] <- "S2"
datLavg$prop <- 1
datLavg$prop[datLavg$type == "IC" & datLavg$focal_species == "S1"] <- props["S1"]
datLavg$prop[datLavg$type == "IC" & datLavg$focal_species == "S2"] <- props["S2"]
for (i in 1:nrow(datLavg)) {
idx <- which(datL$focal == datLavg$focal[i] &
datL$standID == datLavg$standID[i])
datLavg$focal_yield[i] <- mean(datL$focal_yield[idx])
}
true_RYTs2 <- RYT(datLavg, "standID", "focal", "prop", "focal_yield")
true_RYTs2 <- droplevels(true_RYTs2[!is.na(true_RYTs2$RYT), ])
true_RYTs2 <- droplevels(true_RYTs2[!duplicated(true_RYTs2$standID), ])
}
## Plot
ggplot(true_RYTs) +
aes(x = RY_S1) +
geom_histogram(color = "white", bins = 30) +
geom_vline(
xintercept = sowingDensities$S1["IC"] /
sowingDensities$S1["SC"],
col = "red", linewidth = 2
) +
labs(
title = "True relative yields (partial land-equivalent ratios) of species 1 for all mixtures",
x = "RY (partial LER) of species 1"
) +
theme_bw()
ggplot(true_RYTs) +
aes(x = RY_S2) +
geom_histogram(color = "white", bins = 30) +
geom_vline(
xintercept = sowingDensities$S2["IC"] /
sowingDensities$S2["SC"],
col = "red", linewidth = 2
) +
labs(
title = "True partial land-equivalent ratio of species 2 for all mixtures",
x = "RY (partial LER) of species 2"
) +
theme_bw()
ggplot(true_RYTs) +
aes(x = geno_S2, y = RYT) +
geom_violin(aes(fill = geno_S2), trim = FALSE, show.legend = FALSE) +
geom_boxplot(width = 0.2) +
labs(
title = "True land-equivalent ratio for all mixtures"
) +
theme_bw()
p <- ggplot(true_RYTs) +
aes(x = RY_S1, y = RY_S2, color = geno_S2)
for (i in seq(0.75, 2, by = 0.25)) {
if (i == 1) {
p <- p + geom_abline(intercept = i, slope = -1, linetype = "solid", color = "black")
} else {
p <- p + geom_abline(intercept = i, slope = -1, linetype = "dotted", color = "black")
}
}
p + geom_abline(intercept = 0, slope = 1, linetype = "dotted", color = "black") +
geom_point(size = 2) +
labs(
title = "True relative yields (RYs, a.k.a. partial LERs)",
x = "relative yield (partial LER) of species 1",
y = "relative yiedl (partial LER) of species 2",
color = "Tester"
) +
## guides(color="none") +
scale_x_continuous(breaks = seq(0, 1.4, by = 0.1)) +
scale_y_continuous(breaks = seq(0, 1.4, by = 0.1)) +
coord_cartesian(xlim = c(0, 1.4), ylim = c(0, 1.4)) +
theme(aspect.ratio = 1) +
theme_bw()
ggplot(true_RYPs) +
aes(x = RYP_S1, y = RYP_S2, color = geno_S2) +
geom_abline(intercept = 0, slope = 1, linetype = "solid", color = "black") +
geom_hline(yintercept = 1) +
geom_vline(xintercept = 1) +
geom_point(size = 2) +
labs(
title = "True relative yields per plant (RYPs)",
x = "RYP of species 1",
y = "RYP of species 2",
color = "Tester"
) +
## guides(color="none") +
scale_x_continuous(breaks = seq(0, 6.5, by = 1)) +
scale_y_continuous(breaks = seq(0, 6.5, by = 1)) +
coord_cartesian(xlim = c(0, 6.5), ylim = c(0, 6.5)) +
theme(aspect.ratio = 1) +
theme_bw()
## Reformat and compute
tmp <- datW[, c("geno_S1", "geno_S2", "true_yield_S1", "true_yield_S2")]
tmp$ID <- NA
tmp$props <- NA
tmp$true_yield <- NA
## sole crop of species 1:
idx <- which(!is.na(tmp$geno_S1) & is.na(tmp$geno_S2))
tmp$ID[idx] <- as.character(tmp$geno_S1[idx])
tmp$props[idx] <- "1"
tmp$true_yield[idx] <- tmp$true_yield_S1[idx]
## sole crop of species 2:
idx <- which(is.na(tmp$geno_S1) & !is.na(tmp$geno_S2))
tmp$ID[idx] <- as.character(tmp$geno_S2[idx])
tmp$props[idx] <- "1"
tmp$true_yield[idx] <- tmp$true_yield_S2[idx]
## intercrops of species 1 and 2:
idx <- which(!is.na(tmp$geno_S1) & !is.na(tmp$geno_S2))
sep <- "|"
tmp$ID[idx] <- as.character(paste0(
tmp$geno_S1[idx], sep,
tmp$geno_S2[idx]
))
prop1 <- props["S1"]
prop2 <- props["S2"]
stopifnot(prop1 + prop2 == 1)
prop1 <- round(prop1, 2)
prop2 <- 1 - prop1
tmp$props[idx] <- paste0(prop1, sep, prop2)
tmp$true_yield[idx] <- tmp$true_yield_S1[idx] + tmp$true_yield_S2[idx]
stopifnot(all(!is.na(tmp$ID)))
tmp$ID <- factor(tmp$ID)
tmp$props <- factor(tmp$props)
## keep only one yield (the true one) per modality
dupIDs <- table(as.character(tmp$ID))
(dupIDs <- names(dupIDs)[dupIDs > 1])
## [1] "gS2_191" "gS2_301"
for (dupID in dupIDs) {
idx <- which(as.character(tmp$ID) == dupID)
tmp <- droplevels(tmp[-idx[2:length(idx)], ])
}
rm(dupIDs)
tmp <- RYM(tmp,
colIDstand = "ID", colIDcomps = "ID", colProps = "props",
colY = "true_yield", sep = "|"
)
summary(tmp$RYM)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.8713 0.9730 1.0126 1.0182 1.0583 1.2095 302
## Plot
ggplot(tmp) +
aes(x = RYM) +
geom_histogram(na.rm = TRUE, bins = 30, color = "white") +
geom_vline(xintercept = 1, linewidth = 2) +
geom_vline(xintercept = mean(tmp$RYM, na.rm = TRUE), linewidth = 2, color = "red") +
labs(title = "True relative yields of mixtures (RYMs)") +
theme_bw()
In this section, an exploratory data analysis is done on the data including block effects and experimental errors, so that it can be easily applied on real data, too.
str(datW)
## 'data.frame': 604 obs. of 20 variables:
## $ standID : Factor w/ 602 levels "gS1_001","gS1_001+gS2_301",..: 47 351 567 253 419 471 89 513 459 289 ...
## $ geno_S1 : Factor w/ 300 levels "gS1_001","gS1_002",..: 24 176 284 127 210 236 45 257 230 145 ...
## $ geno_S2 : Factor w/ 2 levels "gS2_191","gS2_301": NA NA NA NA NA NA NA NA NA NA ...
## $ type : Factor w/ 2 levels "SC","IC": 1 1 1 1 1 1 1 1 1 1 ...
## $ type2 : Factor w/ 3 levels "sole_S1","sole_S2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ block : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
## $ x : int 6 23 16 19 29 21 4 23 21 11 ...
## $ y : int 9 6 2 10 4 2 7 3 4 3 ...
## $ plot : Factor w/ 604 levels "10A1","10A10",..: 575 143 61 88 200 119 426 140 121 14 ...
## $ true_gen_yield_S1: num -2.9044 -0.0841 -9.0603 -2.8195 12.7576 ...
## $ true_gen_yield_S2: num NA NA NA NA NA NA NA NA NA NA ...
## $ true_yield_S1 : num 61.2 64 55.1 61.3 76.9 ...
## $ true_yield_S2 : num NA NA NA NA NA NA NA NA NA NA ...
## $ true_fix_yield_S1: num 64.1 64.1 64.1 64.1 64.1 ...
## $ true_fix_yield_S2: num NA NA NA NA NA NA NA NA NA NA ...
## $ true_rnd_yield_S1: num -2.9044 -0.0841 -9.0603 -2.8195 12.7576 ...
## $ true_rnd_yield_S2: num NA NA NA NA NA NA NA NA NA NA ...
## $ yield_S1 : num 61 61.7 56.5 61.8 77 ...
## $ yield_S2 : num NA NA NA NA NA NA NA NA NA NA ...
## $ tot_yield : num 61 61.7 56.5 61.8 77 ...
tapply(datW$type, datW$block, table)
## $A
##
## SC IC
## 152 150
##
## $B
##
## SC IC
## 152 150
ggplot(datL) +
aes(x = block, y = focal_yield) +
geom_violin(aes(fill = block)) +
geom_boxplot(fill = "white", width = 0.2) +
theme_bw() +
facet_grid(focal_species ~ type)
is_mix <- datW$type == "IC"
subDatW <- droplevels(datW[is_mix, ])
ggplot(subDatW) +
aes(x = yield_S1, y = yield_S2, color = geno_S1, shape = geno_S2) +
geom_abline(intercept = seq(0, 200, by = 10), slope = -1, linetype = "dotted", color = "black") +
geom_point(size = 2) +
labs(
title = "Partial yields in intercrop",
x = "species 1 (in qt.ha-1)", y = "species 2 (in qt.ha-1)",
shape = "Tester (species S2)"
) +
guides(color = "none") +
theme_bw()
## Add the empty micro-plots:
coords <- data.frame(
x = rep(sort(unique(datW$x)), each = length(unique(datW$y))),
y = sort(unique(datW$y)),
block = "A",
plot = NA
)
coords$block[coords$x >= min(datW$x[datW$block != "A"])] <- "B"
coords$plot <- paste0(coords$x, coords$block, coords$y)
length(idx <- which(!coords$plot %in% as.character(datW$plot)))
## [1] 16
tmp <- as.data.frame(matrix(nrow = length(idx), ncol = ncol(datW)))
colnames(tmp) <- colnames(datW)
tmp[, c("x", "y", "block", "plot")] <- coords[idx, ]
datWSupp <- rbind(
datW,
as.data.frame(tmp)
)
## Plot
xranges <- do.call(rbind, tapply(datW$x, datW$block, range))
p <- ggplot(datWSupp) +
aes(x = x, y = y) +
theme_bw() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()
) +
scale_x_continuous(breaks = sort(unique(datW$x))) +
scale_y_continuous(
breaks = sort(unique(datW$y)),
sec.axis = dup_axis()
) +
guides(x = guide_axis(angle = 90)) +
geom_tile(na.rm = TRUE) +
geom_rect(aes(xmin = x - 0.5, xmax = x + 0.5, ymin = y - 0.5, ymax = y + 0.5),
color = "white"
) +
geom_text(
x = sum(xranges[1, ]) / 2, y = 10.7, label = "Block A",
hjust = 0, color = "black"
) +
geom_text(
x = sum(xranges[2, ]) / 2, y = 10.7, label = "Block B",
hjust = 0, color = "black"
) +
geom_vline(
xintercept = max(datW$x[datW$block == "A"]),
color = "black", linetype = "dashed", linewidth = 1
)
p + aes(fill = type) +
labs(title = "Types of microplots") +
scale_fill_discrete()
scaleCols <- c("#CB2027", "#ffec1b", "#b3e93e", "#60BD68", "#059748")
scaleLim <- range(datW$tot_yield)
p + aes(fill = tot_yield) +
labs(title = "Total yield for each microplot") +
scale_fill_continuous(type = "viridis")
idxIC <- which(!is.na(datW$geno_S1) & !is.na(datW$geno_S2))
datW_IC <- droplevels(datW[idxIC, ])
listY <- list(Y_IC = datW_IC[, c("yield_S1", "yield_S2")])
listX <- list(X_IC = model.matrix(~ 1 + block + geno_S2, datW_IC,
contrasts.arg = list(
"block" = "contr.sum",
"geno_S2" = "contr.sum"
)
))
listZ <- list(Z_DS_f = model.matrix(~ 0 + geno_S1, datW_IC))
colnames(listZ$Z_DS_f) <- gsub("^geno_S1", "", colnames(listZ$Z_DS_f))
listVCov <- list(K = GRMs_vr_trial$S1[
levels(datW_IC$geno_S1),
levels(datW_IC$geno_S1)
])
fitsTmb <- list()
i <- 1
for (REML in c(TRUE, FALSE)) {
print(paste0("fit model with ", ifelse(REML, "REML", "ML"), "..."))
st <- system.time(
fitTmb <- fitDBVSBVinter(listY, listX, listZ, listVCov,
lOptions = list(iter.max = 20),
REML = REML, verbose = 0
)
)
print(st)
fitsTmb[[i]] <- fitTmb
i <- i + 1
break # skip ML to speed-up
}
## [1] "fit model with REML..."
## user system elapsed
## 33.750 0.048 33.800
for (i in seq_along(fitsTmb)) {
fitTmb <- fitsTmb[[i]]
p <- ggplot(fitTmb$trace) +
aes(x = iter, y = objfn) +
geom_point() +
geom_line() +
labs(
title = "Optimization convergence",
subtitle = paste0("REML=", fitTmb$REML)
) +
theme_bw()
print(p)
}
for (i in seq_along(fitsTmb)) {
fitTmb <- fitsTmb[[i]]
print(paste0("REML=", fitTmb$REML))
print("Check fixed effects:")
checks <- data.frame(
species = rep(c("S1", "S2"), each = nrow(truth$B_IC)),
truth = c(truth$B_IC),
estim = c(fitTmb$report$B_IC)
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
print("Check (co)variances of random genetic effects:")
checks <- data.frame(
ID = c("var(DBV)_S1", "var(SBV)_S1", "cor(DBVxSBV)_S1"),
truth = c(
truth$var_DBV["S1"],
truth$var_SBV["S1"],
truth$cor_DBV_SBV["S1"]
),
estim = c(
fitTmb$report$vars_BV_f,
fitTmb$report$Cor_BV[1, 2]
)
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
print("Check (co)variances of residual errors:")
checks <- data.frame(
ID = c("var(err)_IC_S1", "var(err)_IC_S2", "cor(err)"),
truth = c(truth$var_err_IC, truth$cor_err_IC),
estim = c(fitTmb$report$vars_E_IC, fitTmb$report$Cor_E_IC[1, 2])
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
print(fitTmb$sry_sdr[grep("^log_sd|^unconstr_cor", rownames(fitTmb$sry_sdr)), ])
if (FALSE) {
print(paste0(
"AIC = ", round(fitTmb$AIC),
" (k = ", attr(fitTmb$AIC, "k"), ")"
))
}
print("Check random genetic effects of the focal species:")
checks <- data.frame(
type = c(
rep(c("DBV", "SBV"), each = nrow(truth$BV$S1)),
rep("BV_IC", length(truth$BV_IC$S1))
),
truth = c(
truth$BV$S1[levels(datW_IC$geno_S1), ],
truth$BV_IC$S1
),
estim = c(
fitTmb$sry_sdr[grep("^BV_f$", rownames(fitTmb$sry_sdr)), "Estimate"],
fitTmb$report$BV_IC_f[names(truth$BV_IC$S1)]
)
)
checks$type <- factor(checks$type,
levels = c("BV_IC", "DBV", "SBV"))
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(tapply(1:nrow(checks), checks$type, function(idx) {
cor(checks$truth[idx], checks$esti[idx])
}))
p <- ggplot(checks) +
aes(x = estim, y = truth) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
geom_point() +
labs(
title = "Results with intercrop-only data",
subtitle = paste0("REML=", fitTmb$REML)
) +
theme_bw() +
facet_wrap(~type)
print(p)
}
## [1] "REML=TRUE"
## [1] "Check fixed effects:"
## species truth estim nBE
## 1 S1 32.0000000 32.3638098 1.1369058
## 2 S1 -0.8791112 -0.7986116 -9.1569310
## 3 S1 0.3530473 0.7292304 106.5531880
## 4 S2 27.0000000 26.8807304 -0.4417393
## 5 S2 1.4664826 1.5104990 3.0014955
## 6 S2 -0.9739075 -1.1812426 21.2889905
## [1] "Check (co)variances of random genetic effects:"
## ID truth estim nBE
## 1 var(DBV)_S1 27.040 12.192495 -54.909411
## 2 var(SBV)_S1 5.408 2.474440 -54.244829
## 3 cor(DBVxSBV)_S1 -0.900 -0.860585 -4.379442
## [1] "Check (co)variances of residual errors:"
## ID truth estim nBE
## S1 var(err)_IC_S1 4.577666 21.318351 365.7035
## S2 var(err)_IC_S2 0.975124 3.310185 239.4630
## cor(err) -0.200000 -0.751424 275.7120
## Estimate Std. Error
## log_sd_BV_f 1.250410 0.1812862
## log_sd_BV_f 0.453007 0.2073727
## unconstr_cor_DS_f -1.689718 0.6342338
## log_sd_E_IC 1.529784 0.1037033
## log_sd_E_IC 0.598502 0.1489517
## unconstr_cor_E_IC -1.138832 0.2370345
## [1] "Check random genetic effects of the focal species:"
## BV_IC DBV SBV
## NA 0.8959716 0.8761980
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_point()`).
if (FALSE) { # slow
system.time(
pmTmb <- paramBoot4TMB(fitTmb, nb_boot = 5)
)
summary(do.call(rbind, pmTmb))
fitTmb$sry_sdr[names(pmTmb[[1]]), ]
}
idxIC <- which(!is.na(datW$geno_S1) & !is.na(datW$geno_S2))
datW_IC <- droplevels(datW[idxIC, ])
idxSCf <- which(datL$type == "SC" & datL$focal_species == "S1")
datL_SC_f <- droplevels(datL[idxSCf, ])
idxSCt <- which(datL$type == "SC" & datL$focal_species == "S2")
datL_SC_t <- droplevels(datL[idxSCt, ])
listY <- list()
listY$Y_IC <- datW_IC[, c("yield_S1", "yield_S2")]
listY$y_SC_f <- datL_SC_f$focal_yield
listY$y_SC_t <- datL_SC_t$focal_yield
sapply(listY[-1], length)
## y_SC_f y_SC_t
## 300 4
listX <- list()
listX$X_IC <- model.matrix(~ 1 + block + geno_S2,
data = datW_IC,
contrasts.arg = list(
"block" = "contr.sum",
"geno_S2" = "contr.sum"
)
)
listX$X_SC_f <- model.matrix(~ 1 + block, datL_SC_f,
contrasts.arg = list(block = "contr.sum")
)
listX$X_SC_t <- model.matrix(~ 1 + block + focal, datL_SC_t,
contrasts.arg = list(
block = "contr.sum",
focal = "contr.sum"
)
)
listZ <- list()
listZ$Z_DS_f <- model.matrix(~ 0 + geno_S1, datW_IC)
colnames(listZ$Z_DS_f) <- gsub("^geno_S1", "", colnames(listZ$Z_DS_f))
listZ$Z_D_f <- model.matrix(~ 0 + focal, datL_SC_f)
colnames(listZ$Z_D_f) <- gsub("^focal", "", colnames(listZ$Z_D_f))
listVCov <- list(K = GRMs_vr_trial$S1[
levels(datW_IC$geno_S1),
levels(datW_IC$geno_S1)
])
fitsTmb <- list()
i <- 1
for (REML in c(TRUE, FALSE)) {
print(paste0("fit model with ", ifelse(REML, "REML", "ML"), "..."))
st <- system.time(
fitTmb <- fitDBVSBVinter(listY, listX, listZ, listVCov,
lOptions = list(iter.max = 20),
REML = REML, verbose = 0
)
)
print(st)
fitsTmb[[i]] <- fitTmb
i <- i + 1
break # skip ML to speed-up
}
## [1] "fit model with REML..."
## user system elapsed
## 63.220 0.056 63.280
for (i in seq_along(fitsTmb)) {
fitTmb <- fitsTmb[[i]]
p <- ggplot(fitTmb$trace) +
aes(x = iter, y = objfn) +
geom_point() +
geom_line() +
labs(
title = "Optimization convergence",
subtitle = paste0("REML=", fitTmb$REML)
) +
theme_bw()
print(p)
}
for (i in seq_along(fitsTmb)) {
fitTmb <- fitsTmb[[i]]
print(paste0("REML=", fitTmb$REML))
print("Check fixed effects:")
checks <- data.frame(
species = rep(c("S1", "S2"), each = nrow(truth$B_IC)),
truth = c(truth$B_IC),
estim = c(fitTmb$report$B_IC)
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
checks <- data.frame(
species = "S1",
truth = obsMC$blObsContrs$S1[, "SC"],
estim = fitTmb$report$beta_SC_f
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
checks <- data.frame(
species = "S2",
truth = c(
obsMC$blObsContrs$S2[, "SC"],
obsMC$BVObsContrs$S2[-1, "SC", "DBV"]
),
estim = fitTmb$report$beta_SC_t
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
print("Check (co)variances of random genetic effects:")
checks <- data.frame(
ID = c("var(DBV)", "var(SBV)", "var(SIGV)", "cor(DBVxSBV)"),
truth = c(
truth$var_DBV["S1"],
truth$var_SBV["S1"],
truth$var_SIGV["S1"],
truth$cor_DBV_SBV["S1"]
),
estim = c(
fitTmb$report$vars_BV_f,
fitTmb$report$var_SIGV_f,
fitTmb$report$Cor_BV[1, 2]
)
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
print("Check (co)variances of residual errors:")
checks <- data.frame(
ID = c(
"var(err)_S1", "var(err)_S2", "cor(err)",
"var(err)_SC_S1", "var(err)_SC_S2"
),
truth = c(
truth$var_err_IC, truth$cor_err_IC,
truth$var_err_SC
),
estim = c(
fitTmb$report$vars_E_IC, fitTmb$report$Cor_E_IC[1, 2],
fitTmb$report$var_err_SC_f, fitTmb$report$var_err_SC_t
)
)
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(checks)
print(fitTmb$sry_sdr[grep("^log_sd|^unconstr_cor", rownames(fitTmb$sry_sdr)), ])
if (FALSE) {
print(paste0(
"AIC = ", round(fitTmb$AIC),
" (k = ", attr(fitTmb$AIC, "k"), ")"
))
}
print("Check random genetic effects of the focal species:")
checks <- data.frame(
type = c(
rep(c("DBV", "SBV", "SIGV"), each = nrow(truth$BV$S1)),
rep(c("BV_IC", "BV_SC"), each = length(truth$BV_IC$S1))
),
truth = c(
truth$BV$S1[levels(datW_IC$geno_S1), ],
truth$SIGVs$S1[levels(datW_IC$geno_S1)],
truth$BV_IC$S1,
truth$BV_SC$S1
),
estim = c(
fitTmb$sry_sdr[grep("^BV_f$", rownames(fitTmb$sry_sdr)), "Estimate"],
fitTmb$sry_sdr[grep("^SIGV_f$", rownames(fitTmb$sry_sdr)), "Estimate"],
fitTmb$report$BV_IC_f[names(truth$BV_IC$S1)],
fitTmb$report$BV_SC_f[names(truth$BV_SC$S1)]
)
)
checks$type <- factor(checks$type,
levels = c("BV_SC", "BV_IC", "DBV", "SBV", "SIGV"))
checks$nBE <- normBiasError(checks$estim, checks$truth)
print(tapply(1:nrow(checks), checks$type, function(idx) {
cor(checks$truth[idx], checks$esti[idx])
}))
p <- ggplot(checks) +
aes(x = estim, y = truth) +
geom_hline(yintercept = 0, linetype = "dotted") +
geom_vline(xintercept = 0, linetype = "dotted") +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
geom_point() +
labs(
title = "Results with both sole-crop and intercrop data",
subtitle = paste0("REML=", fitTmb$REML)
) +
theme_bw() +
facet_wrap(~type)
print(p)
}
## [1] "REML=TRUE"
## [1] "Check fixed effects:"
## species truth estim nBE
## 1 S1 32.0000000 32.4375254 1.3672668
## 2 S1 -0.8791112 -0.9980429 13.5286218
## 3 S1 0.3530473 0.6294462 78.2894929
## 4 S2 27.0000000 26.8649213 -0.5002914
## 5 S2 1.4664826 1.5686429 6.9663490
## 6 S2 -0.9739075 -1.1524156 18.3290531
## species truth estim nBE
## (Intercept) S1 65.1400649 65.292049 0.2333187
## blockB S1 -0.8957405 -1.042209 16.3516902
## species truth estim nBE
## (Intercept) S2 31.241459 31.241459 1.137179e-14
## blockB S2 1.112205 1.112205 0.000000e+00
## S2 -1.267077 -1.267077 1.401933e-13
## [1] "Check (co)variances of random genetic effects:"
## ID truth estim nBE
## 1 var(DBV) 27.040 30.3383826 12.1981605
## 2 var(SBV) 5.408 4.7672014 -11.8490872
## 3 var(SIGV) 13.520 20.8457794 54.1847591
## 4 cor(DBVxSBV) -0.900 -0.9021213 0.2357053
## [1] "Check (co)variances of residual errors:"
## ID truth estim nBE
## 1 var(err)_S1 4.577666 6.0977068 33.20560
## 2 var(err)_S2 0.975124 1.3634721 39.82550
## 3 cor(err) -0.200000 -0.4012402 100.62012
## 4 var(err)_SC_S1 17.382857 10.1436226 -41.64583
## 5 var(err)_SC_S2 2.468571 0.4305296 -82.55957
## Estimate Std. Error
## log_sd_BV_f 1.7062068 0.05693504
## log_sd_BV_f 0.7808797 0.07010394
## unconstr_cor_DS_f -2.0907488 0.39552046
## log_sd_E_IC 0.9039564 0.13514830
## log_sd_E_IC 0.1550172 0.14600358
## unconstr_cor_E_IC -0.4380482 0.19577748
## log_sd_SIGV_f 1.5185758 0.12991890
## log_sd_e_SC_f 1.1584226 0.20920567
## log_sd_e_SC_t -0.4213697 2.23124598
## [1] "Check random genetic effects of the focal species:"
## BV_SC BV_IC DBV SBV SIGV
## 0.8945796 NA 0.9358907 0.9299767 0.7146412
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_point()`).
See the article for more details:
t1 <- proc.time()
t1 - t0
## user system elapsed
## 118.789 0.273 119.032
print(sessionInfo(), locale = FALSE)
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Debian GNU/Linux 12 (bookworm)
##
## Matrix products: default
## BLAS: /usr/local/R/4.5.1/lib/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0 LAPACK version 3.11.0
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] emmeans_1.11.1 plantmix_1.0.2 lme4_1.1-37 Matrix_1.7-3 ggplot2_3.5.2
## [6] knitr_1.50
##
## loaded via a namespace (and not attached):
## [1] sandwich_3.1-1 sass_0.4.10 generics_0.1.4 lattice_0.22-7
## [5] digest_0.6.37 magrittr_2.0.3 evaluate_1.0.4 grid_4.5.1
## [9] estimability_1.5.1 RColorBrewer_1.1-3 mvtnorm_1.3-3 fastmap_1.2.0
## [13] jsonlite_2.0.0 survival_3.8-3 multcomp_1.4-28 viridisLite_0.4.2
## [17] scales_1.4.0 TH.data_1.1-3 codetools_0.2-20 jquerylib_0.1.4
## [21] reformulas_0.4.1 Rdpack_2.6.4 cli_3.6.5 rlang_1.1.7
## [25] rbibutils_2.3 splines_4.5.1 withr_3.0.2 cachem_1.1.0
## [29] yaml_2.3.10 tools_4.5.1 nloptr_2.2.1 coda_0.19-4.1
## [33] minqa_1.2.8 dplyr_1.1.4 boot_1.3-31 vctrs_0.6.5
## [37] R6_2.6.1 zoo_1.8-14 lifecycle_1.0.5 MASS_7.3-65
## [41] pkgconfig_2.0.3 pillar_1.11.0 bslib_0.9.0 gtable_0.3.6
## [45] glue_1.8.0 Rcpp_1.1.0 xfun_0.52 tibble_3.3.0
## [49] tidyselect_1.2.1 dichromat_2.0-0.1 xtable_1.8-4 farver_2.1.2
## [53] htmltools_0.5.8.1 nlme_3.1-168 igraph_2.1.4 labeling_0.4.3
## [57] rmarkdown_2.29 TMB_1.9.17 compiler_4.5.1