This vignette shows how to use SignacX with Seurat and SPRING to learn a new cell type category from single cell data.
We start with CITE-seq data that were already classified with SignacX using the SPRING pipeline.
library(Seurat)
library(SignacX)
Load CITE-seq data from 10X Genomics that were already processed with SPRING and classified with SignacX.
# Load CITE-seq data.
# data.dir is the SPRING directory of the CITE-seq data set; it is reused
# below to locate the label file and, later, as the spring.dir for training.
data.dir = './CITESEQ_EXPLORATORY_CITESEQ_5K_PBMCS/FullDataset_v1_protein'
E = CID.LoadData(data.dir = data.dir)

# Load labels.
# The JSON file holds the categorical annotations stored alongside the data;
# the 'CellStates' entry's label_list is read from it further down.
json_data = rjson::fromJSON(file=paste0(data.dir,'/categorical_coloring_data.json'))
Create a Seurat object for the protein expression data; we will use this as a reference.
# Separate protein and gene expression data.
# Rows whose names contain "Total" are taken as the protein features (P);
# all remaining rows stay in E as the gene expression features.
logik = grepl("Total", rownames(E))
P = E[logik,]
E = E[!logik,]

# CLR normalization in Seurat.
# Both matrices get the same positional column names so that the protein
# assay lines up cell-for-cell with the gene expression object.
# seq_len() is used instead of 1:ncol() so a zero-column matrix is handled.
colnames(P) <- seq_len(ncol(P))
colnames(E) <- seq_len(ncol(E))
reference <- CreateSeuratObject(E)
reference[["ADT"]] <- CreateAssayObject(counts = P)
reference <- NormalizeData(reference, assay = "ADT", normalization.method = "CLR")
Identify CD56 bright NK cells based on protein expression data.
# Generate labels.
# Start from the existing cell state labels and collapse everything that is
# not "NK" into a single "Unclassified" group.
lbls = json_data$CellStates$label_list
lbls[lbls != "NK"] = "Unclassified"

# Pull the raw ADT counts for the CD16 and CD56 antibodies (one value per cell).
adt_counts = reference@assays$ADT@counts
CD16 = adt_counts[rownames(adt_counts) == "CD16-TotalSeqB-CD16",]
CD56 = adt_counts[rownames(adt_counts) == "CD56-TotalSeqB-CD56",]

# Flag NK cells with high CD56 and low CD16 counts as CD56 bright; the
# cutoffs are applied to log2 of the raw counts.
logik = log2(CD56) > 10 & log2(CD16) < 7.5 & lbls == "NK"
sum(logik)  # number of cells that will be relabeled
lbls[logik] = "NK.CD56bright"
Generate a training data set from the reference data and save it for later use. Note:
# Generate bootstrapped single cell data.
# L names the two labels passed to SignacBoot ("NK" and the new
# "NK.CD56bright"); labels supplies the per-cell labels built above.
# NOTE(review): logfc.threshold = 1 presumably restricts the features used
# for training -- confirm against the SignacBoot documentation.
R_learned = SignacBoot(
  E = E,
  spring.dir = data.dir,
  L = c("NK", "NK.CD56bright"),
  labels = lbls,
  logfc.threshold = 1
)

# Save the training data so it can be reused without re-running SignacBoot.
save(R_learned, file = "training_NKBright_v207.rda")
Load expression data for a different data set (this was also previously processed through SPRING and SignacX).
# Classify another data set with new model.
# Load new data. Note that E and json_data are overwritten here: from this
# point on they refer to the new data set, not the CITE-seq reference.
new.data.dir = "./PBMCs_5k_10X/FullDataset_v1"
E = CID.LoadData(data.dir = new.data.dir)

# Load cell types identified with Signac.
json_data = rjson::fromJSON(file=paste0(new.data.dir,'/categorical_coloring_data.json'))
Generate new labels. Note:
# Generate new labels.
# Classify the new data set using the training data learned from the
# CITE-seq reference (R_learned).
cr_learned = Signac(E = E, R = R_learned, spring.dir = new.data.dir)
Now we amend the existing labels (previously classified with SignacX): we add the new labels and generate a new SPRING layout. Note:
# Modify the existing labels.
# cr is a named list holding the label_list of every annotation track in the
# previously saved JSON.
cr = lapply(json_data, function(x) x$label_list)

# Wherever the existing annotation says 'NK', substitute the label from the
# newly learned model. This is done for both the CellStates and the
# CellStates_novel tracks; all other cells keep their original labels.
logik = cr$CellStates == 'NK'
cr$CellStates[logik] = cr_learned[logik]
logik = cr$CellStates_novel == 'NK'
cr$CellStates_novel[logik] = cr_learned[logik]

# Point new.data.dir at a "_Learned" sibling path; it is used as the
# spring.dir when the amended labels are written out.
new.data.dir = paste0(new.data.dir, "_Learned")
Save results
# Save.
# Write the amended labels to the SPRING directory in new.data.dir.
# NOTE(review): new_populations / new_colors presumably register
# 'NK.CD56bright' as a new population drawn in red -- confirm against the
# CID.writeJSON documentation.
dat = CID.writeJSON(
  cr,
  spring.dir = new.data.dir,
  new_colors = c('red'),
  new_populations = c( 'NK.CD56bright')
)
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_4.0.3 magrittr_2.0.1 formatR_1.7 htmltools_0.5.1.1
## [5] tools_4.0.3 yaml_2.2.1 stringi_1.5.3 rmarkdown_2.6
## [9] highr_0.8 knitr_1.30 stringr_1.4.0 digest_0.6.27
## [13] xfun_0.20 rlang_0.4.10 evaluate_0.14