Example Workflow for Single Cell Annotation Using CellMarker2.0

You can view an example script for this workflow by running the following command:

# Open the example script that ships with the easybio package.
file.show(system.file("example-single-cell.R", package = "easybio"))

Example marker data from the pbmc3k dataset:

# Attach easybio, which makes the example `pbmc.markers` table available.
library(easybio)
# Preview the first six rows of the marker table.
head(pbmc.markers, n = 6L)
#>               p_val avg_log2FC pct.1 pct.2     p_val_adj cluster  gene
#> RPS12 1.273332e-143  0.7387061 1.000 0.991 1.746248e-139       0 RPS12
#> RPS6  6.817653e-143  0.6934523 1.000 0.995 9.349729e-139       0  RPS6
#> RPS27 4.661810e-141  0.7372604 0.999 0.992 6.393206e-137       0 RPS27
#> RPL32 8.158412e-138  0.6266075 0.999 0.995 1.118845e-133       0 RPL32
#> RPS14 5.177478e-130  0.6336957 1.000 0.994 7.100394e-126       0 RPS14
#> RPS25 3.244898e-123  0.7689940 0.997 0.975 4.450053e-119       0 RPS25
# Match the cluster markers against CellMarker2.0 for human cell types.
# NOTE(review): `n = 50` presumably caps the markers used per cluster; confirm
# against the matchCellMarker2() documentation.
marker <- matchCellMarker2(marker = pbmc.markers, n = 50, spc = "Human")
# Keep only the first two candidate cell types of every cluster.
marker <- marker[, head(.SD, 2), by = cluster]
marker
#> Key: <cluster>
#>     cluster                        cell_name uniqueN     N
#>      <fctr>                           <char>   <int> <int>
#>  1:       0                Naive CD8+ T cell       6    34
#>  2:       0                Naive T(Th0) cell       3    32
#>  3:       1                         Monocyte       9   133
#>  4:       1                       Macrophage       8    63
#>  5:       2          Regulatory T(Treg) cell      11   148
#>  6:       2                           T cell      11    82
#>  7:       3                           B cell       9   317
#>  8:       3                     Naive B cell       6    33
#>  9:       4                           T cell      15   104
#> 10:       4              Natural killer cell      17    99
#> 11:       5                       Macrophage       4    34
#> 12:       5                         Monocyte       3    10
#> 13:       6              Natural killer cell      14   196
#> 14:       6                 Cytotoxic T cell       4    24
#> 15:       7 Plasmacytoid dendritic cell(pDC)       8    42
#> 16:       7                   Dendritic cell       6    38
#> 17:       8                    Megakaryocyte       9    52
#> 18:       8                 Endothelial cell       6    41
#>                                   ordered_symbol                      orderN
#>                                           <list>                      <list>
#>  1:               CCR7,LEF1,CD8B,MAL,NELL2,TSHZ2           14,12, 2, 2, 2, 2
#>  2:                              CCR7,LEF1,LRRN3                    23, 8, 1
#>  3: CD14,S100A8,S100A9,S100A12,FCGR1A,MS4A6A,...       82,22,15, 5, 4, 2,...
#>  4:    CD14,FCGR1A,CCL2,PLA2G7,RNASE1,S100A8,...       46, 6, 2, 2, 2, 2,...
#>  5:  FOXP3,IL2RA,CTLA4,TNFRSF4,TNFRSF18,ICOS,...       55,45,22, 7, 6, 4,...
#>  6:        CD2,CTLA4,FOXP3,IL2RA,CD40LG,CCR6,...       32,12, 8, 7, 6, 4,...
#>  7:       CD79A,CD19,MS4A1,FCER2,TCL1A,IGLL5,... 102, 97, 97,  6,  5,  3,...
#>  8:           TCL1A,MS4A1,CD19,FCER2,CD79A,PCDH9           13, 6, 5, 5, 3, 1
#>  9:           CD8A,CD8B,GZMK,TIGIT,CCL5,GZMA,...       38,10, 7, 7, 6, 6,...
#> 10:          NKG7,KLRB1,GZMA,CCL5,CD160,CD8A,...       50, 9, 6, 5, 5, 3,...
#> 11:                       C1QA,C1QB,MS4A7,MS4A4A                 13,10, 7, 4
#> 12:                              MS4A7,C1QB,C1QA                       7,2,1
#> 13:          NCAM1,GNLY,KLRF1,GZMB,NCR1,XCL1,...       61,42,25,16,14,11,...
#> 14:                        PRF1,GZMB,GNLY,FGFBP2                     9,8,6,1
#> 15:  CLEC4C,LILRA4,SCT,LAMP5,LRRC26,SERPINF1,...       19,16, 2, 1, 1, 1,...
#> 16:       FCER1A,CLEC10A,LILRA4,FLT3,CD1E,CLEC4C           16,11, 4, 3, 2, 2
#> 17:           PPBP,PF4,ITGA2B,GP9,MYL9,TUBB1,...       15,12, 9, 4, 4, 3,...
#> 18:         CLDN5,ESAM,GNG11,LCN2,SERPINE1,SPARC           36, 1, 1, 1, 1, 1
#>                                          markerWith
#>                                              <list>
#>  1:               LEF1,CCR7,MAL,LEF1,TSHZ2,CCR7,...
#>  2:              CCR7,CCR7,LRRN3,CCR7,CCR7,CCR7,...
#>  3:       S100A9,S100A8,CD14,S100A8,S100A9,CD14,...
#>  4:               CD14,CD14,CD14,CD14,CD14,CD14,...
#>  5:    CTLA4,TNFRSF4,IL2RA,TNFRSF18,FOXP3,FOXP3,...
#>  6:         IL2RA,IL2RA,CD40LG,CD40LG,CD2,CTLA4,...
#>  7:           MS4A1,CD19,CD79A,CD79A,CD19,MS4A1,...
#>  8:         MS4A1,TCL1A,PCDH9,CD79A,TCL1A,FCER2,...
#>  9:               CD8A,CD8A,CD8A,GZMA,CD8B,CD8A,...
#> 10:              NKG7,NKG7,KLRB1,CD8A,NKG7,NKG7,...
#> 11:             MS4A7,C1QB,C1QA,MS4A7,C1QA,C1QB,...
#> 12:          MS4A7,MS4A7,MS4A7,MS4A7,MS4A7,C1QB,...
#> 13:           GNLY,NCAM1,NCAM1,KLRF1,GNLY,NCAM1,...
#> 14:             PRF1,GZMB,FGFBP2,GNLY,GZMB,PRF1,...
#> 15: CLEC4C,LILRA4,SERPINF1,CLEC4C,LILRA4,LILRA4,...
#> 16: FCER1A,CLEC10A,FCER1A,LILRA4,CLEC10A,FCER1A,...
#> 17:            PF4,PPBP,SPARC,PPBP,ITGA2B,TUBB1,...
#> 18:         CLDN5,CLDN5,CLDN5,CLDN5,SPARC,CLDN5,...
# Plot the candidate cell types held in `marker`.
# NOTE(review): exact plot contents are not visible here; see ?plotPossibleCell.
plotPossibleCell(marker)

Explanation:

To annotate, you can simply use the top-matched cell type:

# Take the first (top-matched) row of each cluster.
cl2cell <- marker[, .SD[1L], by = .(cluster)]
# Turn it into a character vector of cell names, named by cluster id.
cl2cell <- setNames(cl2cell[["cell_name"]], cl2cell[["cluster"]])
cl2cell
#>                                  0                                  1 
#>                "Naive CD8+ T cell"                         "Monocyte" 
#>                                  2                                  3 
#>          "Regulatory T(Treg) cell"                           "B cell" 
#>                                  4                                  5 
#>                           "T cell"                       "Macrophage" 
#>                                  6                                  7 
#>              "Natural killer cell" "Plasmacytoid dendritic cell(pDC)" 
#>                                  8 
#>                    "Megakaryocyte"

Visualize marker dot plots for similar clusters:

# Groups of clusters whose markers are plotted together.
cls <- list(
  c(1, 5, 7),
  8,
  3,
  c(0, 2, 4, 6)
)
# NOTE(review): `seuratObject` is not defined in this snippet; it is presumably
# the Seurat object that `pbmc.markers` was computed from. Supply your own.
dotplotList <- plotSeuratDot(
  seuratObject,
  cls,
  marker = pbmc.markers,
  n = 50,
  spc = "Human",
  topcellN = 2
)

Explanation:

Construct a named vector for annotation:

# Build the annotation vector by hand: each `ids == "label"` entry assigns
# `label` to the listed cluster ids (see the printed result below).
# The last entry must not be followed by a comma: a trailing comma would add an
# empty argument, which expression() stores as an extra element.
cl2cell <- finsert(
  expression(
    c(1, 5) == "Monocyte",
    c(7) == "DC",
    c(8) == "megakaryocyte",
    c(3) == "B.cell",
    c(0, 2) == "Naive.CD8.T.cell",
    c(4) == "Cytotoxic.T.Cell",
    c(6) == "Natural.killer.cell"
  ),
  len = 9
)
cl2cell
#>                     0                     1                     2 
#>    "Naive.CD8.T.cell"            "Monocyte"    "Naive.CD8.T.cell" 
#>                     3                     4                     5 
#>              "B.cell"    "Cytotoxic.T.Cell"            "Monocyte" 
#>                     6                     7                     8 
#> "Natural.killer.cell"                  "DC"       "megakaryocyte"

You can also directly retrieve markers:

# Retrieve five human markers for each requested cell type.
get_marker(
  spc = "Human",
  cell = c("Monocyte", "Neutrophil"),
  number = 5,
  min.count = 1
)
#> $Monocyte
#> [1] "CD14"   "FCGR3A" "LYZ"    "S100A8" "FCN1"  
#> 
#> $Neutrophil
#> [1] "FCGR3B" "S100A9" "CSF3R"  "S100A8" "FCGR3A"

Or check the distribution of a marker directly:

# Plot the distribution of a single marker gene, here CD68.
plotMarkerDistribution(mkr = "CD68")