Project definition

We organize the data and the results

fcs_dir = "c:/demo/200205-atelier/CLEAN_DATA"
res_dir = "c:/demo/200205-atelier/CLEAN_DATA_results"
marker_file = "ck_markers_main.txt"
if (!dir.exists(res_dir)) dir.create(res_dir)

FCS Overview

library(flowCore)

# read the FCS as a flowset, ie a group of compatible FCS
fs = read.flowSet(path = fcs_dir, pattern = "*.fcs", transformation = FALSE, truncate_max_range = FALSE)

# view the flowset
fs
## A flowSet with 6 experiments.
## 
##   column names:
##   129 131 132 133 134 136 137 138 139 140 146 151 155 157 161 169 172 173 CCR4-149 CCR5-144 CCR7-159 CD16-165 CD19-142 CD20-147 CD27-167 CD28-160 CD3-170 CD33-158 CD38-148 CD4-145 CD43-150 CD44-166 CD45-154 CD45RA-153 CD45RO-164 CD56-176 CD69-162 CD8-168 CXCR3-156 CXCR5-171 Cell_length Cisplatin-195 HLA-DR-163 ICOS-141 NA-191 NA-193 PD-1-174 PD-L1-175 TCRgd-152 TIM3-143 Time
# view annotations
pData(fs)
##                      name
## clean_D1.fcs clean_D1.fcs
## clean_D2.fcs clean_D2.fcs
## clean_D3.fcs clean_D3.fcs
## clean_P1.fcs clean_P1.fcs
## clean_P2.fcs clean_P2.fcs
## clean_P3.fcs clean_P3.fcs
# here, none

# view the cell counts
fsApply(fs, nrow)
##               [,1]
## clean_D1.fcs 15608
## clean_D2.fcs 16075
## clean_D3.fcs 15340
## clean_P1.fcs 15105
## clean_P2.fcs 14724
## clean_P3.fcs 15469
# view a specific FCS
ff = fs[[1]]  # extract 1st FCS as flowframe
# view some keywords
keyword(ff, "$CYT")  # cytometer
## $`$CYT`
## NULL
keyword(ff, "$GUID")
## $`$GUID`
## NULL
keyword(ff, "$FIL")  # file name
## $`$FIL`
## [1] "clean_D1.fcs"
keyword(ff, "$TOT")  # cell count
## $`$TOT`
## [1] "15608"
keyword(ff, "$COM")  # comment
## $`$COM`
## [1] "FCS file exported using Cytobank"
# view all keywords
kwd = keyword(ff)
length(kwd)  # there are too many
## [1] 378
# filter out some keywords
kwd_1 = kwd[!grepl("flowCore", names(kwd))]  # not flowcore
kwd_2 = kwd_1[-grep("^\\$P", names(kwd_1))]  # not standard parameters
#kwd_2  # display remaining keywords
unlist(kwd_2)
##                                       FCSversion 
##                                              "3" 
##                                             $FIL 
##                                   "clean_D1.fcs" 
##                                             $TOT 
##                                          "15608" 
##                                         $BYTEORD 
##                                        "4,3,2,1" 
##                                        $DATATYPE 
##                                              "F" 
##                                   FJ_FCS_VERSION 
##                                              "3" 
##                                   $BEGINANALYSIS 
##                                              "0" 
##                                     $ENDANALYSIS 
##                                              "0" 
##                                      $BEGINSTEXT 
##                                              "0" 
##                                        $ENDSTEXT 
##                                              "0" 
##                                        $NEXTDATA 
##                                              "0" 
##                                            $MODE 
##                                              "L" 
##                                             $COM 
##               "FCS file exported using Cytobank" 
##                                               ID 
##                                             "D1" 
##                                       $BEGINDATA 
##                                           "5014" 
##                                         $ENDDATA 
##                                        "3189045" 
##                                         FILENAME 
## "c:/demo/200205-atelier/CLEAN_DATA/clean_D1.fcs" 
##                                             GUID 
##                                   "clean_D1.fcs" 
##                                     ORIGINALGUID 
##                                   "clean_D1.fcs" 
##                                    GUID.original 
##                                   "clean_D1.fcs"

cytofkit analysis

Run by hand

library(cytofkit)
# Launch the Graphical User Interface for tuning the run
cytofkit_GUI()
# Note the path to the result file

# Launch the Shiny interface to view and annotate the analysis
cytofkitShinyAPP()
# Launch the Shiny interface using a defined path
analysis_file = "c:/demo/200205-atelier/CLEAN_DATA_results/run_5k/run_5k.RData"
if (file.exists(analysis_file))
  cytofkitShinyAPP(analysis_file)

Script version for tuning parameters

Cytofast post-analysis

Main configuration

# Tell where the FCS files of cytofkit are
analysis_fcs_dir = file.path(res_dir, "run_5k", "run_5k_analyzedFCS")
stopifnot(dir.exists(analysis_fcs_dir))
selected_clustering_method = "FlowSOM_clusterIDs"
#selected_clustering_method = "Rphenograph_clusterIDs"

Read Cytofkit FCS export

# Load libraries
library(flowCore)
library(cytofast)

# Load cytofkit FCS files
cfData = readCytofkitFCS(analysis_fcs_dir, clusterID = selected_clustering_method)
## Reading .FCS sample: 1
## Reading .FCS sample: 2
## Reading .FCS sample: 3
## Reading .FCS sample: 4
## Reading .FCS sample: 5
## Reading .FCS sample: 6
# cfData = readCytofkitFCS("demo/cytofkit_demo_analyzedFCS", clusterID = "Rphenograph_clusterIDs")
table(cfData@expr$clusterID)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
##  107 2834  613 2471  256  194 1027  613  914  995   54 4352  943  207  527  229 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
##   44  347   44   38  352  118  123  461  824  624  255 4083  798   58  139  449 
##   33   34   35   36   37   38   39   40 
##  331 1490  428  942  998  272  192  254

Optionnally, do some channel cleaning

# Remove unneeded channels
keep <- colnames(cfData@expr)  # all
keep <- keep[!grepl("^(Time|Event_length|Cell_length)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("^(Cisplatin)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("(_ADN1|_ADN2)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("^(NA\\.)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("^X\\d+", keep) | grepl("_", keep)]  # unannotated X channel
cfData@expr <- cfData@expr[, keep]

# Rename channels
colnames(cfData@expr) <- gsub("(^X\\d+.+?_)", "", keep)  # remove metal tag
colnames(cfData@expr) <- gsub("(\\.\\d+.$)", "", keep)  # remove metal tag

# Manual ordering of channels
first <- c("CD20", "CD19", "CD3", "CD4", "CD8", "TCRgd", "CD56", "CD16", "CD45RA", "CD95", "CD127")
first <- intersect(first, colnames(cfData@expr))

keep <- colnames(cfData@expr)  # all
cfData@expr <- cfData@expr[, c(keep[1:2], first, setdiff(keep[-(1:2)], first))]

Read meta data that group FCS samples

# To compute statistical tests, we need to define groups
# Here are the un-annotated samples
cfData@samples
##          sampleID           FCSfilename
## clean_D1 clean_D1 cytofkit_clean_D1.fcs
## clean_D2 clean_D2 cytofkit_clean_D2.fcs
## clean_D3 clean_D3 cytofkit_clean_D3.fcs
## clean_P1 clean_P1 cytofkit_clean_P1.fcs
## clean_P2 clean_P2 cytofkit_clean_P2.fcs
## clean_P3 clean_P3 cytofkit_clean_P3.fcs
# Either we fill the annotation using an Excel template
# the template consists in a first column of sample identifiers
# and a second column called status
meta <- data.frame(cfData@samples, status = "")
# The template is written on disk
if (!file.exists("meta.csv")) {
  write.csv(meta, "meta.csv")
}
# Now you can annotate the template using Excel
# and store it as CSV file keeping the original format

# Either we fill meta data programmatically
meta$status <- c(rep(c("D", "P"), each = 3))
write.csv(meta, "meta.csv")

# Fill the template using Excel or so

# Import annotation
if (file.exists("meta.csv")) {
  meta <- read.csv("meta.csv", row.names = 1)
}
# meta <- meta[match(row.names(cfData@samples), meta[,"sampleID"]),] # match sampleID
cfData@samples <- cbind.data.frame(cfData@samples, meta[, -1, drop = FALSE])
cfData@samples
##          sampleID           FCSfilename           FCSfilename status
## clean_D1 clean_D1 cytofkit_clean_D1.fcs cytofkit_clean_D1.fcs      D
## clean_D2 clean_D2 cytofkit_clean_D2.fcs cytofkit_clean_D2.fcs      D
## clean_D3 clean_D3 cytofkit_clean_D3.fcs cytofkit_clean_D3.fcs      D
## clean_P1 clean_P1 cytofkit_clean_P1.fcs cytofkit_clean_P1.fcs      P
## clean_P2 clean_P2 cytofkit_clean_P2.fcs cytofkit_clean_P2.fcs      P
## clean_P3 clean_P3 cytofkit_clean_P3.fcs cytofkit_clean_P3.fcs      P

Transform expression values, in the same way you display marker intensity

# Transform expression with asinh( x / 5 )
cfData@expr[,-(1:2)] <- asinh(cfData@expr[,-(1:2)]/5)

Works with cell counts

# Retrieve cell counts
cfData <- cellCounts(cfData)
head(cfData@counts)
##           1   2   3   4  5  6   7   8   9  10 11  12  13 14 15 16 17 18 19 20
## clean_D1  0 227  66 207 43  9 130  40  88 113  6 956 146 43 92 46  9 79  5  5
## clean_D2  0 247  55 205 41 16 138  40  85 103  4 939 167 61 84 77  8 76  1  6
## clean_D3  1 254  53 242 41 12 116  50  88 110 12 932 165 69 88 70 12 60  3  8
## clean_P1 33 687 152 628 35 50 211 160 208 238 11 517 142 14 86 13  6 44 15  6
## clean_P2 46 701 124 580 40 53 211 156 204 224 10 499 177  5 85  8  5 44  6  6
## clean_P3 27 718 163 609 56 54 221 167 241 207 11 509 146 15 92 15  4 44 14  7
##          21 22 23 24  25  26 27  28  29 30 31  32 33  34  35  36  37 38 39 40
## clean_D1 86 24 29 84 233 175 56 901 204  1 30 120 37 346 106 101 114 16 19  8
## clean_D2 95 20 17 55 187 176 73 890 205  0 40 132 42 357 114 100 111 11 20  2
## clean_D3 97 10 21 74 186 186 46 846 202  1 29 109 37 407 114 104  94 25 25  1
## clean_P1 26 25 11 93  68  28 30 474  59 14 13  30 66 129  30 212 230 83 47 76
## clean_P2 19 25 23 78  85  24 27 525  61 27 15  32 81 132  42 220 217 71 29 83
## clean_P3 29 14 22 77  65  35 23 447  67 15 12  26 68 119  22 205 232 66 52 84
# View(cfData@counts)
cytoHeatmaps(cfData, group="status", legend=TRUE)

Cell counts need to be transformed

# Store raw counts
cellCountRaw <- cfData@counts

# Transform cell counts with log2( x + floor )
cfData@counts <- log2(cfData@counts+10)
# Center
cfData@counts <- sweep(cfData@counts, 2, colMeans(cfData@counts), "-")
round(head(cfData@counts))
##           1  2  3  4 5  6 7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## clean_D1 -1 -1  0 -1 0 -1 0 -1 -1  0  0  0  0  1  0  0  0  0  0  0  1  0  0  0
## clean_D2 -1 -1 -1 -1 0 -1 0 -1 -1 -1  0  0  0  1  0  1  0  0 -1  0  1  0  0  0
## clean_D3 -1 -1 -1 -1 0 -1 0 -1 -1  0  0  0  0  1  0  1  0  0  0  0  1 -1  0  0
## clean_P1  1  1  1  1 0  1 0  1  1  1  0  0  0 -1  0 -1  0  0  1  0 -1  0 -1  0
## clean_P2  1  1  0  1 0  1 0  1  1  0  0  0  0 -1  0 -1  0  0  0  0 -1  0  0  0
## clean_P3  1  1  1  1 0  1 0  1  1  0  0  0  0 -1  0 -1  0  0  1  0 -1  0  0  0
##          25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## clean_D1  1  1  0  0  1 -1  0  1  0  1  1 -1  0 -1  0 -1
## clean_D2  1  1  1  0  1 -1  1  1  0  1  1 -1  0 -1  0 -2
## clean_D3  1  1  0  0  1 -1  0  1  0  1  1  0 -1  0  0 -2
## clean_P1 -1 -1  0  0 -1  0  0 -1  0 -1 -1  0  1  1  1  1
## clean_P2  0 -1  0  0 -1  1  0 -1  1 -1  0  1  0  1  0  1
## clean_P3 -1 -1 -1 -1 -1  1 -1 -1  0 -1 -1  0  1  1  1  1
cytoHeatmaps(cfData, group="status", legend=TRUE)

Let’s take status D as reference

# Center to D
cfData@counts <- sweep(cfData@counts, 2, colMeans(cfData@counts[cfData@samples$status == "D",]), "-")
round(head(cfData@counts)*10)
##           1  2  3  4  5  6  7  8  9 10 11 12 13  14 15  16 17 18 19 20  21 22
## clean_D1  0 -1  2 -1  0 -2  0 -1  0  1 -1  0 -1  -3  1  -4  0  1  2 -1  -1  3
## clean_D2  0  0 -1 -1  0  2  1 -1  0 -1 -3  0  1   1 -1   3 -1  1 -2  0   0  1
## clean_D3  1  1 -1  1  0  0 -1  2  0  0  4  0  0   2  0   1  2 -2  0  1   1 -5
## clean_P1 21 15 13 15 -2 14  7 17 12 11  3 -9 -2 -15  0 -17 -3 -6 10  0 -15  4
## clean_P2 24 15 10 14  0 15  7 16 11 10  2 -9  1 -22  0 -20 -4 -6  3  0 -18  4
## clean_P3 18 15 14 14  4 15  7 17 14  9  3 -9 -1 -14  1 -15 -5 -6  9  1 -14 -2
##          23 24  25  26  27  28  29 30  31  32 33  34  35 36 37 38 39 40
## clean_D1  3  2   2   0   0   0   0  0  -1   0  0  -1  -1  0  1  0 -1  4
## clean_D2 -2 -3  -1   0   3   0   0 -1   2   1  1   0   0  0  1 -3 -1 -2
## clean_D3  0  1  -1   1  -3  -1   0  0  -1  -1  0   1   0  0 -2  4  2 -3
## clean_P1 -6  4 -14 -23  -8  -9 -16 12  -9 -17  6 -14 -16 10 10 18  9 27
## clean_P2  0  1 -12 -25  -9  -7 -16 18  -8 -16  9 -14 -12 10 10 16  3 28
## clean_P3  0  1 -15 -21 -10 -10 -15 12 -10 -19  7 -16 -19  9 11 15 10 28
cytoHeatmaps(cfData, group="status", legend=TRUE)

Box plots

# Detailed view of counts aka percentages
cytoBoxplots(cfData, group = "status")

Functional marker histograms

# Detailed of functional markers
msiPlot(cfData, markers = c("CD8", "CD4"), byGroup='status')

Do some statistics

# Add t.test
cfData <- cytottest(cfData, group="status", adjustMethod = "fdr")
head(cfData@results)
##   clusters        mean_D     mean_P       pvalue    adjusted
## 1        1 -7.401487e-17 2.11326174 0.0044446607 0.009357180
## 2        2 -3.700969e-17 1.49591777 0.0002350696 0.001880557
## 3        3  0.000000e+00 1.19766406 0.0013465276 0.004896464
## 4        4 -3.700743e-17 1.43625178 0.0006430074 0.003462525
## 5        5 -2.312965e-18 0.03577742 0.8487521500 0.893423316
## 6        6 -3.700433e-17 1.49212389 0.0057601568 0.010344499
# Add some columns
# fold is a multiplicative coefficient applied to the reference
# fold is negative to stand for 1/fold
cfData@results <- within(cfData@results, {
  diff <- mean_P - mean_D
  fold <- sign(diff) * round(2^abs(diff), 2)
  unselected <- abs(diff) < 0.5 | pvalue > 0.05
  label <- clusters
  label[unselected] <- NA
  })

View the Volcano plot

library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:cytofast':
## 
##     expr
library(ggrepel)
set.seed(42)

p <- ggplot(cfData@results, aes(diff, -log10(pvalue))) + geom_point() + geom_hline(yintercept = -log10(c(0.05, 1))) + geom_vline(xintercept = c(-0.5, 0 , 0.5)) + ggtitle("Volcano Plot, diff = Log2 Fold Change of percentages")
p + geom_text_repel(aes(label = label))
## Warning: Removed 11 rows containing missing values (geom_text_repel).

Alternate scale

p <- ggplot(cfData@results, aes(2^diff, -log10(pvalue))) + geom_point() + geom_hline(yintercept = -log10(c(0.05, 1))) + geom_vline(xintercept = c(1/1.5, 1 , 1.5)) + ggtitle("Volcano Plot, diff = Log2 Fold Change of percentages") + scale_x_continuous(trans = "log2")
p + geom_text_repel(aes(label = label))
## Warning: Removed 11 rows containing missing values (geom_text_repel).

and the table of counts

library(knitr)
kable(
  cbind(cfData@results[with(cfData@results, !unselected),], t(
    cellCountRaw[,with(cfData@results, !unselected)]))[,-c(1:3,6:7)]
)
pvalue adjusted fold diff clean_D1 clean_D2 clean_D3 clean_P1 clean_P2 clean_P3
1 0.0044447 0.0093572 4.33 2.1132617 0 0 1 33 46 27
2 0.0002351 0.0018806 2.82 1.4959178 227 247 254 687 701 718
3 0.0013465 0.0048965 2.29 1.1976641 66 55 53 152 124 163
4 0.0006430 0.0034625 2.71 1.4362518 207 205 242 628 580 609
6 0.0057602 0.0103445 2.81 1.4921239 9 16 12 50 53 54
7 0.0054537 0.0103445 1.63 0.7038460 130 138 116 211 211 221
8 0.0012854 0.0048965 3.22 1.6858110 40 40 50 160 156 167
9 0.0025327 0.0066377 2.34 1.2273065 88 85 88 208 204 241
10 0.0003242 0.0021612 1.96 0.9721761 113 103 110 238 224 207
12 0.0000026 0.0001030 -1.84 -0.8776517 956 939 932 517 499 509
14 0.0059481 0.0103445 -3.21 -1.6819130 43 61 69 14 5 15
16 0.0027074 0.0066377 -3.35 -1.7449611 46 77 70 13 8 15
18 0.0321336 0.0428447 -1.50 -0.5888729 79 76 60 44 44 44
21 0.0028210 0.0066377 -2.98 -1.5757889 86 95 97 26 19 29
25 0.0007619 0.0034625 -2.57 -1.3591659 233 187 186 68 85 65
26 0.0017456 0.0058185 -4.88 -2.2860022 175 176 186 28 24 35
27 0.0182757 0.0261081 -1.85 -0.8836710 56 73 46 30 27 23
28 0.0021898 0.0066377 -1.81 -0.8560822 901 890 846 474 525 447
29 0.0007791 0.0034625 -2.96 -1.5641741 204 205 202 59 61 67
30 0.0157821 0.0233808 2.64 1.3991602 1 0 1 14 27 15
31 0.0071710 0.0119516 -1.83 -0.8747789 30 40 29 13 15 12
32 0.0000683 0.0009111 -3.31 -1.7275874 120 132 109 30 32 26
33 0.0031106 0.0069124 1.67 0.7438357 37 42 37 66 81 68
34 0.0002145 0.0018806 -2.78 -1.4731193 346 357 407 129 132 119
35 0.0142354 0.0219006 -2.99 -1.5813353 106 114 114 30 42 22
36 0.0000561 0.0009111 1.99 0.9931444 101 100 104 212 220 205
37 0.0026399 0.0066377 2.04 1.0262677 114 111 94 230 217 232
38 0.0087517 0.0140028 3.11 1.6349654 16 11 25 83 71 66
40 0.0049719 0.0099439 6.81 2.7685644 8 2 1 76 83 84