JT AFC 05/02/2020 - cytofkit / cytofast

Project definition
FCS Overview
cytofkit analysis
Cytofast post-analysis

Project definition

We organize the data and the results

fcs_dir = "c:/demo/200205-atelier/CLEAN_DATA"
res_dir = "c:/demo/200205-atelier/CLEAN_DATA_results"
marker_file = "ck_markers_main.txt"
if (!dir.exists(res_dir)) dir.create(res_dir)

FCS Overview

library(flowCore)

# read the FCS as a flowset, ie a group of compatible FCS
fs = read.flowSet(path = fcs_dir, pattern = "*.fcs", transformation = FALSE, truncate_max_range = FALSE)

# view the flowset
fs

## A flowSet with 6 experiments.
## 
##   column names:
##   129 131 132 133 134 136 137 138 139 140 146 151 155 157 161 169 172 173 CCR4-149 CCR5-144 CCR7-159 CD16-165 CD19-142 CD20-147 CD27-167 CD28-160 CD3-170 CD33-158 CD38-148 CD4-145 CD43-150 CD44-166 CD45-154 CD45RA-153 CD45RO-164 CD56-176 CD69-162 CD8-168 CXCR3-156 CXCR5-171 Cell_length Cisplatin-195 HLA-DR-163 ICOS-141 NA-191 NA-193 PD-1-174 PD-L1-175 TCRgd-152 TIM3-143 Time

# view annotations
pData(fs)

##                      name
## clean_D1.fcs clean_D1.fcs
## clean_D2.fcs clean_D2.fcs
## clean_D3.fcs clean_D3.fcs
## clean_P1.fcs clean_P1.fcs
## clean_P2.fcs clean_P2.fcs
## clean_P3.fcs clean_P3.fcs

# here, none

# view the cell counts
fsApply(fs, nrow)

##               [,1]
## clean_D1.fcs 15608
## clean_D2.fcs 16075
## clean_D3.fcs 15340
## clean_P1.fcs 15105
## clean_P2.fcs 14724
## clean_P3.fcs 15469

# view a specific FCS
ff = fs[[1]]  # extract 1st FCS as flowframe
# view some keywords
keyword(ff, "$CYT")  # cytometer

## $`$CYT`
## NULL

keyword(ff, "$GUID")

## $`$GUID`
## NULL

keyword(ff, "$FIL")  # file name

## $`$FIL`
## [1] "clean_D1.fcs"

keyword(ff, "$TOT")  # cell count

## $`$TOT`
## [1] "15608"

keyword(ff, "$COM")  # comment

## $`$COM`
## [1] "FCS file exported using Cytobank"

# view all keywords
kwd = keyword(ff)
length(kwd)  # there are too many

## [1] 378

# filter out some keywords
kwd_1 = kwd[!grepl("flowCore", names(kwd))]  # not flowcore
kwd_2 = kwd_1[-grep("^\\$P", names(kwd_1))]  # not standard parameters
#kwd_2  # display remaining keywords
unlist(kwd_2)

##                                       FCSversion 
##                                              "3" 
##                                             $FIL 
##                                   "clean_D1.fcs" 
##                                             $TOT 
##                                          "15608" 
##                                         $BYTEORD 
##                                        "4,3,2,1" 
##                                        $DATATYPE 
##                                              "F" 
##                                   FJ_FCS_VERSION 
##                                              "3" 
##                                   $BEGINANALYSIS 
##                                              "0" 
##                                     $ENDANALYSIS 
##                                              "0" 
##                                      $BEGINSTEXT 
##                                              "0" 
##                                        $ENDSTEXT 
##                                              "0" 
##                                        $NEXTDATA 
##                                              "0" 
##                                            $MODE 
##                                              "L" 
##                                             $COM 
##               "FCS file exported using Cytobank" 
##                                               ID 
##                                             "D1" 
##                                       $BEGINDATA 
##                                           "5014" 
##                                         $ENDDATA 
##                                        "3189045" 
##                                         FILENAME 
## "c:/demo/200205-atelier/CLEAN_DATA/clean_D1.fcs" 
##                                             GUID 
##                                   "clean_D1.fcs" 
##                                     ORIGINALGUID 
##                                   "clean_D1.fcs" 
##                                    GUID.original 
##                                   "clean_D1.fcs"

cytofkit analysis

Run by hand

library(cytofkit)
# Launch the Graphical User Interface for tuning the run
cytofkit_GUI()
# Note the path to the result file

# Launch the Shiny interface to view and annotate the analysis
cytofkitShinyAPP()

# Launch the Shiny interface using a defined path
analysis_file = "c:/demo/200205-atelier/CLEAN_DATA_results/run_5k/run_5k.RData"
if (file.exists(analysis_file))
  cytofkitShinyAPP(analysis_file)

Script version for tuning parameters

Cytofast post-analysis

Main configuration

# Tell where the FCS files of cytofkit are
analysis_fcs_dir = file.path(res_dir, "run_5k", "run_5k_analyzedFCS")
stopifnot(dir.exists(analysis_fcs_dir))
selected_clustering_method = "FlowSOM_clusterIDs"
#selected_clustering_method = "Rphenograph_clusterIDs"

Read Cytofkit FCS export

# Load libraries
library(flowCore)
library(cytofast)

# Load cytofkit FCS files
cfData = readCytofkitFCS(analysis_fcs_dir, clusterID = selected_clustering_method)

## Reading .FCS sample: 1

## Reading .FCS sample: 2

## Reading .FCS sample: 3

## Reading .FCS sample: 4

## Reading .FCS sample: 5

## Reading .FCS sample: 6

# cfData = readCytofkitFCS("demo/cytofkit_demo_analyzedFCS", clusterID = "Rphenograph_clusterIDs")
table(cfData@expr$clusterID)

## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
##  107 2834  613 2471  256  194 1027  613  914  995   54 4352  943  207  527  229 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
##   44  347   44   38  352  118  123  461  824  624  255 4083  798   58  139  449 
##   33   34   35   36   37   38   39   40 
##  331 1490  428  942  998  272  192  254

Optionnally, do some channel cleaning

# Remove unneeded channels
keep <- colnames(cfData@expr)  # all
keep <- keep[!grepl("^(Time|Event_length|Cell_length)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("^(Cisplatin)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("(_ADN1|_ADN2)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("^(NA\\.)", keep, ignore.case = TRUE)]
keep <- keep[!grepl("^X\\d+", keep) | grepl("_", keep)]  # unannotated X channel
cfData@expr <- cfData@expr[, keep]

# Rename channels
colnames(cfData@expr) <- gsub("(^X\\d+.+?_)", "", keep)  # remove metal tag
colnames(cfData@expr) <- gsub("(\\.\\d+.$)", "", keep)  # remove metal tag

# Manual ordering of channels
first <- c("CD20", "CD19", "CD3", "CD4", "CD8", "TCRgd", "CD56", "CD16", "CD45RA", "CD95", "CD127")
first <- intersect(first, colnames(cfData@expr))

keep <- colnames(cfData@expr)  # all
cfData@expr <- cfData@expr[, c(keep[1:2], first, setdiff(keep[-(1:2)], first))]

Read meta data that group FCS samples

# To compute statistical tests, we need to define groups
# Here are the un-annotated samples
cfData@samples

##          sampleID           FCSfilename
## clean_D1 clean_D1 cytofkit_clean_D1.fcs
## clean_D2 clean_D2 cytofkit_clean_D2.fcs
## clean_D3 clean_D3 cytofkit_clean_D3.fcs
## clean_P1 clean_P1 cytofkit_clean_P1.fcs
## clean_P2 clean_P2 cytofkit_clean_P2.fcs
## clean_P3 clean_P3 cytofkit_clean_P3.fcs

# Either we fill the annotation using an Excel template
# the template consists in a first column of sample identifiers
# and a second column called status
meta <- data.frame(cfData@samples, status = "")
# The template is written on disk
if (!file.exists("meta.csv")) {
  write.csv(meta, "meta.csv")
}
# Now you can annotate the template using Excel
# and store it as CSV file keeping the original format

# Either we fill meta data programmatically
meta$status <- c(rep(c("D", "P"), each = 3))
write.csv(meta, "meta.csv")

# Fill the template using Excel or so

# Import annotation
if (file.exists("meta.csv")) {
  meta <- read.csv("meta.csv", row.names = 1)
}
# meta <- meta[match(row.names(cfData@samples), meta[,"sampleID"]),] # match sampleID
cfData@samples <- cbind.data.frame(cfData@samples, meta[, -1, drop = FALSE])
cfData@samples

##          sampleID           FCSfilename           FCSfilename status
## clean_D1 clean_D1 cytofkit_clean_D1.fcs cytofkit_clean_D1.fcs      D
## clean_D2 clean_D2 cytofkit_clean_D2.fcs cytofkit_clean_D2.fcs      D
## clean_D3 clean_D3 cytofkit_clean_D3.fcs cytofkit_clean_D3.fcs      D
## clean_P1 clean_P1 cytofkit_clean_P1.fcs cytofkit_clean_P1.fcs      P
## clean_P2 clean_P2 cytofkit_clean_P2.fcs cytofkit_clean_P2.fcs      P
## clean_P3 clean_P3 cytofkit_clean_P3.fcs cytofkit_clean_P3.fcs      P

Transform expression values, in the same way you display marker intensity

# Transform expression with asinh( x / 5 )
cfData@expr[,-(1:2)] <- asinh(cfData@expr[,-(1:2)]/5)

Works with cell counts

# Retrieve cell counts
cfData <- cellCounts(cfData)
head(cfData@counts)

##           1   2   3   4  5  6   7   8   9  10 11  12  13 14 15 16 17 18 19 20
## clean_D1  0 227  66 207 43  9 130  40  88 113  6 956 146 43 92 46  9 79  5  5
## clean_D2  0 247  55 205 41 16 138  40  85 103  4 939 167 61 84 77  8 76  1  6
## clean_D3  1 254  53 242 41 12 116  50  88 110 12 932 165 69 88 70 12 60  3  8
## clean_P1 33 687 152 628 35 50 211 160 208 238 11 517 142 14 86 13  6 44 15  6
## clean_P2 46 701 124 580 40 53 211 156 204 224 10 499 177  5 85  8  5 44  6  6
## clean_P3 27 718 163 609 56 54 221 167 241 207 11 509 146 15 92 15  4 44 14  7
##          21 22 23 24  25  26 27  28  29 30 31  32 33  34  35  36  37 38 39 40
## clean_D1 86 24 29 84 233 175 56 901 204  1 30 120 37 346 106 101 114 16 19  8
## clean_D2 95 20 17 55 187 176 73 890 205  0 40 132 42 357 114 100 111 11 20  2
## clean_D3 97 10 21 74 186 186 46 846 202  1 29 109 37 407 114 104  94 25 25  1
## clean_P1 26 25 11 93  68  28 30 474  59 14 13  30 66 129  30 212 230 83 47 76
## clean_P2 19 25 23 78  85  24 27 525  61 27 15  32 81 132  42 220 217 71 29 83
## clean_P3 29 14 22 77  65  35 23 447  67 15 12  26 68 119  22 205 232 66 52 84

# View(cfData@counts)
cytoHeatmaps(cfData, group="status", legend=TRUE)

Cell counts need to be transformed

# Store raw counts
cellCountRaw <- cfData@counts

# Transform cell counts with log2( x + floor )
cfData@counts <- log2(cfData@counts+10)
# Center
cfData@counts <- sweep(cfData@counts, 2, colMeans(cfData@counts), "-")
round(head(cfData@counts))

##           1  2  3  4 5  6 7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## clean_D1 -1 -1  0 -1 0 -1 0 -1 -1  0  0  0  0  1  0  0  0  0  0  0  1  0  0  0
## clean_D2 -1 -1 -1 -1 0 -1 0 -1 -1 -1  0  0  0  1  0  1  0  0 -1  0  1  0  0  0
## clean_D3 -1 -1 -1 -1 0 -1 0 -1 -1  0  0  0  0  1  0  1  0  0  0  0  1 -1  0  0
## clean_P1  1  1  1  1 0  1 0  1  1  1  0  0  0 -1  0 -1  0  0  1  0 -1  0 -1  0
## clean_P2  1  1  0  1 0  1 0  1  1  0  0  0  0 -1  0 -1  0  0  0  0 -1  0  0  0
## clean_P3  1  1  1  1 0  1 0  1  1  0  0  0  0 -1  0 -1  0  0  1  0 -1  0  0  0
##          25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## clean_D1  1  1  0  0  1 -1  0  1  0  1  1 -1  0 -1  0 -1
## clean_D2  1  1  1  0  1 -1  1  1  0  1  1 -1  0 -1  0 -2
## clean_D3  1  1  0  0  1 -1  0  1  0  1  1  0 -1  0  0 -2
## clean_P1 -1 -1  0  0 -1  0  0 -1  0 -1 -1  0  1  1  1  1
## clean_P2  0 -1  0  0 -1  1  0 -1  1 -1  0  1  0  1  0  1
## clean_P3 -1 -1 -1 -1 -1  1 -1 -1  0 -1 -1  0  1  1  1  1

cytoHeatmaps(cfData, group="status", legend=TRUE)

Let’s take status D as reference

# Center to D
cfData@counts <- sweep(cfData@counts, 2, colMeans(cfData@counts[cfData@samples$status == "D",]), "-")
round(head(cfData@counts)*10)

##           1  2  3  4  5  6  7  8  9 10 11 12 13  14 15  16 17 18 19 20  21 22
## clean_D1  0 -1  2 -1  0 -2  0 -1  0  1 -1  0 -1  -3  1  -4  0  1  2 -1  -1  3
## clean_D2  0  0 -1 -1  0  2  1 -1  0 -1 -3  0  1   1 -1   3 -1  1 -2  0   0  1
## clean_D3  1  1 -1  1  0  0 -1  2  0  0  4  0  0   2  0   1  2 -2  0  1   1 -5
## clean_P1 21 15 13 15 -2 14  7 17 12 11  3 -9 -2 -15  0 -17 -3 -6 10  0 -15  4
## clean_P2 24 15 10 14  0 15  7 16 11 10  2 -9  1 -22  0 -20 -4 -6  3  0 -18  4
## clean_P3 18 15 14 14  4 15  7 17 14  9  3 -9 -1 -14  1 -15 -5 -6  9  1 -14 -2
##          23 24  25  26  27  28  29 30  31  32 33  34  35 36 37 38 39 40
## clean_D1  3  2   2   0   0   0   0  0  -1   0  0  -1  -1  0  1  0 -1  4
## clean_D2 -2 -3  -1   0   3   0   0 -1   2   1  1   0   0  0  1 -3 -1 -2
## clean_D3  0  1  -1   1  -3  -1   0  0  -1  -1  0   1   0  0 -2  4  2 -3
## clean_P1 -6  4 -14 -23  -8  -9 -16 12  -9 -17  6 -14 -16 10 10 18  9 27
## clean_P2  0  1 -12 -25  -9  -7 -16 18  -8 -16  9 -14 -12 10 10 16  3 28
## clean_P3  0  1 -15 -21 -10 -10 -15 12 -10 -19  7 -16 -19  9 11 15 10 28

cytoHeatmaps(cfData, group="status", legend=TRUE)

Box plots

# Detailed view of counts aka percentages
cytoBoxplots(cfData, group = "status")

Functional marker histograms

# Detailed of functional markers
msiPlot(cfData, markers = c("CD8", "CD4"), byGroup='status')

Do some statistics

# Add t.test
cfData <- cytottest(cfData, group="status", adjustMethod = "fdr")
head(cfData@results)

##   clusters        mean_D     mean_P       pvalue    adjusted
## 1        1 -7.401487e-17 2.11326174 0.0044446607 0.009357180
## 2        2 -3.700969e-17 1.49591777 0.0002350696 0.001880557
## 3        3  0.000000e+00 1.19766406 0.0013465276 0.004896464
## 4        4 -3.700743e-17 1.43625178 0.0006430074 0.003462525
## 5        5 -2.312965e-18 0.03577742 0.8487521500 0.893423316
## 6        6 -3.700433e-17 1.49212389 0.0057601568 0.010344499

# Add some columns
# fold is a multiplicative coefficient applied to the reference
# fold is negative to stand for 1/fold
cfData@results <- within(cfData@results, {
  diff <- mean_P - mean_D
  fold <- sign(diff) * round(2^abs(diff), 2)
  unselected <- abs(diff) < 0.5 | pvalue > 0.05
  label <- clusters
  label[unselected] <- NA
  })

View the Volcano plot

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:cytofast':
## 
##     expr

library(ggrepel)
set.seed(42)

p <- ggplot(cfData@results, aes(diff, -log10(pvalue))) + geom_point() + geom_hline(yintercept = -log10(c(0.05, 1))) + geom_vline(xintercept = c(-0.5, 0 , 0.5)) + ggtitle("Volcano Plot, diff = Log2 Fold Change of percentages")
p + geom_text_repel(aes(label = label))

## Warning: Removed 11 rows containing missing values (geom_text_repel).

Alternate scale

p <- ggplot(cfData@results, aes(2^diff, -log10(pvalue))) + geom_point() + geom_hline(yintercept = -log10(c(0.05, 1))) + geom_vline(xintercept = c(1/1.5, 1 , 1.5)) + ggtitle("Volcano Plot, diff = Log2 Fold Change of percentages") + scale_x_continuous(trans = "log2")
p + geom_text_repel(aes(label = label))

## Warning: Removed 11 rows containing missing values (geom_text_repel).

and the table of counts

library(knitr)
kable(
  cbind(cfData@results[with(cfData@results, !unselected),], t(
    cellCountRaw[,with(cfData@results, !unselected)]))[,-c(1:3,6:7)]
)

	pvalue	adjusted	fold	diff	clean_D1	clean_D2	clean_D3	clean_P1	clean_P2	clean_P3
1	0.0044447	0.0093572	4.33	2.1132617	0	0	1	33	46	27
2	0.0002351	0.0018806	2.82	1.4959178	227	247	254	687	701	718
3	0.0013465	0.0048965	2.29	1.1976641	66	55	53	152	124	163
4	0.0006430	0.0034625	2.71	1.4362518	207	205	242	628	580	609
6	0.0057602	0.0103445	2.81	1.4921239	9	16	12	50	53	54
7	0.0054537	0.0103445	1.63	0.7038460	130	138	116	211	211	221
8	0.0012854	0.0048965	3.22	1.6858110	40	40	50	160	156	167
9	0.0025327	0.0066377	2.34	1.2273065	88	85	88	208	204	241
10	0.0003242	0.0021612	1.96	0.9721761	113	103	110	238	224	207
12	0.0000026	0.0001030	-1.84	-0.8776517	956	939	932	517	499	509
14	0.0059481	0.0103445	-3.21	-1.6819130	43	61	69	14	5	15
16	0.0027074	0.0066377	-3.35	-1.7449611	46	77	70	13	8	15
18	0.0321336	0.0428447	-1.50	-0.5888729	79	76	60	44	44	44
21	0.0028210	0.0066377	-2.98	-1.5757889	86	95	97	26	19	29
25	0.0007619	0.0034625	-2.57	-1.3591659	233	187	186	68	85	65
26	0.0017456	0.0058185	-4.88	-2.2860022	175	176	186	28	24	35
27	0.0182757	0.0261081	-1.85	-0.8836710	56	73	46	30	27	23
28	0.0021898	0.0066377	-1.81	-0.8560822	901	890	846	474	525	447
29	0.0007791	0.0034625	-2.96	-1.5641741	204	205	202	59	61	67
30	0.0157821	0.0233808	2.64	1.3991602	1	0	1	14	27	15
31	0.0071710	0.0119516	-1.83	-0.8747789	30	40	29	13	15	12
32	0.0000683	0.0009111	-3.31	-1.7275874	120	132	109	30	32	26
33	0.0031106	0.0069124	1.67	0.7438357	37	42	37	66	81	68
34	0.0002145	0.0018806	-2.78	-1.4731193	346	357	407	129	132	119
35	0.0142354	0.0219006	-2.99	-1.5813353	106	114	114	30	42	22
36	0.0000561	0.0009111	1.99	0.9931444	101	100	104	212	220	205
37	0.0026399	0.0066377	2.04	1.0262677	114	111	94	230	217	232
38	0.0087517	0.0140028	3.11	1.6349654	16	11	25	83	71	66
40	0.0049719	0.0099439	6.81	2.7685644	8	2	1	76	83	84

JT AFC 05/02/2020 - cytofkit / cytofast

S Granjeaud - A Meghraoui

05/02/2020

Project definition

FCS Overview

cytofkit analysis

Cytofast post-analysis