Cell counts matrix
# ==== COUNTS TABLE ============================================================
# read database of counts
count_long = readxl::read_excel("output/counts_Assignment_db.xlsx")
count_long$cluster = as.character(count_long$cluster)
# ==== SAMPLE TABLE ============================================================
# create sample database
sample_data = data.frame(file = unique(count_long$file))
# create a simplified sample id
sample_data$sample_id = sub(".fcs.astrolabe.fcs", "", sample_data$file, fixed = TRUE)
sample_data$sample_id = sub("_EHA001", "", sample_data$sample_id, fixed = TRUE)
sample_data$sample_id = sub("_1_Patients", "", sample_data$sample_id, fixed = TRUE)
# set it as key index of rows
rownames(sample_data) = sample_data$sample_id
# extract batch, patient id, condition from sample id
sample_var = Reduce(rbind.data.frame, strsplit(sample_data$sample_id, "_"))
colnames(sample_var) = c("batch", "patient", "condition")
# append these columns
sample_data = cbind(sample_data, sample_var)
# order samples
order_sample = with(sample_data, order(condition, batch, patient))
# ==== CLUSTER TABLE ===========================================================
# read cluster database
cluster_long = read.csv("input/attachments/Assignment.csv")
cluster_long$Value = as.character(cluster_long$Value)
# ==== MATRIX OF COUNTS ========================================================
# dimension of the matrix of counts
col_names = sample_data$file[order_sample]
row_names = cluster_long$Value
# put extracted counts into a matrix
counts = matrix(0, nrow = length(row_names), ncol = length(col_names),
dimnames = list(row_names, col_names))
counts[cbind(count_long$cluster, count_long$file)] = count_long$counts
# assign comprehensive names
colnames(counts) = sample_data$sample[order_sample]
rownames(counts) = cluster_long$CellSubset
# ==== FILTER OUT LOW COUNTS ===================================================
# This an optional step that simplifies the Excel processing
# require that a minimal total count
keep_total = rowSums(counts) > 43 * 50
# require that at most 75% of counts are zero
keep_quart = apply(counts, 1, quantile, 0.75) > 0
kept_clusters = which(keep_total & keep_quart)
# update counts table
counts = counts[ kept_clusters, ]
# ==== VIEW, SAVE ==============================================================
knitr::kable(counts[1:9, 1:5])
B Cell (CD27-) |
1166 |
9089 |
1626 |
1286 |
2551 |
B Cell (Memory) |
187 |
685 |
296 |
608 |
813 |
B Cell (Plasmablast) |
287 |
411 |
85 |
890 |
494 |
B Cell_unassigned |
2184 |
8841 |
3530 |
2531 |
2930 |
Basophil |
39 |
260 |
13 |
185 |
0 |
CD4+ CD8+ T Cell |
0 |
4306 |
1324 |
7155 |
6379 |
CD4+ T Cell (Central Memory) |
5153 |
6484 |
2680 |
2437 |
9530 |
CD4+ T Cell (Effector Memory) |
872 |
320 |
369 |
1553 |
3427 |
CD4+ T Cell (EMRA) |
128 |
174 |
80 |
447 |
222 |
counts_df = cbind.data.frame(clusters = rownames(counts), counts)
writexl::write_xlsx(
counts_df, "output/counts_Assignment_matrix.xlsx")
save(
counts, sample_data, order_sample,
file = "output/counts_Assignment.RData")
Median Fluorescence Intensity matrix
# ==== MFIS TABLE ==============================================================
# read database of mfis
mfi_long = readxl::read_excel("output/mfi_Assignment_median_db.xlsx")
mfi_long$cluster = as.character(mfi_long$cluster)
# ==== MARKER TABLE ============================================================
# load markers table and retain selected markers
mrk_long = as.data.frame(
readxl::read_excel("output/fcs_markers.xlsx"))
# filter out useless markers (which removes NA)
mrk_long = mrk_long[ which(mrk_long$useful == 1), c("name", "desc")]
# reduce MFI table to selected markers
tbl = merge(mfi_long, mrk_long, by.x = "marker", by.y = "name")
# reduce MFI table to selected clusters
tbl = merge(
tbl, data.frame(cluster = cluster_long$Value[kept_clusters]))
# ==== MATRIX OF MFIS ==========================================================
# dimension of the matrix of mfis
col_names = mrk_long$desc
row_names = cluster_long$Value[kept_clusters]
# extract MFI DB
res = with(tbl, aggregate(
tbl[, "median_mfi", drop = FALSE], list(marker = desc, cluster = cluster), median))
# weighted MFI version
# append counts
tbl = merge(tbl, as.data.frame(count_long[, c("cluster", "file", "counts")]))
# split
resw = split(tbl, ~ desc + cluster)
# compute for each data chunk
resw = lapply(resw, function(df)
list(marker = df$desc[1], cluster = df$cluster[1],
median_mfi = limma::weighted.median(df$median_mfi, df$counts)))
# assemble
resw = Reduce(rbind.data.frame, resw)
# df = merge(res, resw, by = c("marker", "cluster"))
# plot(asinh(df$median_mfi.x/5), asinh(df$median_mfi.y/5), asp = 1)
# library(ggplot2)
# ggplot(df, aes(asinh(median_mfi.x/5), asinh(median_mfi.y/5))) + geom_point() +
# facet_wrap(~marker)
# ggplot(df, aes(asinh(median_mfi.x/5), asinh(median_mfi.y/5), col = marker)) + geom_point()
# put extracted mfis into a matrix
mfis = matrix(0, nrow = length(row_names), ncol = length(col_names),
dimnames = list(row_names, col_names))
mfis[cbind(res$cluster, res$marker)] = resw$median_mfi
# assign comprehensive names to clusters
rownames(mfis) = cluster_long$CellSubset[kept_clusters]
# ==== VIEW, SAVE ==============================================================
knitr::kable(mfis[1:9, 1:5])
B Cell (CD27-) |
5.4535198 |
126.62884 |
107.1281357 |
130.730270 |
3.1227607 |
B Cell (Memory) |
6.7174575 |
138.85619 |
120.0290985 |
164.852600 |
4.9270062 |
B Cell (Plasmablast) |
2.8377441 |
78.54947 |
38.5903625 |
79.405418 |
0.8412668 |
B Cell_unassigned |
5.0418935 |
116.94312 |
101.4108276 |
114.130939 |
2.6247699 |
Basophil |
0.5703478 |
36.11363 |
0.0000000 |
2.944027 |
0.0000000 |
CD4+ CD8+ T Cell |
5.9221554 |
148.14133 |
0.3653206 |
20.671368 |
1.1517029 |
CD4+ T Cell (Central Memory) |
4.2089591 |
126.08840 |
0.0000000 |
5.122553 |
0.5479562 |
CD4+ T Cell (Effector Memory) |
5.6014166 |
139.60620 |
0.0000000 |
2.720707 |
0.6345142 |
CD4+ T Cell (EMRA) |
6.4672663 |
112.40231 |
0.5579739 |
20.046161 |
0.6310581 |
mfis_df = cbind.data.frame(clusters = rownames(mfis), mfis)
writexl::write_xlsx(
mfis_df, "output/mfis_Assignment_matrix.xlsx")
save(mfis, sample_data, order_sample, file = "output/mfis_Assignment.RData")
Script session
Session info
## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=French_France.utf8 LC_CTYPE=French_France.utf8
## [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C
## [5] LC_TIME=French_France.utf8
##
## time zone: Europe/Paris
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] vctrs_0.6.5 cli_3.6.3 knitr_1.49 rlang_1.1.4
## [5] xfun_0.50 jsonlite_1.8.9 glue_1.8.0 statmod_1.5.0
## [9] htmltools_0.5.8.1 sass_0.4.9 readxl_1.4.3 writexl_1.5.1
## [13] rmarkdown_2.29 cellranger_1.1.0 evaluate_1.0.3 jquerylib_0.1.4
## [17] tibble_3.2.1 fastmap_1.2.0 yaml_2.3.10 lifecycle_1.0.4
## [21] compiler_4.4.2 pkgconfig_2.0.3 limma_3.62.2 rstudioapi_0.17.1
## [25] digest_0.6.37 R6_2.5.1 pillar_1.10.1 magrittr_2.0.3
## [29] bslib_0.8.0 tools_4.4.2 cachem_1.1.0