02_prepare_from

Cell counts matrix
Median Fluorescence Intensity matrix
Median Fluorescence Intensity matrix per sample
Script session

Cell counts matrix

# ==== COUNTS TABLE ============================================================

# read database of counts
count_long = readxl::read_excel("output/counts_Assignment_db.xlsx")
count_long$cluster = as.character(count_long$cluster)


# ==== SAMPLE TABLE ============================================================

# create sample database
sample_data = data.frame(file = unique(count_long$file))

# create a simplified sample id
sample_data$sample_id = sub(".fcs.astrolabe.fcs", "", sample_data$file, fixed = TRUE)
sample_data$sample_id = sub("_EHA001", "", sample_data$sample_id, fixed = TRUE)
sample_data$sample_id = sub("_1_Patients", "", sample_data$sample_id, fixed = TRUE)
# set it as key index of rows
rownames(sample_data) = sample_data$sample_id

# extract batch, patient id, condition from sample id
sample_var = Reduce(rbind.data.frame, strsplit(sample_data$sample_id, "_"))
colnames(sample_var) = c("batch", "patient", "condition")
# append these columns
sample_data = cbind(sample_data, sample_var)

# order samples
order_sample = with(sample_data, order(condition, batch, patient))


# ==== CLUSTER TABLE ===========================================================

# read cluster database
cluster_long = read.csv("input/attachments/Assignment.csv")
cluster_long$Value = as.character(cluster_long$Value)


# ==== MATRIX OF COUNTS ========================================================

# dimension of the matrix of counts
col_names = sample_data$file[order_sample]
row_names = cluster_long$Value

# put extracted counts into a matrix
counts = matrix(0, nrow = length(row_names), ncol = length(col_names),
                dimnames = list(row_names, col_names))
counts[cbind(count_long$cluster, count_long$file)] = count_long$counts

# assign comprehensive names
colnames(counts) = sample_data$sample[order_sample]
rownames(counts) = cluster_long$CellSubset


# ==== FILTER OUT LOW COUNTS ===================================================

# This an optional step that simplifies the Excel processing

# require that a minimal total count
keep_total = rowSums(counts) > 43 * 50
# require that at most 75% of counts are zero
keep_quart = apply(counts, 1, quantile, 0.75) > 0
kept_clusters = which(keep_total & keep_quart)

# update counts table
counts = counts[ kept_clusters, ]


# ==== VIEW, SAVE ==============================================================

knitr::kable(counts[1:9, 1:5])

	160406_1758_Acute	160406_1760_Acute	160406_1773_Acute	160406_1785_Acute	160406_1790_Acute
B Cell (CD27-)	1166	9089	1626	1286	2551
B Cell (Memory)	187	685	296	608	813
B Cell (Plasmablast)	287	411	85	890	494
B Cell_unassigned	2184	8841	3530	2531	2930
Basophil	39	260	13	185	0
CD4+ CD8+ T Cell	0	4306	1324	7155	6379
CD4+ T Cell (Central Memory)	5153	6484	2680	2437	9530
CD4+ T Cell (Effector Memory)	872	320	369	1553	3427
CD4+ T Cell (EMRA)	128	174	80	447	222

counts_df = cbind.data.frame(clusters = rownames(counts), counts)
writexl::write_xlsx(
  counts_df, "output/counts_Assignment_matrix.xlsx")

save(
  counts, sample_data, order_sample, 
  file = "output/counts_Assignment.RData")

Median Fluorescence Intensity matrix

# ==== MFIS TABLE ==============================================================

# read database of mfis
mfi_long = readxl::read_excel("output/mfi_Assignment_median_db.xlsx")
mfi_long$cluster = as.character(mfi_long$cluster)


# ==== MARKER TABLE ============================================================

# load markers table and retain selected markers
mrk_long = as.data.frame(
  readxl::read_excel("output/fcs_markers.xlsx"))
# filter out useless markers (which removes NA)
mrk_long = mrk_long[ which(mrk_long$useful == 1), c("name", "desc")]

# reduce MFI table to selected markers
tbl = merge(mfi_long, mrk_long, by.x = "marker", by.y = "name")

# reduce MFI table to selected clusters
tbl = merge(
  tbl, data.frame(cluster = cluster_long$Value[kept_clusters]))


# ==== MATRIX OF MFIS ==========================================================

# dimension of the matrix of mfis
col_names = mrk_long$desc
row_names = cluster_long$Value[kept_clusters]

# extract MFI DB
res = with(tbl, aggregate(
  tbl[, "median_mfi", drop = FALSE], list(marker = desc, cluster = cluster), median))
# weighted MFI version
# append counts
tbl = merge(tbl, as.data.frame(count_long[, c("cluster", "file", "counts")]))
# split
resw = split(tbl, ~ desc + cluster)
# compute for each data chunk
resw =  lapply(resw, function(df) 
  list(marker = df$desc[1], cluster = df$cluster[1],
       median_mfi = limma::weighted.median(df$median_mfi, df$counts)))
# assemble
resw = Reduce(rbind.data.frame, resw)

# df = merge(res, resw, by = c("marker", "cluster"))
# plot(asinh(df$median_mfi.x/5), asinh(df$median_mfi.y/5), asp = 1)
# library(ggplot2)
# ggplot(df, aes(asinh(median_mfi.x/5), asinh(median_mfi.y/5))) + geom_point() +
#   facet_wrap(~marker)
# ggplot(df, aes(asinh(median_mfi.x/5), asinh(median_mfi.y/5), col = marker)) + geom_point()

# put extracted mfis into a matrix
mfis = matrix(0, nrow = length(row_names), ncol = length(col_names),
              dimnames = list(row_names, col_names))
mfis[cbind(res$cluster, res$marker)] = resw$median_mfi

# assign comprehensive names to clusters
rownames(mfis) = cluster_long$CellSubset[kept_clusters]


# ==== VIEW, SAVE ==============================================================

knitr::kable(mfis[1:9, 1:5])

	CD57	CD45	CD19	CD45RA	CD141
B Cell (CD27-)	5.4535198	126.62884	107.1281357	130.730270	3.1227607
B Cell (Memory)	6.7174575	138.85619	120.0290985	164.852600	4.9270062
B Cell (Plasmablast)	2.8377441	78.54947	38.5903625	79.405418	0.8412668
B Cell_unassigned	5.0418935	116.94312	101.4108276	114.130939	2.6247699
Basophil	0.5703478	36.11363	0.0000000	2.944027	0.0000000
CD4+ CD8+ T Cell	5.9221554	148.14133	0.3653206	20.671368	1.1517029
CD4+ T Cell (Central Memory)	4.2089591	126.08840	0.0000000	5.122553	0.5479562
CD4+ T Cell (Effector Memory)	5.6014166	139.60620	0.0000000	2.720707	0.6345142
CD4+ T Cell (EMRA)	6.4672663	112.40231	0.5579739	20.046161	0.6310581

mfis_df = cbind.data.frame(clusters = rownames(mfis), mfis)
writexl::write_xlsx(
  mfis_df, "output/mfis_Assignment_matrix.xlsx")

save(mfis, sample_data, order_sample, file = "output/mfis_Assignment.RData")

Median Fluorescence Intensity matrix per sample

# ==== MATRIX OF MFIS PER SAMPLE ===============================================

# extract MFI DB
res = tbl
res$key = sprintf("%s $$ %s", res$desc, res$cluster)

# dimension of the matrix of mfis
col_names = sample_data$file[order_sample]
row_names_df = expand.grid(
  cluster = cluster_long$Value[kept_clusters], desc = mrk_long$desc)
row_names = row_names_df$key = 
  sprintf("%s $$ %s", row_names_df$desc, row_names_df$cluster)

# put extracted mfis into a matrix
mfis_samples = matrix(
  0, nrow = length(row_names), ncol = length(col_names),
  dimnames = list(row_names, col_names))
mfis_samples[cbind(res$key, res$file)] = res$median_mfi

row_names_df$key_desc = sprintf(
  "%s - %s", row_names_df$desc, cluster_long$CellSubset[row_names_df$cluster])

# assign comprehensive names
colnames(mfis_samples) = sample_data$sample[order_sample]
rownames(mfis_samples) = row_names_df$key_desc


# ==== VIEW, SAVE ==============================================================

knitr::kable(mfis_samples[1:9, 1:5])

	160406_1758_Acute	160406_1760_Acute	160406_1773_Acute	160406_1785_Acute	160406_1790_Acute
CD57 - B Cell (CD27-)	2.493280	3.2060740	4.111000	2.464065	3.588324
CD57 - B Cell (Memory)	3.688186	4.6267438	5.930459	3.048475	3.798380
CD57 - B Cell (Plasmablast)	1.913485	1.8589548	1.783925	1.647928	1.613323
CD57 - B Cell_unassigned	1.893067	2.3921435	3.088092	1.898907	2.689122
CD57 - Basophil	0.000000	0.0621516	2.546956	0.000000	0.000000
CD57 - CD4+ CD8+ T Cell	0.000000	2.6537449	3.119385	1.494454	3.703859
CD57 - CD4+ T Cell (Central Memory)	1.411105	1.8586383	1.728350	1.115001	2.743009
CD57 - CD4+ T Cell (Effector Memory)	1.787999	1.2110798	3.414195	1.413289	3.924535
CD57 - CD4+ T Cell (EMRA)	1.725193	1.3881128	147.955566	1.970000	198.599983

mfis_samples_df = cbind.data.frame(
  "marker - cluster" = rownames(mfis_samples), mfis_samples)
writexl::write_xlsx(
  as.data.frame(mfis_samples_df), "output/mfis_samples_Assignment_matrix.xlsx")

save(mfis_samples_df, sample_data, order_sample, 
     file = "output/mfis_samples_Assignment.RData")

Script session

Session info

## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=French_France.utf8  LC_CTYPE=French_France.utf8   
## [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C                  
## [5] LC_TIME=French_France.utf8    
## 
## time zone: Europe/Paris
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] vctrs_0.6.5       cli_3.6.3         knitr_1.49        rlang_1.1.4      
##  [5] xfun_0.50         jsonlite_1.8.9    glue_1.8.0        statmod_1.5.0    
##  [9] htmltools_0.5.8.1 sass_0.4.9        readxl_1.4.3      writexl_1.5.1    
## [13] rmarkdown_2.29    cellranger_1.1.0  evaluate_1.0.3    jquerylib_0.1.4  
## [17] tibble_3.2.1      fastmap_1.2.0     yaml_2.3.10       lifecycle_1.0.4  
## [21] compiler_4.4.2    pkgconfig_2.0.3   limma_3.62.2      rstudioapi_0.17.1
## [25] digest_0.6.37     R6_2.5.1          pillar_1.10.1     magrittr_2.0.3   
## [29] bslib_0.8.0       tools_4.4.2       cachem_1.1.0

02_prepare_from_fcs

2025-01-30

Cell counts matrix

Median Fluorescence Intensity matrix

Median Fluorescence Intensity matrix per sample

Script session