Parameters
# Input directory; the script expects "fcs" and "attachments" sub-directories
in_path <- "input"
# Output directory that receives the generated workbooks
out_path <- "extract"

# Columns of the FCS expression matrix that hold the cluster ids
clustCols <- c("Profiling", "Assignment")

# Per-cluster intensity summaries, keyed by the name used in output files
intensity_func <- list(
  mean = base::mean,
  median = stats::median
)
intensity_feat <- names(intensity_func)

# Columns that are not markers and must be left out of the MFI tables
markers_to_ignore <- c("Time", "Event_length")
Helpers
# Optional timing helpers: attach tictoc when it is available, otherwise
# define silent no-op stand-ins so the tic()/toc() calls keep working.
# requireNamespace() is used instead of scanning installed.packages(), which
# is slow and not meant for testing whether a single package is usable.
if (requireNamespace("tictoc", quietly = TRUE)) {
  library(tictoc)
} else {
  # invisible(NULL) keeps the fallback from printing "NULL" in rendered output
  tic <- toc <- function(...) invisible(NULL)
}
Read annotations
# Validate the expected input layout and make sure the output directory exists
stopifnot(dir.exists(in_path))
stopifnot(dir.exists(file.path(in_path, "fcs")))
# recursive = TRUE so that a nested out_path can be created in one go
if (!dir.exists(out_path)) dir.create(out_path, recursive = TRUE)
# List the FCS files to process (file names only, relative to <in_path>/fcs)
list_fcs <- list.files(path = file.path(in_path, "fcs"), pattern = "\\.fcs$")
stopifnot(length(list_fcs) > 0)
# Preview at most the first 9 file names; head() does not pad with NA when
# fewer than 9 files are present
knitr::kable(data.frame("fname_files" = head(list_fcs, 9)))
160406_EHA001_1758_1_Patients_Acute.fcs.astrolabe.fcs |
160406_EHA001_1758_1_Patients_Conv.fcs.astrolabe.fcs |
160406_EHA001_1760_1_Patients_Acute.fcs.astrolabe.fcs |
160406_EHA001_1760_1_Patients_Conv.fcs.astrolabe.fcs |
160406_EHA001_1773_1_Patients_Acute.fcs.astrolabe.fcs |
160406_EHA001_1773_1_Patients_Conv.fcs.astrolabe.fcs |
160406_EHA001_1785_1_Patients_Acute.fcs.astrolabe.fcs |
160406_EHA001_1785_1_Patients_Conv.fcs.astrolabe.fcs |
160406_EHA001_1790_1_Patients_Acute.fcs.astrolabe.fcs |
# Load the cluster annotation tables: one "<clustCol>.csv" per clustering
# column, read from <in_path>/attachments
tmp_path <- file.path(in_path, "attachments")
stopifnot(dir.exists(tmp_path))
cluster_id2name <- list()
for (clustCol in clustCols) {
  cluster_id2name[[clustCol]] <- read.csv(
    file.path(tmp_path, paste0(clustCol, ".csv"))
  )
}
# Preview at most the first 9 rows of each table; head() avoids the all-NA
# rows that `[1:9, ]` produces for tables with fewer than 9 rows
for (clustCol in clustCols) {
  print(knitr::kable(head(cluster_id2name[[clustCol]], 9)))
}
##
##
## | Value|CellSubset |
## |-----:|:----------------------------------------|
## | 1|B Cell (CD27-) CXCR5hi CD1chi |
## | 2|B Cell (CD27-) CXCR5hi CD1clo CCR6hi |
## | 3|B Cell (CD27-) CXCR5hi CD1clo CCR6lo |
## | 4|B Cell (CD27-) CXCR5lo |
## | 5|B Cell (Memory) |
## | 6|B Cell (Plasmablast) |
## | 7|B Cell_unassigned CD11chi |
## | 8|B Cell_unassigned CD11clo CXCR5hi CD1chi |
## | 9|B Cell_unassigned CD11clo CXCR5hi CD1clo |
##
##
## | Value|CellSubset |
## |-----:|:-----------------------------|
## | 1|B Cell (CD27-) |
## | 2|B Cell (Memory) |
## | 3|B Cell (Plasmablast) |
## | 4|B Cell_unassigned |
## | 5|Basophil |
## | 6|CD4+ CD8+ T Cell |
## | 7|CD4+ T Cell (Central Memory) |
## | 8|CD4+ T Cell (Effector Memory) |
## | 9|CD4+ T Cell (EMRA) |
# Drop the temporary attachments path; nothing in the visible script uses it afterwards
rm(tmp_path)
Parse FCS files
# Accumulators filled while parsing:
#   fcs_stat[[clustCol]][["MFI"]][[statistic]][[file]] -> per-cluster summary
#   fcs_stat[[clustCol]][["abundance"]][[file]]        -> per-cluster counts
#   fcs_length[[file]]                                 -> nrow() of the file
fcs_stat <- list()
fcs_length <- list()
tic("Parsing FCS")
for (fname in list_fcs) {
  # Progress trace: one line per file
  cat(fname, "\n")
  # Read the file with the options used throughout this script
  fcs <- read.FCS(
    file.path(in_path, "fcs", fname),
    transformation = FALSE,
    truncate_max_range = FALSE
  )
  exprs <- exprs(fcs)
  fcs_length[[fname]] <- nrow(fcs)
  n_events <- nrow(exprs)
  for (clustCol in clustCols) {
    cluster_ids <- exprs[, clustCol]
    # Summarise every column of the expression matrix by cluster id, once per
    # requested statistic
    for (fct_ in intensity_feat) {
      func <- intensity_func[[fct_]]
      value_feat <- aggregate(exprs, list(cluster = cluster_ids), func)
      value_feat$file <- fname
      fcs_stat[[clustCol]][["MFI"]][[fct_]][[fname]] <- value_feat
    }
    # Number of rows per cluster id, plus the share of this file's rows
    counts <- count(cluster_ids)
    counts$file <- fname
    colnames(counts) <- c("cluster", "counts", "file")
    counts$freq_by_patients <- counts$counts / n_events
    fcs_stat[[clustCol]][["abundance"]][[fname]] <- counts
  }
}
## 160406_EHA001_1758_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1758_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1760_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1760_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1773_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1773_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1785_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1785_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1790_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1790_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1793_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1793_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1794_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1794_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1800_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1800_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1802_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1802_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1822_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1822_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1823_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1823_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1824_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1824_1_Patients_Conv.fcs.astrolabe.fcs
## 160406_EHA001_1857_1_Patients_Acute.fcs.astrolabe.fcs
## 160406_EHA001_1857_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1828_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1828_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1829_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1829_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1838_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1838_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1839_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1839_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1842_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1842_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1844_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1844_1_Patients_Conv.fcs.astrolabe.fcs
## 160407_EHA001_1847_1_Patients_Acute.fcs.astrolabe.fcs
## 160407_EHA001_1847_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1862_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1862_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1863_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1863_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1864_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1864_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1878_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1878_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1879_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1879_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1880_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1880_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1882_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1882_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1885_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1885_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1886_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1886_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1889_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1889_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1890_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1890_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1891_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1891_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1897_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1897_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1910_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1910_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1912_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1912_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1914_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1914_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1918_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1918_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1920_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1920_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1924_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1924_1_Patients_Conv.fcs.astrolabe.fcs
## 160408_EHA001_1937_1_Patients_Acute.fcs.astrolabe.fcs
## 160408_EHA001_1937_1_Patients_Conv.fcs.astrolabe.fcs
## 160411_EHA001_1925_1_Patients_Acute.fcs.astrolabe.fcs
## 160411_EHA001_1925_1_Patients_Conv.fcs.astrolabe.fcs
## 160411_EHA001_1938_1_Patients_Acute.fcs.astrolabe.fcs
## 160411_EHA001_1938_1_Patients_Conv.fcs.astrolabe.fcs
## 160411_EHA001_1948_1_Patients_Acute.fcs.astrolabe.fcs
## 160411_EHA001_1948_1_Patients_Conv.fcs.astrolabe.fcs
# Close the "Parsing FCS" timer opened before the loop (a no-op when the tictoc fallback is in use)
toc()
## Parsing FCS: 261.54 sec elapsed
MFI workbooks
# Write one workbook per (clustering column, statistic) pair. Each workbook
# has one sheet per marker: files in rows, clusters in columns, with the
# cluster labels written above the data.
for (clustCol in clustCols) {
  # Annotation table for this clustering. Ids are taken from column 1 and
  # labels from column 2 (the original code also read the label by position).
  id2name <- cluster_id2name[[clustCol]]
  for (fct_ in intensity_feat) {
    # Stack the per-file summaries into one table
    MFI <- do.call("rbind", fcs_stat[[clustCol]][["MFI"]][[fct_]])
    markers <- setdiff(
      colnames(MFI),
      c("cluster", "file", markers_to_ignore, clustCols)
    )
    list_MFI_marker <- list()
    cluster_ids_def <- list()
    for (mrk in markers) {
      # Reshape to wide: one row per file, one column per cluster
      MFI_marker <- dcast(
        MFI[, c(mrk, "file", "cluster")],
        file ~ cluster,
        value.var = mrk
      )
      rownames(MFI_marker) <- MFI_marker$file
      # drop = FALSE keeps a data frame even when a single cluster is present
      MFI_marker <- MFI_marker[
        , setdiff(colnames(MFI_marker), "file"),
        drop = FALSE
      ]
      list_MFI_marker[[clustCol]][[mrk]] <- MFI_marker
      present_clusters <- colnames(MFI_marker)
      # Look the labels up by cluster id rather than by row name, so the
      # mapping stays correct when ids are not simply 1..n in file order
      cluster_ids_def[[clustCol]][[mrk]] <- setNames(
        as.character(id2name[[2]][match(present_clusters, id2name[[1]])]),
        present_clusters
      )
    }
    # Save to xlsx
    wb <- createWorkbook()
    for (mrk in names(list_MFI_marker[[clustCol]])) {
      addWorksheet(wb, mrk)
      # 1-row matrix: column names are cluster ids, the row holds the labels
      ids_clust <- t(cluster_ids_def[[clustCol]][[mrk]])
      rownames(ids_clust) <- "CellSubset"
      writeData(wb, ids_clust, sheet = mrk, rowNames = TRUE, colNames = TRUE)
      # Data starts on row 3, directly under the id row and the label row
      writeData(
        wb, list_MFI_marker[[clustCol]][[mrk]],
        sheet = mrk, rowNames = TRUE, colNames = FALSE, startRow = 3
      )
    }
    saveWorkbook(
      wb,
      file.path(out_path, paste0("mfi_", clustCol, "_", fct_, ".xlsx")),
      overwrite = TRUE
    )
  }
}
Counts workbooks
# Build wide (cluster x file) tables of counts and frequencies for each
# clustering column.
# NOTE(review): nothing in this loop is written to disk and every iteration
# overwrites the previous tables -- confirm whether a save step is missing.
for (clustCol in clustCols) {
  # Stack the per-file count tables
  counts <- do.call("rbind", fcs_stat[[clustCol]][["abundance"]])
  counts$cluster <- as.factor(counts$cluster)
  # Share of each cluster's total (summed over files) found in each file
  total_counts_per_cluster <- tapply(counts$counts, counts$cluster, sum)
  cluster_key <- as.character(counts$cluster)
  counts$freq_per_clusters <- counts$counts / total_counts_per_cluster[cluster_key]
  # One wide table per value column
  wide_tables <- lapply(
    c("counts", "freq_by_patients", "freq_per_clusters"),
    function(value_col) dcast(counts, cluster ~ file, value.var = value_col)
  )
  counts_wide <- wide_tables[[1]]
  freqs_patients_wide <- wide_tables[[2]]
  freqs_clusters_wide <- wide_tables[[3]]
}
Abundance DB
# Long-format abundance table per clustering column, written to
# "abundance_<clustCol>_db.xlsx" and kept in `abundance`.
abundance <- list()
for (clustCol in clustCols) {
  # Stack the per-file count tables
  counts <- do.call("rbind", fcs_stat[[clustCol]][["abundance"]])
  counts$cluster <- as.factor(counts$cluster)
  # Share of each cluster's total (summed over files) found in each file
  total_counts_per_cluster <- tapply(counts$counts, counts$cluster, sum)
  counts$freq_per_clusters <- counts$counts /
    total_counts_per_cluster[as.character(counts$cluster)]
  # Extract the two regex capture groups from the file name into the
  # "patient" and "condition" columns, keeping the original file column
  abundance[[clustCol]] <- counts |>
    dplyr::select(
      cluster, file, counts, freq_by_patients, freq_per_clusters
    ) |>
    tidyr::extract(
      file,
      into = c("patient", "condition"),
      regex = "\\w+_(\\d+)_\\d+_Patients_(\\w+)\\.fcs\\..+",
      remove = FALSE
    )
  # Write xlsx
  wb2 <- createWorkbook()
  shn <- paste0("abundance_", clustCol)
  addWorksheet(wb2, sheet = shn)
  writeData(wb2, abundance[[clustCol]], sheet = shn)
  saveWorkbook(
    wb2,
    file.path(out_path, paste0(shn, "_db.xlsx")),
    overwrite = TRUE
  )
}
### Number of cells per FCS file
# fcs_length is a list, so enframe() on it would produce a list column, which
# openxlsx collapses to text when writing. Flatten it to a named numeric
# vector first so n_cells is stored as numbers.
number_of_cells_fcs <- tibble::enframe(
  vapply(fcs_length, as.numeric, numeric(1)),
  name = "file", value = "n_cells"
) |>
  tidyr::extract(
    file,
    into = c("patient", "condition"),
    regex = "\\w+_(\\d+)_\\d+_Patients_(\\w+)\\.fcs\\..+",
    remove = FALSE
  )
write.xlsx(number_of_cells_fcs, file.path(out_path, "number_of_cells_fcs.xlsx"))
MFI DB
### Long-format MFI database, one workbook per clustering column
for (clustCol in clustCols) {
  Profiling_median_l <- list()
  for (fct_ in intensity_feat) {
    # Stack the per-file summaries for this statistic
    mfi_wide <- do.call("rbind", fcs_stat[[clustCol]][["MFI"]][[fct_]])
    # Derive the marker columns here instead of relying on a `markers`
    # variable left over from an earlier loop
    markers <- setdiff(
      colnames(mfi_wide),
      c("cluster", "file", markers_to_ignore, clustCols)
    )
    # One row per (cluster, file, marker); the value column is named after
    # the statistic, e.g. "median_mfi"
    Profiling_median_l[[fct_]] <-
      mfi_wide[c(markers, "cluster", "file")] |>
      pivot_longer(
        cols = all_of(markers),
        names_to = "marker",
        values_to = paste0(fct_, "_mfi")
      )
  }
  # merge() joins the per-statistic tables on their shared columns, then the
  # two regex capture groups of the file name become "patient" and "condition"
  MFI_Profiling_db <- Reduce(merge, Profiling_median_l) |>
    tidyr::extract(
      file,
      into = c("patient", "condition"),
      regex = "\\w+_(\\d+)_\\d+_Patients_(\\w+)\\.fcs\\..+",
      remove = FALSE
    )
  write.xlsx(
    MFI_Profiling_db,
    file.path(out_path, paste0("mfi_", clustCol, "_db.xlsx"))
  )
}
Script session
Session info
## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=French_France.utf8 LC_CTYPE=French_France.utf8
## [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C
## [5] LC_TIME=French_France.utf8
##
## time zone: Europe/Paris
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tictoc_1.2.1 tidyr_1.3.1 openxlsx_4.2.7.1 reshape2_1.4.4
## [5] plyr_1.8.9 flowCore_2.17.1
##
## loaded via a namespace (and not attached):
## [1] jsonlite_1.8.9 dplyr_1.1.4 compiler_4.4.2
## [4] tidyselect_1.2.1 Rcpp_1.0.13-1 zip_2.3.1
## [7] Biobase_2.66.0 cytolib_2.18.1 stringr_1.5.1
## [10] jquerylib_0.1.4 yaml_2.3.10 fastmap_1.2.0
## [13] R6_2.5.1 RProtoBufLib_2.18.0 generics_0.1.3
## [16] knitr_1.49 BiocGenerics_0.52.0 tibble_3.2.1
## [19] bslib_0.8.0 pillar_1.10.1 rlang_1.1.4
## [22] cachem_1.1.0 stringi_1.8.4 xfun_0.50
## [25] sass_0.4.9 cli_3.6.3 withr_3.0.2
## [28] magrittr_2.0.3 digest_0.6.37 rstudioapi_0.17.1
## [31] lifecycle_1.0.4 S4Vectors_0.44.0 vctrs_0.6.5
## [34] evaluate_1.0.3 glue_1.8.0 stats4_4.4.2
## [37] purrr_1.0.2 rmarkdown_2.29 matrixStats_1.5.0
## [40] tools_4.4.2 pkgconfig_2.0.3 htmltools_0.5.8.1