01_extract_from_fcs_to_db

Parameters
helpers
Read annotations
Parse FCS files
MFI workbooks
Counts workbooks
Abundance DB
MFI DB
Script session

Parameters

# Input directory (the script reads its "fcs" and "attachments"
# sub-directories) and output directory for the generated workbooks.
in_path <- "input"
out_path <- "extract"

# FCS columns holding the cluster ids; each one has a matching
# "<name>.csv" annotation file.
clustCols <- c("Profiling", "Assignment")

# Summary functions applied to the intensities, keyed by feature name.
intensity_func <- list(mean = base::mean, median = stats::median)
intensity_feat <- names(intensity_func)

# Columns that are not markers and are left out of the MFI outputs.
markers_to_ignore <- c("Time", "Event_length")

helpers

# Use tictoc for timing when it is available; otherwise define no-op
# stand-ins so that later tic()/toc() calls still run.
# requireNamespace() probes for this single package, whereas
# installed.packages() scans every installed package and can be slow.
if (requireNamespace("tictoc", quietly = TRUE)) {
  library(tictoc)
} else {
  # Return invisibly so a top-level toc() does not print "NULL".
  toc <- tic <- function(...) invisible(NULL)
}

Read annotations

# Check the input layout, make sure the output directory exists, and list
# the FCS files to process.
stopifnot(dir.exists(in_path))
stopifnot(dir.exists(file.path(in_path, "fcs")))
# recursive = TRUE lets out_path be a nested path whose parents are missing.
if (!dir.exists(out_path)) dir.create(out_path, recursive = TRUE)
# File names (not full paths) ending in ".fcs".
list_fcs <- list.files(path = file.path(in_path, "fcs"), pattern = "\\.fcs$")
stopifnot(length(list_fcs) > 0)
# Preview at most nine names; head() does not pad with NA when there are
# fewer than nine files, unlike list_fcs[1:9].
knitr::kable(data.frame("fname_files" = head(list_fcs, 9)))

fname_files
160406_EHA001_1758_1_Patients_Acute.fcs.astrolabe.fcs
160406_EHA001_1758_1_Patients_Conv.fcs.astrolabe.fcs
160406_EHA001_1760_1_Patients_Acute.fcs.astrolabe.fcs
160406_EHA001_1760_1_Patients_Conv.fcs.astrolabe.fcs
160406_EHA001_1773_1_Patients_Acute.fcs.astrolabe.fcs
160406_EHA001_1773_1_Patients_Conv.fcs.astrolabe.fcs
160406_EHA001_1785_1_Patients_Acute.fcs.astrolabe.fcs
160406_EHA001_1785_1_Patients_Conv.fcs.astrolabe.fcs
160406_EHA001_1790_1_Patients_Acute.fcs.astrolabe.fcs

# Load the cluster annotations: one "<clustCol>.csv" per clustering column,
# read from the "attachments" sub-directory of the input directory.
tmp_path <- file.path(in_path, "attachments")
stopifnot(dir.exists(tmp_path))
cluster_id2name <- list()
for (clustCol in clustCols) {
  annotation_file <- file.path(tmp_path, paste0(clustCol, ".csv"))
  # Fail with the offending path rather than a generic read.csv() error.
  if (!file.exists(annotation_file)) {
    stop("Annotation file not found: ", annotation_file, call. = FALSE)
  }
  cluster_id2name[[clustCol]] <- read.csv(annotation_file)
}
# Preview at most nine rows per table; head() avoids the all-NA rows that
# [1:9, ] produces when a table has fewer than nine rows.
for (clustCol in clustCols) {
  print(knitr::kable(head(cluster_id2name[[clustCol]], 9)))
}

## 
## 
## | Value|CellSubset                               |
## |-----:|:----------------------------------------|
## |     1|B Cell (CD27-) CXCR5hi CD1chi            |
## |     2|B Cell (CD27-) CXCR5hi CD1clo CCR6hi     |
## |     3|B Cell (CD27-) CXCR5hi CD1clo CCR6lo     |
## |     4|B Cell (CD27-) CXCR5lo                   |
## |     5|B Cell (Memory)                          |
## |     6|B Cell (Plasmablast)                     |
## |     7|B Cell_unassigned CD11chi                |
## |     8|B Cell_unassigned CD11clo CXCR5hi CD1chi |
## |     9|B Cell_unassigned CD11clo CXCR5hi CD1clo |
## 
## 
## | Value|CellSubset                    |
## |-----:|:-----------------------------|
## |     1|B Cell (CD27-)                |
## |     2|B Cell (Memory)               |
## |     3|B Cell (Plasmablast)          |
## |     4|B Cell_unassigned             |
## |     5|Basophil                      |
## |     6|CD4+ CD8+ T Cell              |
## |     7|CD4+ T Cell (Central Memory)  |
## |     8|CD4+ T Cell (Effector Memory) |
## |     9|CD4+ T Cell (EMRA)            |

# Remove the temporary attachments path now that the annotations are loaded.
rm(tmp_path)

Parse FCS files

# Per-file statistics, filled while looping over the FCS files:
#   fcs_stat[[clustCol]][["MFI"]][[feature]][[file]]  per-cluster summary of
#                                                     every column
#   fcs_stat[[clustCol]][["abundance"]][[file]]       per-cluster event counts
#   fcs_length[[file]]                                number of rows in the file
fcs_stat <- list()
fcs_length <- list()

tic("Parsing FCS")
for (fname in list_fcs) {

  # Progress trace: one line per file.
  cat(fname, "\n")

  # Read the file without transformation or range truncation.
  fcs <- read.FCS(
    file.path(in_path, "fcs", fname),
    transformation = FALSE,
    truncate_max_range = FALSE
  )
  event_matrix <- exprs(fcs)
  n_events <- nrow(event_matrix)
  fcs_length[[fname]] <- nrow(fcs)

  for (clustCol in clustCols) {

    cluster_ids <- event_matrix[, clustCol]

    # Intensity features: apply each summary function to every column,
    # grouped by cluster id.
    for (fct_ in intensity_feat) {
      per_cluster_stat <- aggregate(
        event_matrix,
        by = list(cluster = cluster_ids),
        FUN = intensity_func[[fct_]]
      )
      per_cluster_stat$file <- fname
      fcs_stat[[clustCol]][["MFI"]][[fct_]][[fname]] <- per_cluster_stat
    }

    # Abundance: events per cluster, plus their share of the file's events.
    cluster_counts <- count(cluster_ids)
    colnames(cluster_counts) <- c("cluster", "counts")
    cluster_counts$file <- fname
    cluster_counts$freq_by_patients <- cluster_counts$counts / n_events
    fcs_stat[[clustCol]][["abundance"]][[fname]] <- cluster_counts

  }

}

## 160406_EHA001_1758_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1758_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1760_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1760_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1773_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1773_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1785_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1785_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1790_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1790_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1793_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1793_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1794_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1794_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1800_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1800_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1802_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1802_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1822_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1822_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1823_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1823_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1824_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1824_1_Patients_Conv.fcs.astrolabe.fcs 
## 160406_EHA001_1857_1_Patients_Acute.fcs.astrolabe.fcs 
## 160406_EHA001_1857_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1828_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1828_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1829_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1829_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1838_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1838_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1839_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1839_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1842_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1842_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1844_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1844_1_Patients_Conv.fcs.astrolabe.fcs 
## 160407_EHA001_1847_1_Patients_Acute.fcs.astrolabe.fcs 
## 160407_EHA001_1847_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1862_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1862_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1863_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1863_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1864_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1864_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1878_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1878_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1879_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1879_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1880_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1880_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1882_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1882_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1885_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1885_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1886_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1886_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1889_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1889_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1890_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1890_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1891_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1891_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1897_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1897_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1910_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1910_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1912_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1912_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1914_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1914_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1918_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1918_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1920_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1920_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1924_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1924_1_Patients_Conv.fcs.astrolabe.fcs 
## 160408_EHA001_1937_1_Patients_Acute.fcs.astrolabe.fcs 
## 160408_EHA001_1937_1_Patients_Conv.fcs.astrolabe.fcs 
## 160411_EHA001_1925_1_Patients_Acute.fcs.astrolabe.fcs 
## 160411_EHA001_1925_1_Patients_Conv.fcs.astrolabe.fcs 
## 160411_EHA001_1938_1_Patients_Acute.fcs.astrolabe.fcs 
## 160411_EHA001_1938_1_Patients_Conv.fcs.astrolabe.fcs 
## 160411_EHA001_1948_1_Patients_Acute.fcs.astrolabe.fcs 
## 160411_EHA001_1948_1_Patients_Conv.fcs.astrolabe.fcs

# Stop the "Parsing FCS" timer started with tic() before the parsing loop.
toc()

## Parsing FCS: 261.54 sec elapsed

MFI workbooks

# Write one Excel workbook per clustering column and summary statistic
# ("mfi_<clustCol>_<feature>.xlsx"). Each workbook has one sheet per marker:
# row 1 holds the cluster ids, row 2 the matching cell-subset names, and
# rows 3+ hold one line per FCS file with the per-cluster intensity.
for (clustCol in clustCols) {

  # Annotation table; column 1 is the cluster id, column 2 the subset name.
  annot <- cluster_id2name[[clustCol]]

  for (fct_ in intensity_feat) {

    # Stack the per-file summaries into one table.
    MFI <- do.call("rbind", fcs_stat[[clustCol]][["MFI"]][[fct_]])
    # Markers are every column that is not bookkeeping, an ignored
    # channel, or a cluster-id column.
    markers <- setdiff(
      colnames(MFI),
      c("cluster", "file", markers_to_ignore, clustCols)
    )

    list_MFI_marker <- list()
    cluster_ids_def <- list()
    for (mrk in markers) {
      # files x clusters table for this marker
      MFI_marker <- MFI[, c(mrk, "file", "cluster")]
      MFI_marker <- dcast(MFI_marker, file ~ cluster, value.var = mrk)
      rownames(MFI_marker) <- MFI_marker$file
      # drop = FALSE keeps a data frame when only one cluster is present.
      MFI_marker <- MFI_marker[
        , setdiff(colnames(MFI_marker), "file"), drop = FALSE
      ]
      list_MFI_marker[[clustCol]][[mrk]] <- MFI_marker

      # Look the names up by cluster id rather than by row position, so the
      # mapping stays correct when ids are not 1..n in file order.
      present_clusters <- colnames(MFI_marker)
      subset_names <- as.character(
        annot[[2]][match(present_clusters, as.character(annot[[1]]))]
      )
      names(subset_names) <- present_clusters
      cluster_ids_def[[clustCol]][[mrk]] <- subset_names
    }

    # Save to xlsx.
    wb <- createWorkbook()
    for (mrk in names(list_MFI_marker[[clustCol]])) {
      addWorksheet(wb, mrk)
      # One-row matrix: column names are the cluster ids, values the names.
      ids_clust <- t(cluster_ids_def[[clustCol]][[mrk]])
      rownames(ids_clust) <- "CellSubset"
      writeData(wb, sheet = mrk, x = ids_clust,
                rowNames = TRUE, colNames = TRUE)
      # The data start on row 3, under the two header rows written above.
      writeData(wb, sheet = mrk, x = list_MFI_marker[[clustCol]][[mrk]],
                rowNames = TRUE, colNames = FALSE, startRow = 3)
    }
    saveWorkbook(
      wb,
      file.path(out_path, paste0("mfi_", clustCol, "_", fct_, ".xlsx")),
      overwrite = TRUE
    )

  }

}

Counts workbooks

# Write one Excel workbook per clustering column
# ("counts_<clustCol>.xlsx") with three clusters x files sheets: raw counts,
# frequency within each file, and frequency within each cluster.
# NOTE(review): the wide tables used to be computed and then discarded; the
# output file name is a new choice, confirm it suits downstream users.
for (clustCol in clustCols) {

  # Stack the per-file counts into one long table.
  counts <- do.call("rbind", fcs_stat[[clustCol]][["abundance"]])
  counts$cluster <- as.factor(counts$cluster)

  # Share of each cluster's total events (summed over all files) that comes
  # from a given file. as.vector() drops the array attributes inherited
  # from tapply() so the column is a plain numeric vector.
  total_counts_per_cluster <- tapply(counts$counts, counts$cluster, sum)
  counts$freq_per_clusters <- as.vector(
    counts$counts / total_counts_per_cluster[as.character(counts$cluster)]
  )

  # One wide table (clusters in rows, files in columns) per measure.
  wide_tables <- list(
    counts = dcast(counts, cluster ~ file, value.var = "counts"),
    freq_by_patients = dcast(counts, cluster ~ file,
                             value.var = "freq_by_patients"),
    freq_per_clusters = dcast(counts, cluster ~ file,
                              value.var = "freq_per_clusters")
  )
  counts_wide <- wide_tables[["counts"]]
  freqs_patients_wide <- wide_tables[["freq_by_patients"]]
  freqs_clusters_wide <- wide_tables[["freq_per_clusters"]]

  # Save to xlsx, one sheet per measure.
  wb_counts <- createWorkbook()
  for (sheet_name in names(wide_tables)) {
    addWorksheet(wb_counts, sheet_name)
    writeData(wb_counts, sheet = sheet_name, x = wide_tables[[sheet_name]])
  }
  saveWorkbook(
    wb_counts,
    file.path(out_path, paste0("counts_", clustCol, ".xlsx")),
    overwrite = TRUE
  )
}

Abundance DB

# Build one long-format abundance table per clustering column (one row per
# cluster and file) and save it as "abundance_<clustCol>_db.xlsx".
abundance <- list()

for (clustCol in clustCols) {

  # All files in one table.
  counts <- do.call("rbind", fcs_stat[[clustCol]][["abundance"]])
  counts$cluster <- as.factor(counts$cluster)

  # Frequency per cluster: share of the cluster's total events (over all
  # files) found in each file. as.vector() drops the array attributes
  # inherited from tapply() so the column is a plain numeric vector.
  total_counts_per_cluster <- tapply(counts$counts, counts$cluster, sum)
  counts$freq_per_clusters <- as.vector(
    counts$counts / total_counts_per_cluster[as.character(counts$cluster)]
  )

  # Extract metadata: the two capture groups of the file-name pattern become
  # the patient and condition columns; the file column is kept.
  abundance[[clustCol]] <- counts |>
    dplyr::select(
      cluster, file, counts, freq_by_patients, freq_per_clusters
    ) |>
    tidyr::extract(
      file, into = c("patient", "condition"),
      regex = "\\w+_(\\d+)_\\d+_Patients_(\\w+)\\.fcs\\..+",
      remove = FALSE
    )

  # Write xlsx.
  wb2 <- createWorkbook()
  shn <- paste0("abundance_", clustCol)
  addWorksheet(wb2, sheetName = shn)
  writeData(wb2, sheet = shn, x = abundance[[clustCol]])
  saveWorkbook(
    wb2, file.path(out_path, paste0(shn, "_db.xlsx")), overwrite = TRUE
  )

}

### ncells by fcs 

# Number of rows (cells) per FCS file, with patient and condition parsed from
# the file name, saved as "number_of_cells_fcs.xlsx".
# fcs_length is a list, so enframe() on it would make n_cells a list-column;
# flatten it to a named integer vector first to get a numeric column.
n_cells_by_file <- vapply(
  fcs_length,
  function(n) as.integer(n)[[1L]],
  integer(1)
)

number_of_cells_fcs <- tibble::enframe(
  n_cells_by_file, name = "file", value = "n_cells"
) |>
  tidyr::extract(
    file, into = c("patient", "condition"),
    regex = "\\w+_(\\d+)_\\d+_Patients_(\\w+)\\.fcs\\..+",
    remove = FALSE
  )

write.xlsx(number_of_cells_fcs, file.path(out_path, "number_of_cells_fcs.xlsx"))

MFI DB

### Profiling MFI

# Build one long-format MFI table per clustering column (one row per
# cluster, file and marker, one value column per summary statistic) and
# save it as "mfi_<clustCol>_db.xlsx".
for (clustCol in clustCols) {

  Profiling_median_l <- list()

  for (fct_ in intensity_feat) {

    # Stack the per-file summaries and work out the marker columns here,
    # instead of relying on `markers` left over from an earlier loop.
    MFI <- do.call("rbind", fcs_stat[[clustCol]][["MFI"]][[fct_]])
    markers <- setdiff(
      colnames(MFI),
      c("cluster", "file", markers_to_ignore, clustCols)
    )

    # One row per (cluster, file, marker); values go to "<feature>_mfi".
    Profiling_median_l[[fct_]] <-
      MFI[c(markers, "cluster", "file")] |>
      tidyr::pivot_longer(
        cols = tidyr::all_of(markers),
        names_to = "marker",
        values_to = paste0(fct_, "_mfi")
      )

  }

  # Join the per-feature tables on their shared keys, then parse patient
  # and condition from the file name (the file column is kept).
  MFI_Profiling_db <- Reduce(
    function(x, y) merge(x, y, by = c("cluster", "file", "marker")),
    Profiling_median_l
  ) |>
    tidyr::extract(
      file, into = c("patient", "condition"),
      regex = "\\w+_(\\d+)_\\d+_Patients_(\\w+)\\.fcs\\..+",
      remove = FALSE
    )

  write.xlsx(
    MFI_Profiling_db,
    file.path(out_path, paste0("mfi_", clustCol, "_db.xlsx"))
  )
}

Script session

Session info

## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=French_France.utf8  LC_CTYPE=French_France.utf8   
## [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C                  
## [5] LC_TIME=French_France.utf8    
## 
## time zone: Europe/Paris
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] tictoc_1.2.1     tidyr_1.3.1      openxlsx_4.2.7.1 reshape2_1.4.4  
## [5] plyr_1.8.9       flowCore_2.17.1 
## 
## loaded via a namespace (and not attached):
##  [1] jsonlite_1.8.9      dplyr_1.1.4         compiler_4.4.2     
##  [4] tidyselect_1.2.1    Rcpp_1.0.13-1       zip_2.3.1          
##  [7] Biobase_2.66.0      cytolib_2.18.1      stringr_1.5.1      
## [10] jquerylib_0.1.4     yaml_2.3.10         fastmap_1.2.0      
## [13] R6_2.5.1            RProtoBufLib_2.18.0 generics_0.1.3     
## [16] knitr_1.49          BiocGenerics_0.52.0 tibble_3.2.1       
## [19] bslib_0.8.0         pillar_1.10.1       rlang_1.1.4        
## [22] cachem_1.1.0        stringi_1.8.4       xfun_0.50          
## [25] sass_0.4.9          cli_3.6.3           withr_3.0.2        
## [28] magrittr_2.0.3      digest_0.6.37       rstudioapi_0.17.1  
## [31] lifecycle_1.0.4     S4Vectors_0.44.0    vctrs_0.6.5        
## [34] evaluate_1.0.3      glue_1.8.0          stats4_4.4.2       
## [37] purrr_1.0.2         rmarkdown_2.29      matrixStats_1.5.0  
## [40] tools_4.4.2         pkgconfig_2.0.3     htmltools_0.5.8.1

01_extract_from_fcs_to_db

2025-01-30

Parameters

helpers

Read annotations

Parse FCS files

MFI workbooks

Counts workbooks

Abundance DB

MFI DB

Script session