9. Clustering Tabula Muris Tissues With Additional Methods (Figure 2, S11-S16, S37)
09_cluster-tabula-muris_additional.Rmd
suppressPackageStartupMessages({
library(dplyr)
library(Seurat)
library(ggplot2)
library(recall)
})
set.seed(123456)
First, we load the Tabula Muris tissue data and set up our parameters.
tissue_files <- list.files(".", pattern = "cluster_results_seurat.rds")
cores <- 6
num_PCs <- 10
timing_df <- data.frame()
Then, we loop over each tissue and cluster using the various
recall
methods and countsplit
and save the
resulting Seurat
objects for each tissue c(ontaining the
cluster labels for each method) to RDS files.
for (tissue_file in tissue_files) {
tissue_name <- unlist(strsplit(tissue_file, "_results_seurat.rds"))[1]
print("Processing:")
print(tissue_name)
print("Loading tissue")
seurat_obj <- readRDS(tissue_file)
print("Running initial seurat workflow")
p <- 1000
#tissue <- seurat_workflow(tissue, num_variable_features = p, algorithm="louvain", resolution_param = 0.8)
print("running recall negative binomial")
recall_NB_start_time <- Sys.time()
recall_nb_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "NB", cores = cores) #,resolution_start = 2.4)
recall_NB_end_time <- Sys.time()
print("running recall NB-copula")
recall_NB_copula_start_time <- Sys.time()
recall_nb_copula_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "NB-copula", cores = cores)
recall_NB_copula_end_time <- Sys.time()
print("running recall Poisson-copula")
recall_Poisson_copula_start_time <- Sys.time()
recall_poisson_copula_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "Poisson-copula", cores = cores)
recall_Poisson_copula_end_time <- Sys.time()
#print("running recall Gaussian-copula")
#recall_gaussian_copula_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "Gaussian-copula", cores = cores)
print("running recall countsplit")
recall_countsplit_start_time <- Sys.time()
countsplit_seurat_obj <- recall::FindClustersCountsplit(seurat_obj, cores = cores)
recall_countsplit_end_time <- Sys.time()
print("storing cluster labels")
seurat_obj[["recall_NB"]] <- recall_nb_seurat_obj@meta.data$recall_clusters
seurat_obj[["recall_NB-copula"]] <- recall_nb_copula_seurat_obj@meta.data$recall_clusters
seurat_obj[["recall_Poisson-copula"]] <- recall_poisson_copula_seurat_obj@meta.data$recall_clusters
seurat_obj[["recall_countsplit"]] <- countsplit_seurat_obj@meta.data$recall_clusters
print("Saving result to rds file")
saveRDS(seurat_obj, file = paste0("new_clustering_output/", tissue_name, "_copula_clustering_results.rds" ))
# todo time taken for each of the new/copula methods
recall_NB_time_taken <- difftime(recall_NB_end_time, recall_NB_start_time, units="mins")
recall_NB_copula_time_taken <- difftime(recall_NB_copula_end_time, recall_NB_copula_start_time, units="mins")
recall_Poisson_copula_time_taken <- difftime(recall_Poisson_copula_end_time, recall_Poisson_copula_start_time, units="mins")
recall_countsplit_time_taken <- difftime(recall_countsplit_end_time, recall_countsplit_start_time, units="mins")
tissue <- c(tissue_name, tissue_name, tissue_name, tissue_name)
method <- c("recall_NB", "recall_NB-copula", "recall_Poisson-copula", "recall_countsplit")
time <- c(recall_NB_time_taken, recall_NB_copula_time_taken, recall_Poisson_copula_time_taken, recall_countsplit_time_taken)
timing_df_new_row <- data.frame(tissue, method, time)
print(timing_df_new_row)
timing_df <- rbind(timing_df, timing_df_new_row)
}
Finally, we save the runtime data.