Skip to contents

First, we load the Tabula Muris tissue data and set up our parameters.

tissue_files <- list.files(".", pattern = "cluster_results_seurat.rds")

cores <- 6
num_PCs <- 10

timing_df <- data.frame()

Then, we loop over each tissue and cluster using the various recall methods and countsplit and save the resulting Seurat objects for each tissue c(ontaining the cluster labels for each method) to RDS files.

for (tissue_file in tissue_files) {
  tissue_name <- unlist(strsplit(tissue_file, "_results_seurat.rds"))[1]
  
  print("Processing:")
  print(tissue_name)
  
  print("Loading tissue")
  seurat_obj <- readRDS(tissue_file) 
  
  print("Running initial seurat workflow")
  p <- 1000
  #tissue <- seurat_workflow(tissue, num_variable_features = p, algorithm="louvain", resolution_param = 0.8)
  
  print("running recall negative binomial")
  recall_NB_start_time <- Sys.time()
  recall_nb_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "NB", cores = cores) #,resolution_start = 2.4)
  recall_NB_end_time <- Sys.time()
  
  print("running recall NB-copula")
  recall_NB_copula_start_time <- Sys.time()
  recall_nb_copula_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "NB-copula", cores = cores)
  recall_NB_copula_end_time <- Sys.time()
  
  print("running recall Poisson-copula")
  recall_Poisson_copula_start_time <- Sys.time()
  recall_poisson_copula_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "Poisson-copula", cores = cores)
  recall_Poisson_copula_end_time <- Sys.time()
  
  #print("running recall Gaussian-copula")
  #recall_gaussian_copula_seurat_obj <- FindClustersRecall(seurat_obj, null_method = "Gaussian-copula", cores = cores)
  
  print("running recall countsplit")
  recall_countsplit_start_time <- Sys.time()
  countsplit_seurat_obj <- recall::FindClustersCountsplit(seurat_obj, cores = cores)
  recall_countsplit_end_time <- Sys.time()
  
  print("storing cluster labels")
  
  seurat_obj[["recall_NB"]] <- recall_nb_seurat_obj@meta.data$recall_clusters
  seurat_obj[["recall_NB-copula"]] <- recall_nb_copula_seurat_obj@meta.data$recall_clusters
  seurat_obj[["recall_Poisson-copula"]] <- recall_poisson_copula_seurat_obj@meta.data$recall_clusters
  seurat_obj[["recall_countsplit"]] <- countsplit_seurat_obj@meta.data$recall_clusters

  print("Saving result to rds file")
  saveRDS(seurat_obj, file =  paste0("new_clustering_output/", tissue_name, "_copula_clustering_results.rds" ))
  
  # todo time taken for each of the new/copula methods
  recall_NB_time_taken <- difftime(recall_NB_end_time, recall_NB_start_time, units="mins")
  recall_NB_copula_time_taken <- difftime(recall_NB_copula_end_time, recall_NB_copula_start_time, units="mins")
  recall_Poisson_copula_time_taken <- difftime(recall_Poisson_copula_end_time, recall_Poisson_copula_start_time, units="mins")
  recall_countsplit_time_taken <- difftime(recall_countsplit_end_time, recall_countsplit_start_time, units="mins")

  tissue <- c(tissue_name, tissue_name, tissue_name, tissue_name)
  method <- c("recall_NB", "recall_NB-copula", "recall_Poisson-copula", "recall_countsplit")
  time <- c(recall_NB_time_taken, recall_NB_copula_time_taken, recall_Poisson_copula_time_taken, recall_countsplit_time_taken)
  
  timing_df_new_row <- data.frame(tissue, method, time)
  
  print(timing_df_new_row)
  
  timing_df <- rbind(timing_df, timing_df_new_row)
}

Finally, we save the runtime data.

print("final timing df")
print(timing_df)
write.csv(timing_df, "additional_tissue_timing_df.csv")