Skip to contents

First, we write a function that will plot the clustering metrics bar plots for the supplementary figures.

clustering_metrics_facet_plot_with_additions <- function (cluster_metrics_df, statistic, y_label) 
{
  # remove underscores from tissue names
  cluster_metrics_df$tissue_name <- gsub("_", "\n", cluster_metrics_df$tissue_name)
  
  # re-order factor levels
  cluster_metrics_df$method <- factor(cluster_metrics_df$method, levels = c("callback",
                                                                            "callback_NB",
                                                                            "callback_Poisson-copula",
                                                                            "callback_NB-copula",
                                                                            "callback_countsplit",
                                                                            "sc-SHC",
                                                                            "CHOIR",
                                                                            "scace_cluster"))
  
  
  small_text_size <- 16
  large_text_size <- 22
  
  bar_plot <- ggplot2::ggplot(cluster_metrics_df, ggplot2::aes(x = method, 
                                                               y = !!rlang::sym(statistic),
                                                               fill = method,
                                                               #label=sprintf("%0.3f", round(!!rlang::sym(statistic), digits = 3)))) + 
                                                               label=sprintf("%0.2f", round(!!rlang::sym(statistic), digits = 2)))) + 
    ggplot2::geom_bar(
      stat = "identity", 
      position = "dodge",
      color = "black",
      alpha = 0.7) + 
    ggplot2::facet_wrap(~tissue_name, ncol = 4) + 
    ggplot2::scale_y_continuous(breaks=c(0, 0.5, 1.00), limits = c(-0.05, 1.1)) +
    ggplot2::xlab("Tabula Muris Tissues") +
    ggplot2::ylab(y_label) + 
    ggplot2::ggtitle(y_label) + 
    ggplot2::labs(fill = "Method") + 
    #ggplot2::scale_fill_brewer(palette = "Set1", labels = c("callback", "sc-SHC", "CHOIR")) + 
    ggplot2::scale_fill_manual(values = c("red", "darkmagenta", "darkgreen", "deepskyblue2", "darkorange1", "grey", "black", "tan"),
                               labels = c("recall+ZIP", "recall+NB", "recall+Poisson-copula", "recall+NB-copula", "recall+countsplit", "sc-SHC", "CHOIR", "scACE")) + 
    ggplot2::theme_bw() +
    ggplot2::theme(axis.ticks.x = ggplot2::element_blank(), 
                   axis.text.x = ggplot2::element_blank(),
                   axis.text = ggplot2::element_text(size = small_text_size),
                   axis.title = ggplot2::element_text(size = large_text_size),
                   strip.text = ggplot2::element_text(size = small_text_size), 
                   legend.text = ggplot2::element_text(size = small_text_size, family = "Courier"),
                   legend.title = ggplot2::element_text(size = small_text_size),
                   plot.title = ggplot2::element_text(size = large_text_size, hjust = 0.5),
                   legend.position="bottom") + 
    ggplot2::geom_text(vjust = -0.2)
  
  return(bar_plot)
}

Second, we write a function that will do plot the clustering metrics bar plots for Figure 2.

clustering_metrics_facet_plot_for_figure2 <- function (cluster_metrics_df, statistic, y_label) 
{
  # remove underscores from tissue names
  cluster_metrics_df$tissue_name <- gsub("_", "\n", cluster_metrics_df$tissue_name)
  
  # re-order factor levels
  cluster_metrics_df$method <- factor(cluster_metrics_df$method, levels = c("callback",
                                                                            "callback_NB",
                                                                            "callback_Poisson-copula",
                                                                            "callback_NB-copula",
                                                                            "callback_countsplit",
                                                                            "sc-SHC",
                                                                            "CHOIR",
                                                                            "scace_cluster"))
  
  cluster_metrics_df <- subset(cluster_metrics_df, method %in% c("callback", "sc-SHC", "CHOIR", "scace_cluster"))
  
  
  
  
  small_text_size <- 16
  large_text_size <- 22
  
  bar_plot <- ggplot2::ggplot(cluster_metrics_df, ggplot2::aes(x = method, 
                                                               y = !!rlang::sym(statistic),
                                                               fill = method,
                                                               label=sprintf("%0.3f", round(!!rlang::sym(statistic), digits = 3)))) + 
                                                               #label=sprintf("%0.2f", round(!!rlang::sym(statistic), digits = 2)))) + 
    ggplot2::geom_bar(
      stat = "identity", 
      position = "dodge",
      color = "black",
      alpha = 0.7) + 
    ggplot2::facet_wrap(~tissue_name, ncol = 4) + 
    ggplot2::scale_y_continuous(breaks=c(0, 0.5, 1.00), limits = c(-0.05, 1.1)) +
    ggplot2::xlab("Tabula Muris Tissues") +
    ggplot2::ylab(y_label) + 
    ggplot2::ggtitle(y_label) + 
    ggplot2::labs(fill = "Method") + 
    #ggplot2::scale_fill_brewer(palette = "Set1", labels = c("callback", "sc-SHC", "CHOIR")) + 
    ggplot2::scale_fill_manual(values = c("red", "grey", "black", "tan"), labels = c("recall+ZIP", "sc-SHC", "CHOIR", "scAce")) + 
    ggplot2::theme_bw() +
    ggplot2::theme(axis.ticks.x = ggplot2::element_blank(), 
                   axis.text.x = ggplot2::element_blank(),
                   axis.text = ggplot2::element_text(size = small_text_size),
                   axis.title = ggplot2::element_text(size = large_text_size),
                   strip.text = ggplot2::element_text(size = small_text_size), 
                   legend.text = ggplot2::element_text(size = small_text_size, family = "Courier"),
                   legend.title = ggplot2::element_text(size = small_text_size),
                   plot.title = ggplot2::element_text(size = large_text_size, hjust = 0.5),
                   legend.position="bottom") + 
    ggplot2::geom_text(vjust = -0.2)
  
  return(bar_plot)
}

We load the clustering data and plot the relevant clustering evaluation metrics.

cluster_metrics_df <- read.csv("cluster_metrics_df.csv", row.names = 1)

cluster_metrics_copula <- read.csv("tabula_muris_copula_additional_clustering_metrics.csv", row.names = 1)
# one column name had a mismatch
names(cluster_metrics_df)[names(cluster_metrics_df) == 'ari'] <- 'ari_results'
cluster_metrics_df <- rbind(cluster_metrics_df, cluster_metrics_copula)


bar_plot_width <- 2 * 1440
bar_plot_height <- 2.4 * 1440

vmeasure_plot <- clustering_metrics_facet_plot_with_additions(cluster_metrics_df, "v_measure_results", "V-measure")
vmeasure_plot_fig2 <- clustering_metrics_facet_plot_for_figure2(cluster_metrics_df, "v_measure_results", "V-measure")
ggsave("vmeasure_plot.png", vmeasure_plot, width = 2 * bar_plot_width, height = bar_plot_height, units = 'px')
ggsave("fig2_vmeasure_plot.png", vmeasure_plot_fig2, width = 2 * 1440, height = bar_plot_height, units = 'px')

ari_plot <- clustering_metrics_facet_plot_with_additions(cluster_metrics_df, "ari_results", "ARI")
ari_plot_fig2 <- clustering_metrics_facet_plot_for_figure2(cluster_metrics_df, "ari_results", "ARI")
ggsave("ari_plot.png", ari_plot, width = 2 * bar_plot_width, height = bar_plot_height, units = 'px')
ggsave("fig2_ari_plot.png", ari_plot_fig2, width = bar_plot_width, height = bar_plot_height, units = 'px')

fm_plot <- clustering_metrics_facet_plot_with_additions(cluster_metrics_df, "fowlkes_mallows_results", "Fowlkes-Mallows Index")
ggsave("fm_plot.png", fm_plot, width = 2 * bar_plot_width, height = bar_plot_height, units = 'px')

homogeneity_plot <- clustering_metrics_facet_plot_with_additions(cluster_metrics_df, "homogeneity_results", "Homogeneity") 
ggsave("homogeneity_plot.png", homogeneity_plot, width = 2 * bar_plot_width, height = bar_plot_height, units = 'px')

completeness_plot <- clustering_metrics_facet_plot_with_additions(cluster_metrics_df, "completeness_results", "Completeness") 
ggsave("completeness_plot.png", completeness_plot, width = 2 * bar_plot_width, height = bar_plot_height, units = 'px')

jaccard_plot <- clustering_metrics_facet_plot_with_additions(cluster_metrics_df, "jaccard_results", "Jaccard Index")
ggsave("jaccard_plot.png", jaccard_plot, width = 2 * bar_plot_width, height = bar_plot_height, units = 'px')

We load the timing and memory data and plot it.

tissue_timing_df <- read.csv("tissue_timing_df.csv", row.names = 1)

scAce_timing_df <- read.csv("scAce_timing.csv", row.names = 1)

tissue_timing_df <- rbind(tissue_timing_df, scAce_timing_df)


# order factor levels
tissue_timing_df$method <- factor(tissue_timing_df$method, levels=c("callback", "sc-SHC", "CHOIR", "scAce"))

# remove underscores from tissue names
tissue_timing_df$tissue <- gsub("_", "\n", tissue_timing_df$tissue)

small_text_size <- 16
large_text_size <- 22

p <- ggplot2::ggplot(tissue_timing_df, ggplot2::aes(x=method, y=time, fill=method, label=sprintf("%0.1f", round(time, digits = 1)))) +
  ggplot2::geom_bar(stat = "identity", alpha = 0.7, colour = 'black') + 
  ggplot2::facet_wrap(~tissue, nrow = 2) +
  ggplot2::xlab("Tabula Muris Tissues") + 
  ggplot2::ylab("Time Taken (minutes)") +
  ggplot2::labs(fill = "Method") + 
  ggplot2::ylim(c(0,18)) +
  ggplot2::scale_fill_manual(values = c("red", "grey", "black", "tan"), labels = c("recall+ZIP", "sc-SHC", "CHOIR", "scAce")) + 
  ggplot2::theme_bw() +
  ggplot2::theme(axis.ticks.x = ggplot2::element_blank(), 
                 axis.text.x = ggplot2::element_blank(),
                 axis.text = ggplot2::element_text(size = small_text_size),
                 axis.title = ggplot2::element_text(size = large_text_size),
                 strip.text = ggplot2::element_text(size = small_text_size), 
                 legend.text = ggplot2::element_text(size = small_text_size, family = "Courier"),
                 legend.title = ggplot2::element_text(size = small_text_size),
                 legend.position = "bottom") + 
  geom_text(vjust = -0.2)

ggsave("fig2_tabula_muris_timing.png", p, width = 4 * 1440, height = 4 * 460, units = "px")

Finally, we plot the UMAPs for Diaphragm and Limb Muscle.

# plot UMAPs for two tissues

diaphragm <- readRDS(file = "Diaphragmcluster_results_seurat.rds")
limb_muscle <- readRDS(file = "Limb_Musclecluster_results_seurat.rds")

diagphragm_scace <- anndata::read_h5ad("scace_output/Diaphragm.h5ad")
diagphragm_scace <- Seurat::CreateSeuratObject(counts = t(diagphragm_scace$X), meta.data = diagphragm_scace$obs)

limb_muscle_scace <- anndata::read_h5ad("scace_output/Limb_Muscle.h5ad")
limb_muscle_scace <- Seurat::CreateSeuratObject(counts = t(limb_muscle_scace$X), meta.data = limb_muscle_scace$obs)

# add scAce idents to Seurat objs
diaphragm@meta.data$scAce_idents <- diagphragm_scace@meta.data$scace_cluster
limb_muscle@meta.data$scAce_idents <- limb_muscle_scace@meta.data$scace_cluster

# remove NAs
limb_muscle <- subset(limb_muscle, subset = cell_ontology_class %in% levels(factor(limb_muscle@meta.data$cell_ontology_class)))
diaphragm <- subset(diaphragm, subset = cell_ontology_class %in% levels(factor(diaphragm@meta.data$cell_ontology_class)))

# clean up cell type labels for limb_muscle
#limb_muscle@meta.data$cell_ontology_class[limb_muscle@meta.data$cell_ontology_class == ""] <- "cardiac neuron"
#limb_muscle@meta.data$cell_ontology_class <- trimws(limb_muscle@meta.data$cell_ontology_class, whitespace = " cell")

# shorten cell type labels for diaphragm
#diaphragm@meta.data$cell_ontology_class <- trimws(diaphragm@meta.data$cell_ontology_class, whitespace = " cell")

# sort levels by size of group (limb_muscle)
limb_muscle@meta.data$cell_ontology_class <- as.factor(limb_muscle@meta.data$cell_ontology_class)
sorted_limb_muscle_clusters <- names(sort(summary(as.factor(na.omit(limb_muscle@meta.data$cell_ontology_class))), decreasing = TRUE))
limb_muscle@meta.data$cell_ontology_class <- factor(limb_muscle@meta.data$cell_ontology_class, levels = sorted_limb_muscle_clusters)

# sort levels by size of group (diaphragm)
diaphragm@meta.data$cell_ontology_class <- as.factor(diaphragm@meta.data$cell_ontology_class)
sorted_diaphragm_clusters <- names(sort(summary(as.factor(na.omit(diaphragm@meta.data$cell_ontology_class))), decreasing = TRUE))
diaphragm@meta.data$cell_ontology_class <- factor(diaphragm@meta.data$cell_ontology_class, levels = sorted_diaphragm_clusters)


library(callbackreproducibility)
library(Seurat)
library(ggplot2)
library(patchwork)
library(grid)

y_limits <- c(-30, 12)
x_limits <- c(-20, 20)
legend_location <- c(0.1, 0.2)


tissue <- limb_muscle
limb_muscle_curated_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "cell_ontology_class", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) +
  theme(legend.position = legend_location) + guides(color=guide_legend(ncol=2)) + 
  xlim(x_limits) + ylim(y_limits) + 
  scale_colour_discrete(labels = function(x) stringr::str_wrap(x, width = 15))

limb_muscle_default_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "seurat_clusters", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) +
  NoLegend() + xlim(x_limits) + ylim(y_limits)
limb_muscle_callback_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "callback_idents", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) + 
  NoLegend() + xlim(x_limits) + ylim(y_limits)
limb_muscle_scSHC_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "scSHC_clusters", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) +
  NoLegend() + xlim(x_limits) + ylim(y_limits)
limb_muscle_CHOIR_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "CHOIR_clusters_0.05", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) + 
  NoLegend() + xlim(x_limits) + ylim(y_limits)
limb_muscle_scAce_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "scAce_idents", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) +
  NoLegend() + xlim(x_limits) + ylim(y_limits)

tissue <- diaphragm
diaphragm_curated_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "cell_ontology_class", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) + 
  theme(legend.position = legend_location) + 
  guides(color=guide_legend(ncol=2)) + 
  xlim(x_limits) + ylim(y_limits) +
  scale_colour_discrete(labels = function(x) stringr::str_wrap(x, width = 15))

diaphragm_default_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "seurat_clusters", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) + 
  NoLegend() + xlim(x_limits) + ylim(y_limits)
diaphragm_callback_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "callback_idents", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) + 
  NoLegend() + xlim(x_limits) + ylim(y_limits)
diaphragm_scSHC_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "scSHC_clusters", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) + 
  NoLegend() + xlim(x_limits) + ylim(y_limits)
diaphragm_CHOIR_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "CHOIR_clusters_0.05", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) +
  NoLegend() + xlim(x_limits) + ylim(y_limits)
diaphragm_scAce_scatter <- custom_scatter(tissue, reduction = "umap", group_by = "scAce_idents", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 4) +
  NoLegend() + xlim(x_limits) + ylim(y_limits)


row_label_1 <- wrap_elements(panel = textGrob('Diaphragm', gp = gpar(fontsize = 64), rot = 90))
row_label_2 <- wrap_elements(panel = textGrob('Limb Muscle', gp = gpar(fontsize = 64), rot = 90))

column_label_1 <- wrap_elements(panel = textGrob('Curated Labels', gp = gpar(fontsize = 64)))
column_label_2 <- wrap_elements(panel = textGrob('recall+ZIP', gp = gpar(fontsize = 64, fontfamily = "Courier")))
column_label_3 <- wrap_elements(panel = textGrob('sc-SHC', gp = gpar(fontsize = 64, fontfamily = "Courier")))
column_label_4 <- wrap_elements(panel = textGrob('CHOIR', gp = gpar(fontsize = 64, fontfamily = "Courier")))
column_label_5 <- wrap_elements(panel = textGrob('scAce', gp = gpar(fontsize = 64, fontfamily = "Courier")))

patchwork_grid <- plot_spacer() + column_label_1 + column_label_2 + column_label_3 + column_label_4 + column_label_5 +
  row_label_1 + diaphragm_curated_scatter + diaphragm_callback_scatter + diaphragm_scSHC_scatter + diaphragm_CHOIR_scatter + diaphragm_scAce_scatter +
  plot_spacer() + column_label_1 + column_label_2 + column_label_3 + column_label_4 + column_label_5 +
  row_label_2 + limb_muscle_curated_scatter + limb_muscle_callback_scatter + limb_muscle_scSHC_scatter + limb_muscle_CHOIR_scatter + limb_muscle_scAce_scatter +
  plot_layout(widths = c(1, 5, 5, 5, 5, 5),
              heights = c(1,3,1,3))


ggsave("fig2_umap_grid.png", patchwork_grid, width = 1.8 * 2^13, height = 0.8 * 2^13, units = "px")