Skip to contents
mesenchymal_stem_cells_marker_genes <- c(
  "Col6a3",
  "Col1a1",
  "Igfbp6",
  "Pdgfra",
  "C1s",
  "Mfap5",
  "Ecm1", 
  "Dcn",
  "Dpep1"
)

skeletal_muscle_satellite_cell_marker_genes <- c(
  "Des",
  "Chodl",
  "Myl12a",
  "Asb5",
  "Sdc4",
  "Apoe",
  "Musk",
  "Myf5",
  "Chrdl2",
  "Notch3"
)

First, we load the limb muscle seurat object and scale it for plotting the heatmap.

limb_muscle <- readRDS("Limb_Musclecluster_results_seurat.rds")
limb_muscle <- Seurat::ScaleData(limb_muscle, features = rownames(limb_muscle))

Next, we sort the cell ontology classes factors so that the cell type with the largest number of cells is first and the cell type with the fewest smallest is last.

# sort levels by size of group (limb_muscle)
limb_muscle@meta.data$cell_ontology_class <- as.factor(limb_muscle@meta.data$cell_ontology_class)
sorted_limb_muscle_clusters <- names(sort(summary(as.factor(na.omit(limb_muscle@meta.data$cell_ontology_class))), decreasing = TRUE))
limb_muscle@meta.data$cell_ontology_class <- factor(limb_muscle@meta.data$cell_ontology_class, levels = sorted_limb_muscle_clusters)

We define a function for plotting the grid of UMAPs.

revision_fig3_scatter_plots <- function(tissue, tissue_name, legend_pos=c(0.6, 0.2)) {  
  louvain_default <- custom_scatter(tissue, "umap", group_by = "seurat_clusters", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 2, label=FALSE) + Seurat::NoLegend()
  louvain_recall <- custom_scatter(tissue, "umap", group_by = "recall_idents", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 2) + Seurat::NoLegend() 
  cell_ontology <- custom_scatter(tissue, "umap", group_by = "cell_ontology_class", x_title = "UMAP 1", y_title = "UMAP 2", pt.size = 2) + 
    ggplot2::theme(legend.position = legend_pos,
                   legend.text = ggplot2::element_text(size=20)) + 
    ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size=6), ncol = 1)) + 
    ggplot2::scale_colour_discrete(na.translate = F)
  
  column_label_1 <- patchwork::wrap_elements(panel = grid::textGrob('Cell Ontology', gp = grid::gpar(fontsize = 64)))
  column_label_2 <- patchwork::wrap_elements(panel = grid::textGrob('Seurat Default', gp = grid::gpar(fontsize = 64)))
  column_label_3 <- patchwork::wrap_elements(panel = grid::textGrob('recall+ZIP', gp = grid::gpar(fontsize = 64, fontfamily = "Courier")))
  
  
  umap_grid <- column_label_1 + column_label_2 + column_label_3 +
    cell_ontology + louvain_default + louvain_recall +
    patchwork::plot_layout(widths = c(5, 5, 5),
                           heights = c(1,3))
  
  return(umap_grid)
}

We plot and save UMAP scatterplots of the Cell Ontology Class, the Seurat default clusters, and the recall clusters.

fig_3_umap_grid <- revision_fig3_scatter_plots(limb_muscle, "limb_muscle", legend_pos = c(0.1, 0.8))

ggplot2::ggsave("fig3_revision__umap_grid.png", fig_3_umap_grid, width = 4.4 * 2^11, height = 1 * 2^11, units = "px")

We define a function for plotting the volcano plots.

# new volcano plots

revision_fig3_volcano_plots <- function(tissue, tissue_name,
                                        recall_cluster1, recall_cluster2,
                                        default_cluster1, default_cluster2,
                                        ymax=150,
                                        y_increment = 10,
                                        genes_to_label_left=c(),
                                        genes_to_label_right=c()) {
  
  volcano_plot <- function(seurat_obj, markers, cluster1, cluster2, title, logfc_thresh=1.0,
                           genes_to_label_left=c(),
                           genes_to_label_right=c()) {
    markers$log10pval <- -log10(markers$p_val)
    
    markers$Name <- rownames(markers)
    
    p_val_thresh <- -log10(0.05 / dim(seurat_obj)[1])
    
    
    markers$log10pval[markers$log10pval > ymax] <- ymax 
    
    markers$color <- "grey"
    
    markers[(markers$avg_log2FC > 1) & (markers$log10pval > p_val_thresh),]$color <- "red"
    markers[(markers$avg_log2FC < -1) & (markers$log10pval > p_val_thresh),]$color <- "blue"
    
    
    p <- ggplot2::ggplot(markers, ggplot2::aes(x=avg_log2FC, y=log10pval, color = color)) + 
      ggplot2::geom_point() +
      ggplot2::scale_colour_identity() +
      ggrepel::geom_label_repel(data = markers %>% dplyr::filter(Name %in% genes_to_label_right), ggplot2::aes(label = Name),
                                min.segment.length = 0,
                                box.padding = 1.5,
                                point.size = 2,
                                size = 10,
                                force = 12,
                                xlim  = c(14,19),
                                ylim  = c(50,160),
                                hjust=0,
                                direction = "y",
                                max.overlaps = Inf) + # right side isn't showing half of the labels
      ggrepel::geom_label_repel(data = markers %>% dplyr::filter(Name %in% genes_to_label_left), ggplot2::aes(label = Name),
                                min.segment.length = 0,
                                box.padding = 1.5,
                                point.size = 2,
                                size = 10,
                                force = 12,
                                seed = 123,
                                xlim  = c(-14,-19),
                                ylim  = c(50,160),
                                hjust=1,
                                direction = "y",
                                max.overlaps = Inf) + # left side isn't showing half of the labels
      #gghighlight::gghighlight(log10pval > p_val_thresh) + 
      #gghighlight::gghighlight(abs(avg_log2FC) > logfc_thresh) + 
      ggplot2::xlim(-20,20) + 
      ggplot2::xlab("Average Log2-Fold Change") +
      ggplot2::ggtitle(title) + 
      ggplot2::theme_bw() +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5),
                     axis.title = ggplot2::element_text(size=40),
                     axis.text = ggplot2::element_text(size=32),
                     title = ggplot2::element_text(size=44),
                     legend.title = ggplot2::element_blank(),
                     legend.background = ggplot2::element_blank(),
                     legend.box.background = ggplot2::element_rect(colour = "black"), 
                     legend.text = ggplot2::element_text(size=32),
                     legend.position = c(0.8, 0.6),
                     legend.key.size = ggplot2::unit(3, "cm")) +
      ggplot2::geom_hline(ggplot2::aes(yintercept=p_val_thresh), size = 1, linetype = 'dashed', show.legend = TRUE) +
      ggplot2::geom_vline(ggplot2::aes(xintercept=logfc_thresh), linetype = 'dashed', size = 1) +
      ggplot2::geom_vline(ggplot2::aes(xintercept=-logfc_thresh), linetype = 'dashed', size = 1) +
      ggplot2::scale_linetype_manual(values=c("dashed")) +
      #scale_color_manual(values=c("blue", "red")) +
      # annotate p-value threshold
      ggplot2::annotate("segment", x = 13, xend = 15, y = 18, yend = p_val_thresh, colour = "black", linetype = "dashed", size = 2) +
      ggplot2::annotate("label", x = 13, y = 25, label = "Adj. P-value = 0.05", size = 12) +
      # annotate logFC thresholds
      ggplot2::annotate("segment", x = 11.6, xend = 1.0, y = 40, yend = 60, colour = "black", linetype = "dashed", size = 2) +
      ggplot2::annotate("label", x = 13, y = 40, label = "Avg. Log2-FC = 1.0", size = 12) +
      ggplot2::annotate("segment", x = -11.6, xend = -1.0, y = 40, yend = 60, colour = "black", linetype = "dashed", size = 2) +
      ggplot2::annotate("label", x = -13, y = 40, label = "Avg. Log2-FC = -1.0", size = 12) +
      #geom_text_repel(data=subset(markers, abs(avg_log2FC) > logfc_thresh & p_val_adj < 0.05), aes(label = Name), size = 8)
      ggplot2::scale_y_continuous(name = "-log10 P-value", limits = c(0, ymax))#,
    #breaks = seq(from = 0, to = ymax, by = y_increment)
    #labels = c(seq(from = 0, to = ymax - 100, by = y_increment), paste0('\u2265', ymax)))
    return(p)
  }
  
  
  # re-index clusters to be one-based
  tissue@meta.data$recall_idents <- as.factor(as.numeric(tissue@meta.data$recall_idents))
  
  Seurat::Idents(tissue) <- tissue@meta.data$recall_idents
  #DimPlot(tissue)
  
  recall_markers <- Seurat::FindMarkers(tissue,
                                          ident.1 = recall_cluster1,
                                          ident.2 = recall_cluster2,
                                          logfc.threshold = 0.0,
  )
  # highlighted markers
  subset(recall_markers, abs(avg_log2FC) > 1.0 & p_val_adj < 0.05)
  
  recall_title <- paste0("recall+ZIP: Cluster ", recall_cluster1, " vs Cluster ", recall_cluster2)
  volcano_recall <- volcano_plot(tissue, recall_markers, recall_cluster1, recall_cluster2, recall_title,
                                   genes_to_label_left = genes_to_label_left, genes_to_label_right = genes_to_label_right)
  
  # re-index clusters to be one-based
  tissue@meta.data$seurat_clusters <- as.factor(as.numeric(tissue@meta.data$seurat_clusters))
  Seurat::Idents(tissue) <- tissue@meta.data$seurat_clusters
  
  default_markers <- Seurat::FindMarkers(tissue,
                                         ident.1 = default_cluster1,
                                         ident.2 = default_cluster2,
                                         logfc.threshold = 0.0,
  )
  # highlighted markers
  subset(default_markers, abs(avg_log2FC) > 1.0 & p_val_adj < 0.05)
  
  default_title <- paste0("Default: Cluster ", default_cluster1, " vs Cluster ", default_cluster2)
  volcano_default <- volcano_plot(tissue, default_markers, default_cluster1, default_cluster2, default_title)
  
  
  return(list("volcano_recall" = volcano_recall, "volcano_default" = volcano_default))
}

Finally, we do differential expression testing between the callback cluster corresponding to skeletal mucle satellite cells and the callback cluster corresponding to mesenchymal stem cell cluster as well as two of the Seurat clusters that correspond to the same cell types. Notice that the P-values produced after clustering with recall are more significant due to the increased sample size from correctly clustering the two cell types.

volcanos = revision_fig3_volcano_plots(limb_muscle, "limb_muscle", recall_cluster1 = 1,
                              recall_cluster2 = 2,
                              default_cluster1 = 1,
                              default_cluster2 = 3,
                              ymax=150,
                              y_increment = 100,
                              genes_to_label_left = mesenchymal_stem_cells_marker_genes,
                              genes_to_label_right = skeletal_muscle_satellite_cell_marker_genes)

ggplot2::ggsave("fig3_revision_limb_muscle_volcano_plot_default.png", volcanos$volcano_default, width = 2^12, height = 2^12, units = "px")
ggplot2::ggsave("fig3_revision_limb_muscle_volcano_plot_recall.png", volcanos$volcano_recall, width = 2^12,  height = 2^12, units = "px")