Advanced Search
Group-based Minimum Spanning Tree Correlation Network
Last updated date: Feb 18, 2026 Views: 23 Forks: 0
Group-based Minimum Spanning Tree Correlation Network
Authors: Camilo Espinosa Bernal, Nima Aghaeepour
The R script outlined below and attached as a supplementary txt file represent a user-friendly template to generate a group-based MST of a correlation network of clinical features for which predictive multiomic models have been built.
# "
# Generic Multi-Omic MST (Minimum Spanning Tree) Template
# ========================================================
# INPUTS REQUIRED
# ---------------
# 1. clinical_table (CSV):
# - One row per sample/subject.
# - Must contain a column matching SAMPLE_ID_COL (e.g. 'SampleID').
# - All remaining columns are treated as clinical features to network.
# 2. multiomic_table (CSV):
# - One row per feature (clinical covariate).
# - Must contain at minimum:
# * FEATURE_COL : feature name matching column names in clinical_table (e.g. 'Covariate')
# * PVAL_COL : -log10(p-value) representing multiomic predictability of each feature (e.g. 'pValOmics')
# * GROUP_COL : group label used to partition features into sub-MSTs (e.g. 'Group')
# - Optional columns carried through as vertex attributes:
# * Any extra columns you want visualized on nodes.
# 3. group_table (CSV):
# - Maps each clinical feature to a group for sub-MST construction.
# - Must contain:
# * FEATURE_COL : same feature name column as above
# * GROUP_COL : group label
# - If group assignments are already present in multiomic_table, set
# USE_SEPARATE_GROUP_TABLE <- FALSE and this file is ignored.
# OUTPUT
# ------
# - An igraph object (reducedGraph) ready for plotting.
# - A ggplot2 MST visualization saved to OUTPUT_PLOT_PATH.
# "
# ============================================================
# 0. LIBRARIES
# ============================================================
library(magrittr)
library(data.table)
library(igraph)
library(ggnetwork)
library(ggrepel)
library(magic)
library(ggplot2)
# ============================================================
# 1. USER CONFIGURATION — edit this section only
# ============================================================
# --- File paths ---
CLINICAL_TABLE_PATH <- "data/clinical_table.csv" # See INPUT 1 above
MULTIOMIC_TABLE_PATH <- "data/multiomic_table.csv" # See INPUT 2 above
GROUP_TABLE_PATH <- "data/group_table.csv" # See INPUT 3 above
OUTPUT_PLOT_PATH <- "results/mst_plot.png"
# --- Column name mappings ---
SAMPLE_ID_COL <- "SampleID" # Subject/sample ID column in clinical_table
FEATURE_COL <- "Covariate" # Feature name column in multiomic & group tables
GROUP_COL <- "Group" # Group column in multiomic & group tables
PVAL_OMICS_COL <- "pValOmics" # Multiomic -log10(p-value) column in multiomic_table
# Optional: extra vertex-attribute columns in multiomic_table to add to nodes.
# Set to character(0) if none.
# Example: EXTRA_VERTEX_ATTR_COLS <- c("Rho", "pValWilcox")
EXTRA_VERTEX_ATTR_COLS <- character(0)
# --- Whether group assignments live in the multiomic_table already ---
# TRUE -> group_table is loaded and merged in
# FALSE -> GROUP_COL must already be present in multiomic_table
USE_SEPARATE_GROUP_TABLE <- TRUE
# --- Statistical thresholds ---
SIG_CUTOFF <- 0.05 # Bonferroni-style significance cutoff (used on pValMatrix)
RHO_CUTOFF <- 0.1 # Minimum absolute Spearman rho for an edge to be drawn
# --- Plot aesthetics ---
# Node size mapped to: PVAL_OMICS_COL (multiomic predictability)
# Node fill mapped to: NODE_FILL_COL (set to PVAL_OMICS_COL if you have no other column)
NODE_FILL_COL <- "pValWilcox" # Column for node fill colour
NODE_FILL_MIDPOINT <- 12.5 # Midpoint for diverging fill colour scale
PLOT_WIDTH <- 7
PLOT_HEIGHT <- 7
# ============================================================
# 2. LOAD DATA
# ============================================================
cat("Loading data...\n")
clinicalTable <- fread(CLINICAL_TABLE_PATH)
multiomicTable <- fread(MULTIOMIC_TABLE_PATH)
if (USE_SEPARATE_GROUP_TABLE) {
groupTable <- fread(GROUP_TABLE_PATH)
# Merge group assignments into multiomicTable
multiomicTable <- merge(multiomicTable, groupTable[, c(FEATURE_COL, GROUP_COL), with = FALSE],
by = FEATURE_COL, all.x = TRUE)
}
# Validate required columns
stopifnot(SAMPLE_ID_COL %in% colnames(clinicalTable))
stopifnot(all(c(FEATURE_COL, GROUP_COL, PVAL_OMICS_COL) %in% colnames(multiomicTable)))
features <- multiomicTable[[FEATURE_COL]]
stopifnot(all(features %in% colnames(clinicalTable)))
cat(sprintf(" %d features across %d groups.\n",
length(features), length(unique(multiomicTable[[GROUP_COL]]))))
# ============================================================
# 3. COMPUTE PAIRWISE FEATURE CORRELATIONS (full matrix)
# ============================================================
cat("Computing pairwise Spearman correlations...\n")
featureData <- clinicalTable[, ..features]
corMatrix <- cor(featureData, use = "pairwise.complete.obs", method = "spearman")
corMatrix[is.na(corMatrix)] <- 0
# ============================================================
# 4. COMPUTE PAIRWISE SIGNIFICANCE MATRIX
# ============================================================
cat("Computing pairwise correlation significance...\n")
nFeatures <- length(features)
nPairs <- nFeatures * (nFeatures - 1) / 2
bonferroni <- -log10(SIG_CUTOFF / nPairs) # Bonferroni-corrected threshold
pValMatrix <- lapply(features, function(f1) {
lapply(features, function(f2) {
tryCatch({
res <- cor.test(clinicalTable[[f1]], clinicalTable[[f2]], method = "spearman")
data.table(Feat1 = f1, Feat2 = f2, pVal = -log10(res$p.value))
}, error = function(e) {
data.table(Feat1 = f1, Feat2 = f2, pVal = 0)
})
}) %>% rbindlist
}) %>% rbindlist
pValMatrix <- dcast(pValMatrix, Feat1 ~ Feat2, value.var = "pVal") %>%
.[, 2:ncol(.)] %>% as.matrix
pValMatrix[is.na(pValMatrix)] <- 0
rownames(pValMatrix) <- colnames(pValMatrix)
# ============================================================
# 5. BUILD PER-GROUP MSTs
# ============================================================
cat("Building per-group MSTs...\n")
groups <- unique(multiomicTable[[GROUP_COL]])
mstList <- lapply(groups, function(grp) {
grpFeatures <- multiomicTable[get(GROUP_COL) == grp, get(FEATURE_COL)]
if (length(grpFeatures) < 2) {
# Single-node group: trivial 1x1 matrices
m <- matrix(0, 1, 1, dimnames = list(grpFeatures, grpFeatures))
return(list(MST = m, corMatrix = m + 1))
}
grpData <- clinicalTable[, ..grpFeatures]
grpCor <- cor(grpData, use = "pairwise.complete.obs", method = "spearman")
grpCor[is.na(grpCor)] <- 0
adjMatrix <- 1 - abs(grpCor)
corGraph <- graph.adjacency(adjMatrix, weighted = TRUE, mode = "undirected", diag = FALSE)
mstAdj <- as_adjacency_matrix(mst(corGraph), type = "both", sparse = FALSE)
list(MST = mstAdj, corMatrix = abs(grpCor))
})
names(mstList) <- groups
# ============================================================
# 6. AGGREGATE GROUP MSTs INTO A COMBINED MST
# ============================================================
cat("Aggregating group MSTs...\n")
mstCombinedGraphs <- lapply(mstList, `[[`, "MST") %>% Reduce(adiag, .)
corMatrixBlock <- lapply(mstList, `[[`, "corMatrix") %>% Reduce(adiag, .)
# Reorder the full correlation matrix to match the block structure
orderedFeatures <- unlist(lapply(mstList, function(x) rownames(x$MST)))
corMatrix <- corMatrix[orderedFeatures, orderedFeatures]
pValMatrix <- pValMatrix[orderedFeatures, orderedFeatures]
multiomicTable <- multiomicTable[match(orderedFeatures, multiomicTable[[FEATURE_COL]])]
# Build inter-group adjacency by penalising already-covered within-group edges
newAdjMatrix <- (1 - (abs(corMatrix) - corMatrixBlock) + 1e-3) * (1 - mstCombinedGraphs + 1e-6)
combinedGraph <- graph.adjacency(newAdjMatrix, weighted = TRUE, mode = "undirected", diag = FALSE)
mstGraph <- as_adjacency_matrix(mst(combinedGraph), type = "both", sparse = FALSE)
reducedGraph <- graph.adjacency(abs(corMatrix) * mstGraph, weighted = T, mode = "undirected", diag = F)
frLayout <- layout_with_fr(reducedGraph)
# ============================================================
# 7. FILTER ADDITIONAL EDGES (significance + rho thresholds)
# ============================================================
cat("Filtering additional edges...\n")
groupFilter <- lapply(mstList, function(x) x$MST * 0 + 1) %>% Reduce(adiag, .)
pValFilter <- (pValMatrix > bonferroni) * 1
correlFilter <- (abs(corMatrix) > RHO_CUTOFF) * 1
coreMST <- abs(corMatrix) * mstGraph
filtCorMatrix <- abs(corMatrix) * correlFilter * pValFilter * (1 - mstGraph)
groupNetwork <- filtCorMatrix * groupFilter
interNetwork <- filtCorMatrix * (1 - groupFilter)
# ============================================================
# 8. BUILD FINAL GRAPH WITH TYPED EDGES
# ============================================================
cat("Building final graph...\n")
mstCoreGraph <- graph.adjacency(coreMST, weighted = TRUE, mode = "undirected", diag = FALSE)
groupGraph <- graph.adjacency(groupNetwork, weighted = TRUE, mode = "undirected", diag = FALSE)
interGraph <- graph.adjacency(interNetwork, weighted = TRUE, mode = "undirected", diag = FALSE)
edge.attributes(mstCoreGraph)$type <- rep("MST", gsize(mstCoreGraph))
edge.attributes(groupGraph)$type <- rep("Group", gsize(groupGraph))
edge.attributes(interGraph)$type <- rep("Interactions", gsize(interGraph))
reducedGraph <- mstCoreGraph + groupGraph + interGraph
# Resolve duplicate edge attributes created by igraph's + operator
for (attr in c("type", "weight")) {
v <- edge.attributes(reducedGraph)[[attr]]
v1 <- edge.attributes(reducedGraph)[[paste0(attr, "_1")]]
v2 <- edge.attributes(reducedGraph)[[paste0(attr, "_2")]]
if (!is.null(v1)) v[!is.na(v1)] <- v1[!is.na(v1)]
if (!is.null(v2)) v[!is.na(v2)] <- v2[!is.na(v2)]
edge.attributes(reducedGraph)[[attr]] <- v
}
# ============================================================
# 9. ADD VERTEX ATTRIBUTES
# ============================================================
cat("Adding vertex attributes...\n")
nodeNames <- vertex.attributes(reducedGraph)$name
orderedRows <- multiomicTable[match(nodeNames, multiomicTable[[FEATURE_COL]])]
vertex.attributes(reducedGraph)$id <- seq_len(nrow(orderedRows))
vertex.attributes(reducedGraph)$pVal <- orderedRows[[PVAL_OMICS_COL]]
vertex.attributes(reducedGraph)$Group <- gsub("_", " ", orderedRows[[GROUP_COL]])
# Add any requested extra vertex attributes
for (col in EXTRA_VERTEX_ATTR_COLS) {
if (col %in% colnames(orderedRows)) {
vertex.attributes(reducedGraph)[[col]] <- orderedRows[[col]]
} else {
warning(sprintf("Extra vertex attribute '%s' not found in multiomic_table — skipping.", col))
}
}
# Clean up node names for display
vertex.attributes(reducedGraph)$name <- tolower(nodeNames) %>% gsub("_", " ", .)
# ============================================================
# 10. PLOT
# ============================================================
cat("Plotting...\n")
ggdf <- ggnetwork(reducedGraph, layout = frLayout, scale = FALSE)
# Determine node fill column (fallback to pVal if the column was not added as a vertex attr)
fill_col <- if (NODE_FILL_COL %in% colnames(ggdf)) NODE_FILL_COL else "pVal"
p <- ggplot(ggdf, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(data = ggdf[!is.na(ggdf$type) & ggdf$type == "Interactions", ],
aes(alpha = weight), color = "#5e5e5e", linewidth = 0.5) +
geom_edges(data = ggdf[!is.na(ggdf$type) & ggdf$type == "Group", ],
aes(alpha = weight), color = "#5e5e5e", linewidth = 0.5) +
geom_edges(data = ggdf[!is.na(ggdf$type) & ggdf$type == "MST", ],
color = "black", linewidth = 0.5) +
geom_nodes(aes(size = pVal, fill = .data[[fill_col]]),
shape = 21, color = "black", stroke = 0.75) +
theme_blank() +
theme(
legend.background = element_rect(fill = "transparent"),
panel.background = element_rect(fill = "transparent"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = "transparent", color = NA)
) +
scale_fill_gradient2(low = "white", mid = "red", high = "blue",
midpoint = NODE_FILL_MIDPOINT) +
scale_alpha_continuous(range = c(0, 1), limits = c(0, 1)) +
labs(
size = paste0("-log10(p-value) of Multiomic Prediction (", PVAL_OMICS_COL, ")"),
fill = paste0("Node fill: ", NODE_FILL_COL),
alpha = "Absolute Spearman's Rho"
)
print(p)
ggsave(OUTPUT_PLOT_PATH, plot = p, width = PLOT_WIDTH, height = PLOT_HEIGHT, bg = "transparent")
cat(sprintf("Plot saved to: %s\n", OUTPUT_PLOT_PATH))
# reducedGraph is available in the environment for downstream use
cat("Done. igraph object 'reducedGraph' is ready.\n")
Related files
correlation_mst.txt Group-based Minimum Spanning Tree Correlation Network
. Bio-protocol Preprint. bio-protocol.org/prep2905.Do you have any questions about this protocol?
Post your question to gather feedback from the community. We will also invite the authors of this article to respond.
Share
Bluesky
X
Copy link