In [6]:
# Ensure the project's 'src' directory is on sys.path so 'spindle_dev' imports.
import sys
import importlib
from pathlib import Path

project_root = '/data/sarkar_lab/Projects/spindle_dev'
src_path = Path(project_root) / 'src'
src_entry = str(src_path)
if src_entry not in sys.path:
    sys.path.insert(0, src_entry)

import spindle_dev

# Re-execute the package body so on-disk edits take effect without a kernel restart.
importlib.reload(spindle_dev)
Out[6]:
<module 'spindle_dev' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/__init__.py'>
In [7]:
import scanpy as sc
In [8]:
# Load the 10X Xenium human breast cancer dataset (Zenodo download) as an AnnData object.
adata = sc.read_h5ad("/data/sarkar_lab/zenodo_data/2023_xenium_human_breast_cancer/adata.h5ad")
In [9]:
# Keep only spots with a real cluster annotation; .copy() detaches from the AnnData view.
labeled_mask = adata.obs.Cluster != "Unlabeled"
adata = adata[adata.obs.loc[labeled_mask].index, :].copy()
In [10]:
# NOTE(review): scanpy emits a FutureWarning here (see output below) — `sc.pl.spatial`
# is deprecated in favor of `squidpy.pl.spatial_scatter`; migrate when squidpy is available.
sc.pl.spatial(adata, color=["Cluster"], spot_size=13)
/tmp/ipykernel_3030422/1942974121.py:1: FutureWarning: Use `squidpy.pl.spatial_scatter` instead. sc.pl.spatial(adata, color=["Cluster"], spot_size=13)
In [11]:
# Reload the spindle_dev package and all of its submodules after source edits.
# (importlib is already imported in the setup cell; no need to re-import it here.)
import spindle_dev
import spindle_dev.metrics as metrics
import spindle_dev.index as index
import spindle_dev.preprocessing as preprocessing
import spindle_dev.plotting as plotting
import spindle_dev.typing as typing
import spindle_dev.test as test
import spindle_dev.search as search

# Reload every submodule BEFORE the package so the package reload sees fresh
# symbols. Previously typing/test/search were reloaded *after* the package,
# contradicting the stated dependency ordering.
for _mod in (metrics, index, preprocessing, plotting, typing, test, search):
    importlib.reload(_mod)
importlib.reload(spindle_dev)
Out[11]:
<module 'spindle_dev.search' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/search.py'>
In [12]:
def prepare_to_index(adata, resolution=0.2, min_final_size=20):
    """Tile the tissue, build per-tile covariance SPDs, and cluster/index them.

    Parameters
    ----------
    adata : AnnData with spatial coordinates in ``.obsm["spatial"]``.
    resolution : Leiden resolution used when clustering SPD matrices.
    min_final_size : lower bound on block size for the adaptive block runs.

    Returns
    -------
    (tiles, tile_covs, genes_work, data, out_dict)
    """
    coords = adata.obsm["spatial"]
    tiles = preprocessing.build_quadtree_tiles(coords, max_pts=200, min_side=0.0, max_depth=40)
    num_genes = adata.n_vars
    # Use the `preprocessing` alias consistently (original mixed `preprocessing.*`
    # and `spindle_dev.preprocessing.*`, which refer to the same module object).
    genes_work, gene_idx = preprocessing.topvar_genes(adata, G=num_genes)
    tile_covs = preprocessing.build_tile_covs_full(adata, tiles, gene_idx, n_jobs=8, eps=1e-6)
    data = index.ProcessedData(tiles, tile_covs, genes_work, adata.n_obs)
    data.reduce_dim(num_pca_components=30, n_components=2, do_umap=True)
    data.cluster_spds(cluster_distance="tree", cluster_method="leiden", resolution=resolution)
    data.assign_label_to_spots()
    data.get_corr_mean_by_cluster()
    out_dict = data.get_adaptive_runs(
        find_blocks=True,
        with_size_guard=True,
        min_final_size=min_final_size,
        max_final_size=100,
    )
    return tiles, tile_covs, genes_work, data, out_dict
In [13]:
# End-to-end preparation; the logs below show tiling, PCA/UMAP reduction,
# Leiden clustering, and adaptive block-run selection per cluster.
tiles, tile_covs, genes_work, data, out_dict = prepare_to_index(adata, min_final_size=15)
[2026-01-19 12:59:14,378] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance. [2026-01-19 12:59:14,393] INFO spindle_dev.index: Building ultrametric features from SPD matrices. [2026-01-19 12:59:25,299] INFO spindle_dev.index: Computing latent features from the tree representations. [2026-01-19 12:59:25,964] INFO spindle_dev.index: Reducing latent features to 30 dimensions using PCA. [2026-01-19 12:59:35,508] INFO spindle_dev.index: Explained variance ratios by PCA components: [0.06878107 0.05380314 0.02646384 0.01915205 0.01300586 0.01025453 0.00781812 0.00735731 0.00665263 0.00645236 0.00500298 0.00465103 0.00423426 0.00416654 0.00382042 0.00364907 0.00355717 0.00348549 0.00334294 0.00319313 0.00303316 0.00301945 0.00300855 0.00294676 0.00288963 0.00281065 0.00275823 0.00274145 0.00271152 0.00265672] [2026-01-19 12:59:35,510] INFO spindle_dev.index: Reducing latent features to 2 dimensions using UMAP. /panfs/accrepfs.vampire/home/sarkah1/miniforge3/envs/spatial/lib/python3.10/site-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism. warn( [2026-01-19 12:59:47,595] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance. [2026-01-19 12:59:47,596] INFO spindle_dev.index: Clustering SPD matrices using Leiden clustering with resolution 0.20. 
[2026-01-19 12:59:47,814] INFO spindle_dev.index: Since clustering method is tree, I am going to find global order per cluster [2026-01-19 12:59:47,815] INFO spindle_dev.index: Finding consensus tree for cluster 0 [2026-01-19 12:59:47,916] INFO spindle_dev.index: Finding consensus tree for cluster 1 [2026-01-19 12:59:47,996] INFO spindle_dev.index: Finding consensus tree for cluster 2 [2026-01-19 12:59:48,068] INFO spindle_dev.index: Finding consensus tree for cluster 3 [2026-01-19 12:59:48,607] INFO spindle_dev.index: Computing mean correlation matrix for cluster 0 [2026-01-19 12:59:49,415] INFO spindle_dev.index: Computing mean correlation matrix for cluster 1 [2026-01-19 12:59:50,019] INFO spindle_dev.index: Computing mean correlation matrix for cluster 2 [2026-01-19 12:59:50,508] INFO spindle_dev.index: Computing mean correlation matrix for cluster 3 [2026-01-19 12:59:50,776] INFO spindle_dev.index: Finding adaptive block runs for cluster 0 [2026-01-19 12:59:51,045] INFO spindle_dev.index: Chose t=0.7638580524895158 resulting in 200 blocks instead of 61 blocks would have gotten by default [2026-01-19 12:59:51,076] INFO spindle_dev.index: Final block runs for cluster 0: 16 blocks. [2026-01-19 12:59:51,077] INFO spindle_dev.index: Finding adaptive block runs for cluster 1 [2026-01-19 12:59:51,351] INFO spindle_dev.index: Chose t=0.8451053062066296 resulting in 159 blocks instead of 37 blocks would have gotten by default [2026-01-19 12:59:51,370] INFO spindle_dev.index: Final block runs for cluster 1: 14 blocks. [2026-01-19 12:59:51,371] INFO spindle_dev.index: Finding adaptive block runs for cluster 2 [2026-01-19 12:59:51,641] INFO spindle_dev.index: Chose t=0.7649764855224312 resulting in 188 blocks instead of 40 blocks would have gotten by default [2026-01-19 12:59:51,669] INFO spindle_dev.index: Final block runs for cluster 2: 16 blocks. 
[2026-01-19 12:59:51,669] INFO spindle_dev.index: Finding adaptive block runs for cluster 3 [2026-01-19 12:59:51,939] INFO spindle_dev.index: Chose t=0.8187994098657192 resulting in 220 blocks instead of 98 blocks would have gotten by default [2026-01-19 12:59:51,977] INFO spindle_dev.index: Final block runs for cluster 3: 15 blocks.
In [14]:
# Use matplotlib.pyplot directly: `matplotlib.pylab` is a discouraged legacy shim
# that also floods the namespace with numpy names. Only pyplot-level functions
# (scatter, gca, text, savefig, show) are used in the cells below.
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
In [15]:
import numpy as np

# Color each spot by its SPD-cluster label using scanpy's default palette.
# (Removed unused `labels = data.labels`; it was never referenced in this cell.)
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']
plt.scatter(coords[indices, 0], coords[indices, 1], s=1, c=point_colors, lw=0, alpha=0.8)

# Overlay the quadtree tile boundaries.
patches = []
for t in tiles:
    bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin, ymin, xmax, ymax)
    x0, y0, x1, y1 = bbox
    patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
# NOTE(review): with match_original=True the collection keeps each patch's own
# edge properties, so the `linewidths`/`edgecolors` kwargs here may be ignored —
# confirm the intended tile-outline styling against a rendered figure.
pc = PatchCollection(patches, match_original=True, linewidths=0.2, edgecolors='b', alpha=0.3)
ax = plt.gca()
ax.add_collection(pc)
ax.invert_yaxis()  # image-style orientation: y grows downward
ax.axis('off')
plt.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_plot_with_cluster.png", dpi=300)
plt.show()
In [237]:
import numpy as np

palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
labels = data.labels
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']

# Grey out everything except cluster 3 so its spatial footprint stands out.
# (The original loop also zipped `indices` but never used the index — dropped.)
new_point_colors = [
    palette[lab] if lab == 3 else "#E9E6E6"  # grey for non-target clusters
    for lab in data.spot_label.values()
]
plt.scatter(coords[indices, 0], coords[indices, 1], s=1, c=new_point_colors, lw=0, alpha=0.8)
ax = plt.gca()

# Overlay the quadtree tile boundaries (faint).
patches = []
for t in tiles:
    bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin, ymin, xmax, ymax)
    x0, y0, x1, y1 = bbox
    patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
pc = PatchCollection(patches, match_original=True, linewidths=0.01, edgecolors='b', alpha=0.1)
ax.add_collection(pc)
ax.invert_yaxis()
ax.axis('off')
plt.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_plot_cluster3_only.png", dpi=300, bbox_inches='tight')
plt.show()
In [16]:
# UMAP of the SPD latent space, colored by cluster, with a labeled centroid per cluster.
import numpy as np
import matplotlib.pyplot as plt

n_clusters = len(set(data.labels))
# Scanpy's default_20 palette keeps colors consistent with the spatial plots above.
palette = sc.pl.palettes.default_20
cluster_colors = [palette[lab] for lab in data.labels]

umap_xy = data.latent['umap']
plt.scatter(umap_xy[:, 0], umap_xy[:, 1], s=6, lw=0, alpha=0.8, c=cluster_colors)

# Annotate each cluster at its centroid: bold digit inside a white circle for visibility.
for i in range(n_clusters):
    cluster_points = umap_xy[data.labels == i]
    if len(cluster_points) == 0:
        continue
    x_mean = cluster_points[:, 0].mean()
    y_mean = cluster_points[:, 1].mean()
    plt.text(
        x_mean, y_mean, str(i),
        color='black', fontsize=10, ha='center', va='center', fontweight='bold',
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='circle,pad=0.5', alpha=0.7),
    )

# De-clutter: drop the top/right spines and all axis ticks.
ax = plt.gca()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
plt.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/umap_plot_with_cluster_labels.png", dpi=300)
plt.show()
In [17]:
# Choose adaptive epsilons per cluster: a per-block "elbow" epsilon plus one
# global epsilon per cluster, both consumed by the IndexConfig below.
epsilon_block_wise_dict = {}
epsilon_dict = {}
for cluster_id in set(data.labels):
    # NOTE(review): eps_per_block is unpacked but never used in this notebook
    # chunk — confirm it can be dropped or keep it for debugging.
    eps_per_block, eps_elbow_per_block, eps = index.choose_adaptive_epsilons(data, cluster_id, k_target_per_block=64)
    epsilon_block_wise_dict[int(cluster_id)] = eps_elbow_per_block
    epsilon_dict[int(cluster_id)] = eps
In [ ]:
In [18]:
# Configure the SPD index: block-wise thresholds with epsilon-net clustering.
config = typing.IndexConfig()
config.epsilon_dict = epsilon_dict  # one global epsilon per cluster
config.epsilon_block_wise_dict = epsilon_block_wise_dict  # per-(cluster, block) elbow epsilons
config.threshold_type = 'block_wise'
config.kmean_method = 'epsilon_net'
In [19]:
# Inspect the per-(cluster, block) elbow epsilons chosen above.
epsilon_block_wise_dict
Out[19]:
{0: {0: 7.551487154783578,
1: 7.688574605978839,
2: 8.348619550689628,
3: 8.12886229785936,
4: 6.941772257682238,
5: 7.11207711085268,
6: 7.4634645865588745,
7: 8.12941780999703,
8: 7.394765319810765,
9: 6.714566808424555,
10: 7.174724333051149,
11: 6.700368214926148,
12: 6.223964337518237,
13: 5.151625529484852,
14: 4.577674131656347,
15: 9.7037491104398},
1: {0: 7.34932238515324,
1: 8.493449641575141,
2: 8.901136453589094,
3: 7.8762835362284145,
4: 7.151354147881101,
5: 7.800650669737451,
6: 8.425077404859794,
7: 8.975881255696738,
8: 8.588472145359653,
9: 5.793292280974387,
10: 9.691527557373046,
11: 9.951465035398689,
12: 6.811198569674318,
13: 4.351012706756592},
2: {0: 6.799333509719286,
1: 8.557316793247027,
2: 8.452427434089639,
3: 7.588833091125586,
4: 6.785369982212529,
5: 5.5096939776630975,
6: 5.248055444220603,
7: 5.746564254670582,
8: 5.975592355011169,
9: 3.1322101267631437,
10: 4.466462934560956,
11: 8.84131131853376,
12: 6.445534355120787,
13: 5.415287845182453,
14: 6.687208104027927,
15: 7.126931829888523},
3: {0: 7.388558401037387,
1: 7.783292293548584,
2: 8.289245743649037,
3: 7.992608086706223,
4: 7.854193064200638,
5: 8.034181459182257,
6: 7.878430512484119,
7: 8.066423324555597,
8: 9.167032732265142,
9: 7.850585689461382,
10: 8.363619363357321,
11: 6.642502405394938,
12: 10.199246521661854,
13: 8.335221526620284,
14: 8.539956410725912}}
In [20]:
# Create index
# Builds one DAG per cluster plus summary stats and distance records (see logs below).
dag_dict, stat, dist_list = index.index_spds(data, config=config)
[2026-01-19 12:59:59,265] INFO spindle_dev.index: Processing cluster 0 [2026-01-19 12:59:59,266] INFO spindle_dev.index: Building SPD index with epsilon=7.570320670118333 [2026-01-19 12:59:59,267] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-19 12:59:59,300] INFO spindle_dev.index: Cluster 0: 733 SPDs, 16 blocks [2026-01-19 12:59:59,301] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-19 12:59:59,356] INFO spindle_dev.index: Finished block 0 in 0.05 seconds, found 2 clusters. [2026-01-19 12:59:59,356] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-19 12:59:59,412] INFO spindle_dev.index: Finished block 1 in 0.06 seconds, found 5 clusters. [2026-01-19 12:59:59,413] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-19 12:59:59,458] INFO spindle_dev.index: Finished block 2 in 0.05 seconds, found 3 clusters. [2026-01-19 12:59:59,459] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-19 12:59:59,506] INFO spindle_dev.index: Finished block 3 in 0.05 seconds, found 3 clusters. [2026-01-19 12:59:59,507] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-19 12:59:59,555] INFO spindle_dev.index: Finished block 4 in 0.05 seconds, found 3 clusters. [2026-01-19 12:59:59,556] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-19 12:59:59,605] INFO spindle_dev.index: Finished block 5 in 0.05 seconds, found 3 clusters. [2026-01-19 12:59:59,606] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-19 12:59:59,664] INFO spindle_dev.index: Finished block 6 in 0.06 seconds, found 2 clusters. [2026-01-19 12:59:59,665] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-19 12:59:59,708] INFO spindle_dev.index: Finished block 7 in 0.04 seconds, found 2 clusters. 
[2026-01-19 12:59:59,709] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-19 12:59:59,758] INFO spindle_dev.index: Finished block 8 in 0.05 seconds, found 3 clusters. [2026-01-19 12:59:59,759] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-19 12:59:59,814] INFO spindle_dev.index: Finished block 9 in 0.06 seconds, found 4 clusters. [2026-01-19 12:59:59,815] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-19 12:59:59,883] INFO spindle_dev.index: Finished block 10 in 0.07 seconds, found 5 clusters. [2026-01-19 12:59:59,884] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-19 12:59:59,954] INFO spindle_dev.index: Finished block 11 in 0.07 seconds, found 2 clusters. [2026-01-19 12:59:59,954] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-19 13:00:00,029] INFO spindle_dev.index: Finished block 12 in 0.07 seconds, found 2 clusters. [2026-01-19 13:00:00,030] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-19 13:00:00,076] INFO spindle_dev.index: Finished block 13 in 0.05 seconds, found 2 clusters. [2026-01-19 13:00:00,077] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-19 13:00:00,128] INFO spindle_dev.index: Finished block 14 in 0.05 seconds, found 3 clusters. [2026-01-19 13:00:00,129] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-19 13:00:00,338] INFO spindle_dev.index: Finished block 15 in 0.21 seconds, found 1 clusters. [2026-01-19 13:00:00,339] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-19 13:00:00,339] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-19 13:00:00,339] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-19 13:00:00,340] INFO spindle_dev.index: We will use triangle inequality to order clusters. 
[2026-01-19 13:00:00,340] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-19 13:00:00,346] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-19 13:00:00,347] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. [2026-01-19 13:00:00,347] INFO spindle_dev.index: Processing cluster 1 [2026-01-19 13:00:00,347] INFO spindle_dev.index: Building SPD index with epsilon=8.204116141691717 [2026-01-19 13:00:00,348] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-19 13:00:00,375] INFO spindle_dev.index: Cluster 1: 567 SPDs, 14 blocks [2026-01-19 13:00:00,375] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-19 13:00:00,416] INFO spindle_dev.index: Finished block 0 in 0.04 seconds, found 4 clusters. [2026-01-19 13:00:00,417] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-19 13:00:00,457] INFO spindle_dev.index: Finished block 1 in 0.04 seconds, found 4 clusters. [2026-01-19 13:00:00,458] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-19 13:00:00,490] INFO spindle_dev.index: Finished block 2 in 0.03 seconds, found 2 clusters. [2026-01-19 13:00:00,491] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-19 13:00:00,538] INFO spindle_dev.index: Finished block 3 in 0.05 seconds, found 5 clusters. [2026-01-19 13:00:00,538] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-19 13:00:00,622] INFO spindle_dev.index: Finished block 4 in 0.08 seconds, found 10 clusters. [2026-01-19 13:00:00,622] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-19 13:00:00,679] INFO spindle_dev.index: Finished block 5 in 0.06 seconds, found 3 clusters. 
[2026-01-19 13:00:00,679] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-19 13:00:00,794] INFO spindle_dev.index: Finished block 6 in 0.11 seconds, found 3 clusters. [2026-01-19 13:00:00,795] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-19 13:00:00,862] INFO spindle_dev.index: Finished block 7 in 0.07 seconds, found 2 clusters. [2026-01-19 13:00:00,862] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-19 13:00:00,932] INFO spindle_dev.index: Finished block 8 in 0.07 seconds, found 3 clusters. [2026-01-19 13:00:00,933] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-19 13:00:00,976] INFO spindle_dev.index: Finished block 9 in 0.04 seconds, found 4 clusters. [2026-01-19 13:00:00,977] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-19 13:00:01,036] INFO spindle_dev.index: Finished block 10 in 0.06 seconds, found 2 clusters. [2026-01-19 13:00:01,036] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-19 13:00:01,115] INFO spindle_dev.index: Finished block 11 in 0.08 seconds, found 2 clusters. [2026-01-19 13:00:01,116] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-19 13:00:01,196] INFO spindle_dev.index: Finished block 12 in 0.08 seconds, found 6 clusters. [2026-01-19 13:00:01,196] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-19 13:00:01,255] INFO spindle_dev.index: Finished block 13 in 0.06 seconds, found 7 clusters. [2026-01-19 13:00:01,256] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-19 13:00:01,256] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-19 13:00:01,256] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-19 13:00:01,257] INFO spindle_dev.index: We will use triangle inequality to order clusters. 
[2026-01-19 13:00:01,257] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-19 13:00:01,261] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-19 13:00:01,262] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. [2026-01-19 13:00:01,262] INFO spindle_dev.index: Processing cluster 2 [2026-01-19 13:00:01,262] INFO spindle_dev.index: Building SPD index with epsilon=6.784064466997531 [2026-01-19 13:00:01,263] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-19 13:00:01,286] INFO spindle_dev.index: Cluster 2: 504 SPDs, 16 blocks [2026-01-19 13:00:01,286] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-19 13:00:01,335] INFO spindle_dev.index: Finished block 0 in 0.05 seconds, found 5 clusters. [2026-01-19 13:00:01,336] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-19 13:00:01,364] INFO spindle_dev.index: Finished block 1 in 0.03 seconds, found 2 clusters. [2026-01-19 13:00:01,364] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-19 13:00:01,393] INFO spindle_dev.index: Finished block 2 in 0.03 seconds, found 2 clusters. [2026-01-19 13:00:01,394] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-19 13:00:01,424] INFO spindle_dev.index: Finished block 3 in 0.03 seconds, found 2 clusters. [2026-01-19 13:00:01,424] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-19 13:00:01,464] INFO spindle_dev.index: Finished block 4 in 0.04 seconds, found 3 clusters. [2026-01-19 13:00:01,465] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-19 13:00:01,507] INFO spindle_dev.index: Finished block 5 in 0.04 seconds, found 5 clusters. 
[2026-01-19 13:00:01,508] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-19 13:00:01,552] INFO spindle_dev.index: Finished block 6 in 0.04 seconds, found 4 clusters. [2026-01-19 13:00:01,552] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-19 13:00:01,583] INFO spindle_dev.index: Finished block 7 in 0.03 seconds, found 2 clusters. [2026-01-19 13:00:01,584] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-19 13:00:01,627] INFO spindle_dev.index: Finished block 8 in 0.04 seconds, found 5 clusters. [2026-01-19 13:00:01,627] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-19 13:00:01,682] INFO spindle_dev.index: Finished block 9 in 0.05 seconds, found 8 clusters. [2026-01-19 13:00:01,682] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-19 13:00:01,754] INFO spindle_dev.index: Finished block 10 in 0.07 seconds, found 4 clusters. [2026-01-19 13:00:01,755] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-19 13:00:01,904] INFO spindle_dev.index: Finished block 11 in 0.15 seconds, found 4 clusters. [2026-01-19 13:00:01,905] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-19 13:00:01,952] INFO spindle_dev.index: Finished block 12 in 0.05 seconds, found 4 clusters. [2026-01-19 13:00:01,953] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-19 13:00:02,001] INFO spindle_dev.index: Finished block 13 in 0.05 seconds, found 5 clusters. [2026-01-19 13:00:02,001] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-19 13:00:02,043] INFO spindle_dev.index: Finished block 14 in 0.04 seconds, found 4 clusters. [2026-01-19 13:00:02,044] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-19 13:00:02,083] INFO spindle_dev.index: Finished block 15 in 0.04 seconds, found 3 clusters. 
[2026-01-19 13:00:02,084] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-19 13:00:02,084] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-19 13:00:02,085] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-19 13:00:02,085] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-19 13:00:02,085] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-19 13:00:02,090] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-19 13:00:02,090] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. [2026-01-19 13:00:02,090] INFO spindle_dev.index: Processing cluster 3 [2026-01-19 13:00:02,091] INFO spindle_dev.index: Building SPD index with epsilon=8.425444379709184 [2026-01-19 13:00:02,091] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-19 13:00:02,105] INFO spindle_dev.index: Cluster 3: 277 SPDs, 15 blocks [2026-01-19 13:00:02,105] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-19 13:00:02,125] INFO spindle_dev.index: Finished block 0 in 0.02 seconds, found 3 clusters. [2026-01-19 13:00:02,125] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-19 13:00:02,142] INFO spindle_dev.index: Finished block 1 in 0.02 seconds, found 3 clusters. [2026-01-19 13:00:02,143] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-19 13:00:02,157] INFO spindle_dev.index: Finished block 2 in 0.01 seconds, found 2 clusters. [2026-01-19 13:00:02,158] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-19 13:00:02,179] INFO spindle_dev.index: Finished block 3 in 0.02 seconds, found 3 clusters. 
[2026-01-19 13:00:02,179] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-19 13:00:02,197] INFO spindle_dev.index: Finished block 4 in 0.02 seconds, found 3 clusters. [2026-01-19 13:00:02,197] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-19 13:00:02,215] INFO spindle_dev.index: Finished block 5 in 0.02 seconds, found 3 clusters. [2026-01-19 13:00:02,216] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-19 13:00:02,242] INFO spindle_dev.index: Finished block 6 in 0.03 seconds, found 4 clusters. [2026-01-19 13:00:02,243] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-19 13:00:02,259] INFO spindle_dev.index: Finished block 7 in 0.02 seconds, found 2 clusters. [2026-01-19 13:00:02,260] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-19 13:00:02,276] INFO spindle_dev.index: Finished block 8 in 0.02 seconds, found 2 clusters. [2026-01-19 13:00:02,277] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-19 13:00:02,298] INFO spindle_dev.index: Finished block 9 in 0.02 seconds, found 2 clusters. [2026-01-19 13:00:02,299] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-19 13:00:02,316] INFO spindle_dev.index: Finished block 10 in 0.02 seconds, found 2 clusters. [2026-01-19 13:00:02,317] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-19 13:00:02,336] INFO spindle_dev.index: Finished block 11 in 0.02 seconds, found 3 clusters. [2026-01-19 13:00:02,336] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-19 13:00:02,412] INFO spindle_dev.index: Finished block 12 in 0.08 seconds, found 2 clusters. [2026-01-19 13:00:02,412] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-19 13:00:02,441] INFO spindle_dev.index: Finished block 13 in 0.03 seconds, found 2 clusters. 
[2026-01-19 13:00:02,441] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-19 13:00:02,487] INFO spindle_dev.index: Finished block 14 in 0.05 seconds, found 2 clusters. [2026-01-19 13:00:02,487] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-19 13:00:02,488] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-19 13:00:02,488] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-19 13:00:02,488] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-19 13:00:02,489] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-19 13:00:02,491] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-19 13:00:02,492] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
In [21]:
# Search settings: at most 2 results per query, with bounded retry budgets.
search_cfg = search.SearchConfig(max_results=2, debug=False, max_failed_starts=5, max_failed_paths=10)
In [22]:
# Sanity check: query the index with known SPDs and verify ground-truth paths are
# recovered (logs below report exact path and leaf match counts).
test_results = test.run_sanity_search(data, dag_dict, config, search_cfg, max_queries=2000, skip_baseline=True)
[2026-01-19 13:00:02,893] INFO spindle_dev.test: Built ground-truth paths for 733 SPDs in cluster 0 across 16 blocks. [2026-01-19 13:00:03,058] INFO spindle_dev.test: Built ground-truth paths for 567 SPDs in cluster 1 across 14 blocks. [2026-01-19 13:00:03,208] INFO spindle_dev.test: Built ground-truth paths for 504 SPDs in cluster 2 across 16 blocks. [2026-01-19 13:00:03,253] INFO spindle_dev.test: Built ground-truth paths for 277 SPDs in cluster 3 across 15 blocks. [2026-01-19 13:00:03,254] INFO spindle_dev.test: Created ground-truth paths for all clusters. [2026-01-19 13:00:03,254] INFO spindle_dev.test: Search config: SearchConfig(max_results=2, max_failed_starts=5, max_failed_paths=10, total_paths_limit=1000, deterministic=DeterministicConfig(seed=0), debug=False) [2026-01-19 13:00:03,255] INFO spindle_dev.test: Running sanity search with 2000 queries. 100%|██████████| 2000/2000 [04:41<00:00, 7.11it/s] [2026-01-19 13:04:44,476] INFO spindle_dev.test: Sanity search: 2000 queries, 2000 exact path matches, 2000 leaf matches, mean search_time=0.1399s
In [181]:
# Sankey visualization of the block DAG for cluster 2.
fig = plotting.visualize_block_dag_sankey_scaled(dag_dict[2].nodes, height=800, thickness=20.0)
fig
In [24]:
#fig.write_image("/data/sarkar_lab/Projects/spindle_dev/ISMB_notebook/figures/breast_cluster_0_block_dag_sankey.png", engine="kaleido", width=2000, height=1200, scale=2)
In [25]:
import pandas as pd
def get_dag_stats(dag_dict):
    """Flatten per-cluster DAG nodes into a tidy DataFrame of node statistics.

    Parameters
    ----------
    dag_dict : mapping of cluster_id -> DAG; each DAG exposes a ``nodes`` list
        whose elements carry ``metadata`` (members, mean, radius),
        ``block_index``, ``global_node_id``, and ``block_cluster_id``.

    Returns
    -------
    pd.DataFrame with one row per node and columns:
        cluster_id, node_id, block_id, block_cluster_id, num_spds, radius, block_size.
    """
    records = []
    for cluster_id, cluster_dag in dag_dict.items():
        # (Removed unused `block_to_node = cluster_dag.block_to_node_indices`.)
        for node in cluster_dag.nodes:
            records.append({
                "cluster_id": cluster_id,
                "node_id": node.global_node_id,
                "block_id": node.block_index,
                "block_cluster_id": node.block_cluster_id,
                # Number of SPD matrices assigned to this node.
                "num_spds": len(node.metadata.members),
                # Covering radius of the node's epsilon-ball.
                "radius": node.metadata.radius,
                # Side length of the (square) mean SPD matrix.
                "block_size": node.metadata.mean.shape[0],
            })
    # Build the frame once from records (avoids row-by-row growth).
    return pd.DataFrame(records)
In [26]:
# Summarize every DAG node into a single stats table for later lookups.
dag_stats_df = get_dag_stats(dag_dict)
In [117]:
# Pick up edits to spindle_dev/index.py without restarting the kernel.
importlib.reload(index)
Out[117]:
<module 'spindle_dev.index' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/index.py'>
In [118]:
# Score every DAG node in each cluster (per-node module/quality metrics).
score_list = []
for cluster_id in set(data.labels):
    # NOTE(review): cluster_dag is fetched but unused in this loop — confirm
    # it can be removed (score_nodes_from_a_cluster receives dag_dict itself).
    cluster_dag = dag_dict[cluster_id]
    cluster_score = index.score_nodes_from_a_cluster(data, cluster_id, dag_dict, take_representative_mean=True)
    score_list.extend(cluster_score)
100%|██████████| 45/45 [00:00<00:00, 326.93it/s] 100%|██████████| 57/57 [00:00<00:00, 228.34it/s] 100%|██████████| 62/62 [00:00<00:00, 243.52it/s] 100%|██████████| 38/38 [00:00<00:00, 282.96it/s]
In [119]:
# Flatten the per-node score records into scalar rows; the module list is
# summarized by its length.
_score_fields = (
    "cluster_id", "block_id", "node_id", "node_score",
    "num_spds", "block_size", "E", "Q", "S",
)
node_scores = []
for node_score in score_list:
    row = {field: node_score[field] for field in _score_fields}
    row["num_modules"] = len(node_score["modules"])
    node_scores.append(row)
node_score_df = pd.DataFrame(node_scores)
In [121]:
# Top 20 nodes by score; high scores flag blocks with strong module structure.
node_score_df.sort_values(by='node_score', ascending=False).head(20)
Out[121]:
| cluster_id | block_id | node_id | node_score | num_spds | block_size | E | Q | S | num_modules | |
|---|---|---|---|---|---|---|---|---|---|---|
| 75 | 1 | 6 | 30 | 0.239966 | 1 | 38 | 1.932651 | 0.254274 | 0.511693 | 3 |
| 146 | 2 | 11 | 44 | 0.165974 | 1 | 49 | 1.627859 | 0.164071 | 0.378574 | 4 |
| 5 | 0 | 1 | 5 | 0.159690 | 91 | 15 | 0.774484 | 0.390674 | 0.472224 | 4 |
| 94 | 1 | 12 | 49 | 0.158631 | 13 | 26 | 1.273377 | 0.231421 | 0.461699 | 4 |
| 74 | 1 | 6 | 29 | 0.156417 | 5 | 38 | 1.526276 | 0.186053 | 0.449176 | 6 |
| 156 | 2 | 13 | 54 | 0.150369 | 2 | 18 | 0.935547 | 0.288826 | 0.443511 | 5 |
| 60 | 1 | 4 | 15 | 0.148257 | 397 | 21 | 0.976389 | 0.243900 | 0.377444 | 7 |
| 145 | 2 | 11 | 43 | 0.148110 | 1 | 49 | 2.225433 | 0.125890 | 0.471341 | 4 |
| 89 | 1 | 12 | 44 | 0.140268 | 543 | 26 | 1.321063 | 0.188080 | 0.435467 | 3 |
| 173 | 3 | 3 | 9 | 0.136184 | 49 | 20 | 0.794279 | 0.282274 | 0.392594 | 4 |
| 80 | 1 | 8 | 35 | 0.132119 | 1 | 27 | 1.412164 | 0.179984 | 0.480190 | 4 |
| 79 | 1 | 8 | 34 | 0.131210 | 2 | 27 | 1.464330 | 0.189470 | 0.527084 | 4 |
| 198 | 3 | 13 | 34 | 0.125699 | 266 | 26 | 0.862653 | 0.251575 | 0.420804 | 3 |
| 201 | 3 | 14 | 37 | 0.113385 | 11 | 36 | 1.961125 | 0.121871 | 0.525596 | 3 |
| 52 | 1 | 1 | 7 | 0.105444 | 91 | 15 | 0.571309 | 0.322258 | 0.427276 | 5 |
| 86 | 1 | 10 | 41 | 0.104808 | 3 | 25 | 1.835981 | 0.124215 | 0.540434 | 3 |
| 131 | 2 | 8 | 29 | 0.104168 | 42 | 15 | 0.792322 | 0.248719 | 0.471405 | 3 |
| 77 | 1 | 7 | 32 | 0.101813 | 3 | 28 | 2.014848 | 0.139933 | 0.638893 | 4 |
| 91 | 1 | 12 | 46 | 0.101234 | 1 | 26 | 1.344811 | 0.142968 | 0.473469 | 5 |
| 97 | 1 | 13 | 52 | 0.100772 | 1 | 16 | 1.536831 | 0.159211 | 0.588153 | 2 |
In [156]:
importlib.reload(plotting)
Out[156]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [217]:
def get_module_info(id):
    """
    Look up metadata and the representative log-mean reference for the
    scored node at row `id` of node_score_df, WITHOUT making any plots.

    Relies on notebook globals: node_score_df, dag_stats_df, dag_dict
    and the imported `index` module.

    Returns (cluster_id, node_id, block_id, L_mean, index_handle).
    """
    row = node_score_df.iloc[id]
    cluster_id = int(row["cluster_id"])
    node_id = int(row["node_id"])
    node_mask = (dag_stats_df["cluster_id"] == cluster_id) & (dag_stats_df["node_id"] == node_id)
    block_id = int(dag_stats_df.loc[node_mask, "block_id"].values[0])
    print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
    # Most frequent center (kept for your debug / parity, not strictly needed for L_mean retrieval below)
    block_mask = (dag_stats_df["cluster_id"] == cluster_id) & (dag_stats_df["block_id"] == block_id)
    _ = dag_stats_df.loc[block_mask].sort_values(by="num_spds", ascending=False).node_id.values[0]
    index_handle = dag_dict[cluster_id]
    # Representative log-mean reference for this block
    L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
    return cluster_id, node_id, block_id, L_mean, index_handle
def _fig_to_rgb_array(fig):
    """
    Render a matplotlib figure into an HxWx3 uint8 RGB numpy array.

    The figure is saved to an in-memory PNG buffer and decoded with PIL,
    so the result matches what `fig.savefig(..., format='png')` would
    write to disk (including tight bounding box and padding).
    """
    from io import BytesIO
    from PIL import Image
    # Ensure the canvas has been rendered; some backends only attach a
    # renderer after an explicit draw().
    try:
        fig.canvas.draw()
    except Exception:
        # Bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        # A failed draw is harmless here: savefig below re-renders anyway.
        pass
    buf = BytesIO()
    fig.savefig(buf, format='png', dpi=100, bbox_inches='tight', pad_inches=0.05)
    buf.seek(0)
    img = np.array(Image.open(buf))
    # Drop the alpha channel if the PNG decoded as RGBA; the ndim guard
    # avoids an IndexError should the image ever decode as 2-D grayscale.
    if img.ndim == 3 and img.shape[2] == 4:
        img = img[..., :3]
    buf.close()
    return img
def _as_figure(fig_or_tuple):
# Accept fig, or (fig, ax), or (fig, axs), etc.
if isinstance(fig_or_tuple, tuple):
return fig_or_tuple[0]
return fig_or_tuple
def plot_module_heatmap_plus_spatial(id, module_ids=(0, 1), zero_corr_threshold=0.05):
    """
    Build one combined panel for the scored node at row `id` of node_score_df:
      - Left: corr heatmap with modules (auto-sized based on L_mean.shape[0])
      - Right: spatial module score plots for module_ids (stacked, constant size)

    Relies on notebook globals: node_score_df, dag_stats_df, dag_dict,
    score_list, data, tiles, adata and the imported `index` / `plotting` modules.

    Returns
    -------
    (scores_final, fig) : scores_final is the per-spot score array of the LAST
        module drawn (each loop iteration overwrites it); fig is the combined
        matplotlib figure.
    """
    # Get metadata: resolve (cluster_id, node_id) for this row, then its block
    cluster_id = int(node_score_df.iloc[id]["cluster_id"])
    node_id = int(node_score_df.iloc[id]["node_id"])
    block_id = int(
        dag_stats_df.loc[
            (dag_stats_df["cluster_id"] == cluster_id) & (dag_stats_df["node_id"] == node_id),
            "block_id",
        ].values[0]
    )
    index_handle = dag_dict[cluster_id]
    # Representative log-mean reference for this block (scoring baseline)
    L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
    p = int(L_mean.shape[0])  # gene count in the reference; drives heatmap panel size
    print(f"Creating combined figure for cluster {cluster_id}, node {node_id}, block {block_id}")
    # Make the heatmap in its own figure, then embed it as a raster image so
    # the combined gridspec layout stays simple
    fig_hm, ax_hm, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(
        score_list[id],
        show_values=False,
        value_fontsize=10,
        bold_labels=True,
        figsize=(8, 7),
        filter_zero_correlation=True,
        zero_corr_threshold=zero_corr_threshold,
    )
    hm_img = _fig_to_rgb_array(fig_hm)
    plt.close(fig_hm)  # free the intermediate figure immediately
    # Build the combined figure: heatmap panel scales with gene count,
    # spatial column has fixed width
    heat_w = max(6.5, 0.25 * p)
    heat_h = max(5.0, 0.21 * p)
    spatial_w = 5.5
    total_w = heat_w + spatial_w
    total_h = max(heat_h, 6.5)
    fig = plt.figure(figsize=(total_w, total_h), constrained_layout=False)
    gs = fig.add_gridspec(
        nrows=2, ncols=2,
        width_ratios=[heat_w, spatial_w],
        height_ratios=[1, 1],
        wspace=0.05, hspace=0.10
    )
    ax_heat = fig.add_subplot(gs[:, 0])  # heatmap spans both rows
    ax_sp0 = fig.add_subplot(gs[0, 1])
    ax_sp1 = fig.add_subplot(gs[1, 1])
    # Place heatmap image
    ax_heat.imshow(hm_img)
    ax_heat.set_axis_off()
    ax_heat.set_title(f"Cluster {cluster_id} | Node {node_id} | Block {block_id}", fontsize=10, pad=6)
    # Create spatial plots directly into axes (NO intermediate figures!)
    # NOTE(review): only two spatial axes exist, so at most the first two
    # entries of module_ids are drawn (zip truncates the longer sequence).
    scores_final = None
    for ax_sp, mid in zip([ax_sp0, ax_sp1], module_ids):
        # print(f"Getting module {mid} score for cluster {cluster_id}, node {node_id}, block {block_id}")
        scores_tiles_wrt_ref, info = index.tile_module_scores_from_reference(
            data,
            score_list[id]["modules"][mid],
            index_handle,
            block_id,
            L_mean,
            cluster_id,
        )
        spot_score = plotting.assign_module_score_to_spots(data, scores_tiles_wrt_ref)
        scores = np.array(list(spot_score.values()))
        scores_final = scores  # overwritten each iteration; last module wins
        # Plot directly into the axis - no intermediate figure!
        plotting.plot_spot_module_scores(
            spot_score, tiles, adata,
            ax=ax_sp,  # Pass the axis directly!
            grid=False,
            label=f"Module {mid}",
        )
        ax_sp.set_title(f"Module {mid}", fontsize=10, pad=4)
    return scores_final, fig
In [142]:
# This function is deprecated - use plot_module_heatmap_plus_spatial instead
# def get_module_info(id):
# # Take the top scored node
# cluster_id = int(node_score_df.iloc[id]['cluster_id'])
# node_id = int(node_score_df.iloc[id]['node_id'])
# block_id = int(dag_stats_df[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['node_id'] == node_id)]['block_id'].values[0])
# print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
# fig, ax, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(score_list[id],
# show_values=False, value_fontsize=10, bold_labels=True, figsize=(8,7), filter_zero_correlation=True, zero_corr_threshold=0.05)
# # save
# # fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/corr_heatmap_cluster{cluster_id}_node_{node_id}.png", dpi=300)
#
# # Find out the most frequent center in the same block
# freq_node_center = dag_stats_df.loc[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['block_id'] == block_id)].sort_values(by='num_spds', ascending=False).node_id.values[0]
# index_handle = dag_dict[cluster_id]
# #L_mean = dag_dict[cluster_id].nodes[freq_node_center].metadata.representative_mean
# L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
# return cluster_id, node_id, block_id, L_mean, index_handle
In [219]:
scores_146, fig_146 = plot_module_heatmap_plus_spatial(146)
fig_146.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/module_146_heatmap_spatial.png", dpi=300)
Creating combined figure for cluster 2, node 44, block 11
In [220]:
out_146 = go_score.enrich_modules_with_gseapy(score_list[146]['modules'])
Module 0: enrichment completed with 401 terms Module 1: enrichment completed with 340 terms Module 2: enrichment completed with 270 terms Module 3: enrichment completed with 206 terms
In [ ]:
In [227]:
for module_id, module_genes, res in out_146:
if module_id == 1:
print(module_genes)
fig, axes = go_score.plot_module_enrichment_libraries(
module_id=module_id,
module_genes=module_genes,
res=res,
top_n=7,
ncols=2,
bar_color="#D41515EE", # pick your color
tick_fontsize=10,
label_fontsize=10,
title_fontsize=8,
compact_height_per_term=0.2,
min_fig_h=6,
save_dir="/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/go_enrichment/"
)
if fig is not None:
plt.show()
['ADAM9', 'CCDC6', 'CCND1', 'CD9', 'DSP', 'ELF3', 'EPCAM', 'JUP', 'KRT7', 'KRT8', 'S100A14', 'SERPINA3', 'TACSTD2', 'TRAF4'] ['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
In [158]:
scores_156, fig_156 = plot_module_heatmap_plus_spatial(156)
Creating combined figure for cluster 2, node 54, block 13
Creating combined figure for cluster 2, node 54, block 13
In [175]:
# Create a dictionary of tile_id to block_id
# Clusters do not share tile ids across them, so one flat tile->blocks map is safe
from collections import defaultdict

tile_to_block_dict = {}             # tile_id -> {block_index: block_cluster_id}
cluster_to_tile = defaultdict(set)  # cluster_id -> set of tile_ids in that cluster's DAG
for cluster_id, cluster_dag in dag_dict.items():
    for node in cluster_dag.nodes:
        # node.metadata.members holds (tile_id, <second-field>) pairs; only
        # the tile ids are needed here.
        for tile_id, _ in node.metadata.members:
            # setdefault keeps tile_to_block_dict a plain dict, so later
            # lookups with unknown tile ids still raise/miss instead of
            # silently creating empty entries (as a defaultdict would).
            tile_to_block_dict.setdefault(tile_id, {})[node.block_index] = node.block_cluster_id
            cluster_to_tile[cluster_id].add(tile_id)
In [193]:
importlib.reload(plotting)
Out[193]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [196]:
def get_spot_score_dict(block_id, cluster_id, tile_to_block_dict, grid=False):
    """
    Label every spot of `cluster_id` with the block-cluster id its tile
    carries for `block_id`, then draw the discrete spatial map.

    Tiles of the cluster that are missing from `tile_to_block_dict` are
    skipped. Relies on notebook globals: cluster_to_tile, data, tiles,
    adata and the `plotting` module.

    Returns the (fig, ax) pair from plotting.plot_spot_module_scores.
    """
    spot_score = {}
    for tid in cluster_to_tile[cluster_id]:
        if tid in tile_to_block_dict:
            label = tile_to_block_dict[tid][block_id]
            tile = data.metadata['tiles'][tid]
            for spot_id in tile.idx:
                spot_score[spot_id] = label
    fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata, color='discrete', grid=grid)
    return fig, ax
In [207]:
fig, ax = get_spot_score_dict(5, 2, tile_to_block_dict, grid=False)
In [208]:
node_score_df.loc[(node_score_df['cluster_id'] == 2) & (node_score_df['block_id'] == 5)]
Out[208]:
| cluster_id | block_id | node_id | node_score | num_spds | block_size | E | Q | S | num_modules | |
|---|---|---|---|---|---|---|---|---|---|---|
| 116 | 2 | 5 | 14 | 0.001398 | 350 | 15 | 1.661521 | 0.010595 | 0.920589 | 2 |
| 117 | 2 | 5 | 15 | 0.003958 | 2 | 15 | 0.563854 | 0.051842 | 0.864585 | 4 |
| 118 | 2 | 5 | 16 | 0.001554 | 22 | 15 | 0.287832 | 0.029581 | 0.817513 | 5 |
| 119 | 2 | 5 | 17 | 0.010833 | 67 | 15 | 0.351048 | 0.100392 | 0.692624 | 3 |
| 120 | 2 | 5 | 18 | 0.007216 | 63 | 15 | 0.419383 | 0.075795 | 0.772996 | 4 |
In [214]:
scores_116, fig_116 = plot_module_heatmap_plus_spatial(116)
Creating combined figure for cluster 2, node 14, block 5
In [215]:
fig_116.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/module_heatmap_plus_spatial_node_116.png", dpi=300)
In [ ]:
In [ ]:
scores_102, fig_102 = plot_module_heatmap_plus_spatial()
In [211]:
out = go_score.enrich_modules_with_gseapy(score_list[116]['modules'])
Module 0: enrichment completed with 239 terms Module 1: skipped (only 2 genes)
In [212]:
# Plot per-library enrichment bars for each module of node 116.
# `out` holds (module_id, module_genes, res) triples from
# go_score.enrich_modules_with_gseapy; a module skipped during enrichment
# (e.g. "Module 1: skipped (only 2 genes)") can come back without a
# figure, hence the None check before showing.
for module_id, module_genes, res in out:
    fig, axes = go_score.plot_module_enrichment_libraries(
        module_id=module_id,
        module_genes=module_genes,
        res=res,
        top_n=7,
        ncols=2,
        bar_color="#D41515EE", # pick your color
        tick_fontsize=10,
        label_fontsize=8,
        title_fontsize=8,
        compact_height_per_term=0.2,
        min_fig_h=8,
        #save_dir="/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/go_enrichment/"
    )
    if fig is not None:
        plt.show()
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
In [ ]:
In [ ]:
In [168]:
importlib.reload(plotting)
Out[168]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [ ]:
In [165]:
len(tile_to_block_dict)
Out[165]:
2081
In [ ]:
In [166]:
len(tiles)
Out[166]:
2081
In [ ]:
In [162]:
dag_stats_df[['cluster_id', 'block_id', 'block_cluster_id', ]]
Out[162]:
| cluster_id | block_id | block_cluster_id | |
|---|---|---|---|
| 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 1 |
| 2 | 0 | 1 | 0 |
| 3 | 0 | 1 | 1 |
| 4 | 0 | 1 | 2 |
| ... | ... | ... | ... |
| 197 | 3 | 12 | 1 |
| 198 | 3 | 13 | 0 |
| 199 | 3 | 13 | 1 |
| 200 | 3 | 14 | 0 |
| 201 | 3 | 14 | 1 |
202 rows × 3 columns
In [ ]:
In [123]:
node_score_df.sort_values(by='node_score', ascending=False).head(20)
Out[123]:
| cluster_id | block_id | node_id | node_score | num_spds | block_size | E | Q | S | num_modules | |
|---|---|---|---|---|---|---|---|---|---|---|
| 75 | 1 | 6 | 30 | 0.239966 | 1 | 38 | 1.932651 | 0.254274 | 0.511693 | 3 |
| 146 | 2 | 11 | 44 | 0.165974 | 1 | 49 | 1.627859 | 0.164071 | 0.378574 | 4 |
| 5 | 0 | 1 | 5 | 0.159690 | 91 | 15 | 0.774484 | 0.390674 | 0.472224 | 4 |
| 94 | 1 | 12 | 49 | 0.158631 | 13 | 26 | 1.273377 | 0.231421 | 0.461699 | 4 |
| 74 | 1 | 6 | 29 | 0.156417 | 5 | 38 | 1.526276 | 0.186053 | 0.449176 | 6 |
| 156 | 2 | 13 | 54 | 0.150369 | 2 | 18 | 0.935547 | 0.288826 | 0.443511 | 5 |
| 60 | 1 | 4 | 15 | 0.148257 | 397 | 21 | 0.976389 | 0.243900 | 0.377444 | 7 |
| 145 | 2 | 11 | 43 | 0.148110 | 1 | 49 | 2.225433 | 0.125890 | 0.471341 | 4 |
| 89 | 1 | 12 | 44 | 0.140268 | 543 | 26 | 1.321063 | 0.188080 | 0.435467 | 3 |
| 173 | 3 | 3 | 9 | 0.136184 | 49 | 20 | 0.794279 | 0.282274 | 0.392594 | 4 |
| 80 | 1 | 8 | 35 | 0.132119 | 1 | 27 | 1.412164 | 0.179984 | 0.480190 | 4 |
| 79 | 1 | 8 | 34 | 0.131210 | 2 | 27 | 1.464330 | 0.189470 | 0.527084 | 4 |
| 198 | 3 | 13 | 34 | 0.125699 | 266 | 26 | 0.862653 | 0.251575 | 0.420804 | 3 |
| 201 | 3 | 14 | 37 | 0.113385 | 11 | 36 | 1.961125 | 0.121871 | 0.525596 | 3 |
| 52 | 1 | 1 | 7 | 0.105444 | 91 | 15 | 0.571309 | 0.322258 | 0.427276 | 5 |
| 86 | 1 | 10 | 41 | 0.104808 | 3 | 25 | 1.835981 | 0.124215 | 0.540434 | 3 |
| 131 | 2 | 8 | 29 | 0.104168 | 42 | 15 | 0.792322 | 0.248719 | 0.471405 | 3 |
| 77 | 1 | 7 | 32 | 0.101813 | 3 | 28 | 2.014848 | 0.139933 | 0.638893 | 4 |
| 91 | 1 | 12 | 46 | 0.101234 | 1 | 26 | 1.344811 | 0.142968 | 0.473469 | 5 |
| 97 | 1 | 13 | 52 | 0.100772 | 1 | 16 | 1.536831 | 0.159211 | 0.588153 | 2 |
In [ ]:
In [97]:
cluster_id, node_id, block_id, L_mean, index_handle = get_module_info(146)
Top scored node is in cluster 2, node 44, block 11 Module of size 20: mean abs corr = 0.2477 Adding module of size 20 with mean abs corr = 0.2477 Module of size 14: mean abs corr = 0.4084 Adding module of size 14 with mean abs corr = 0.4084 Module of size 8: mean abs corr = 0.2126 Adding module of size 8 with mean abs corr = 0.2126 Module of size 7: mean abs corr = 0.2540 Adding module of size 7 with mean abs corr = 0.2540 Using 4 modules for heatmap plotting.
In [101]:
module_id = 1
scores_146, fig_spatial = get_module_score_plot_spatial(146, module_id, cluster_id, node_id, block_id, L_mean, index_handle)
Getting module 1 score for cluster 2, node 44, block 11
In [102]:
cluster_id, node_id, block_id, L_mean, index_handle = get_module_info(145)
Top scored node is in cluster 2, node 43, block 11 Module of size 28: mean abs corr = 0.2114 Adding module of size 28 with mean abs corr = 0.2114 Module of size 18: mean abs corr = 0.5462 Adding module of size 18 with mean abs corr = 0.5462 Module of size 2: mean abs corr = 0.5234 Adding module of size 2 with mean abs corr = 0.5234 Using 3 modules for heatmap plotting.
In [ ]:
In [103]:
module_id = 1
scores_145, fig_spatial = get_module_score_plot_spatial(145, module_id, cluster_id, node_id, block_id, L_mean, index_handle)
Getting module 1 score for cluster 2, node 43, block 11
In [113]:
plt.scatter(scores_145, scores_146, s=1, alpha=0.1)
plt.ylabel(f"Cluster{cluster_id} Module 1 Score in Node 43")
plt.xlabel(f"Cluster{cluster_id} Module 1 Score in Node 44")
Out[113]:
Text(0.5, 0, 'Cluster2 Module 1 Score in Node 44')
In [108]:
plt.hist(scores_145, bins=50, alpha=0.5);
plt.hist(scores_146, bins=50, alpha=0.5);
In [ ]:
In [89]:
#id = node_score_df.sort_values(by='node_score', ascending=False).index[1]
id = 146
# Take the top scored node
cluster_id = int(node_score_df.iloc[id]['cluster_id'])
node_id = int(node_score_df.iloc[id]['node_id'])
block_id = int(dag_stats_df[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['node_id'] == node_id)]['block_id'].values[0])
print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
fig, ax, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(score_list[id],
show_values=False, value_fontsize=10, bold_labels=True, figsize=(8,7), filter_zero_correlation=True, zero_corr_threshold=0.05)
# save
fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/corr_heatmap_cluster{cluster_id}_node_{node_id}.png", dpi=300)
# Find out the most frequent center in the same block
freq_node_center = dag_stats_df.loc[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['block_id'] == block_id)].sort_values(by='num_spds', ascending=False).node_id.values[0]
index_handle = dag_dict[cluster_id]
#L_mean = dag_dict[cluster_id].nodes[freq_node_center].metadata.representative_mean
L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
Top scored node is in cluster 2, node 44, block 11 Module of size 20: mean abs corr = 0.2477 Adding module of size 20 with mean abs corr = 0.2477 Module of size 14: mean abs corr = 0.4084 Adding module of size 14 with mean abs corr = 0.4084 Module of size 8: mean abs corr = 0.2126 Adding module of size 8 with mean abs corr = 0.2126 Module of size 7: mean abs corr = 0.2540 Adding module of size 7 with mean abs corr = 0.2540 Using 4 modules for heatmap plotting.
In [ ]:
In [90]:
importlib.reload(plotting)
Out[90]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [91]:
importlib.reload(index)
Out[91]:
<module 'spindle_dev.index' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/index.py'>
In [92]:
# Differential LE score for all tiles for module 2
module_id = 1
scores_tiles_wrt_ref, info = index.tile_module_scores_from_reference(data, score_list[id]['modules'][module_id], index_handle, block_id, L_mean, cluster_id)
# assign module scores to spots
spot_score = plotting.assign_module_score_to_spots(data, scores_tiles_wrt_ref)
scores = np.array(list(spot_score.values()))
# fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
# # save
# fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_module{module_id}_score_cluster{cluster_id}_node{node_id}.png", dpi=300)
In [93]:
importlib.reload(plotting)
Out[93]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [94]:
fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
In [88]:
fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
In [71]:
from spindle_dev import go_score
In [ ]:
out = go_score.enrich_modules_with_gseapy(score_list[146]['modules'])
Module 0: enrichment completed with 401 terms Module 1: enrichment completed with 340 terms Module 2: enrichment completed with 270 terms Module 3: enrichment completed with 206 terms
In [76]:
for module_id, module_genes, res in out:
fig, axes = go_score.plot_module_enrichment_libraries(
module_id=module_id,
module_genes=module_genes,
res=res,
top_n=7,
ncols=2,
bar_color="#D41515EE", # pick your color
tick_fontsize=10,
label_fontsize=8,
title_fontsize=8,
compact_height_per_term=0.2,
min_fig_h=8,
#save_dir="/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/go_enrichment/"
)
if fig is not None:
plt.show()
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
In [39]:
from spindle_dev import index
importlib.reload(index)
Out[39]:
<module 'spindle_dev.index' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/index.py'>
In [40]:
import pandas as pd
In [41]:
cluster_id = 0
index_handle = dag_dict[cluster_id]
node_rows, per_node_tile_scores, tile_z = index.node_scores_from_reference(data, index_handle, cluster_id, representative_mean=True)
In [42]:
import pandas as pd
In [58]:
node_df = pd.DataFrame(node_rows).sort_values('z_scale', ascending=False)
In [60]:
node_score_df.loc[(node_score_df.cluster_id == cluster_id) & (node_score_df.node_id == 21)]
Out[60]:
| cluster_id | node_id | node_score | num_spds | block_size | E | Q | S | num_modules | |
|---|---|---|---|---|---|---|---|---|---|
| 21 | 0 | 21 | 0.052074 | 659 | 15 | 0.758758 | 0.153221 | 0.552082 | 3 |
In [59]:
node_df.head(10)
Out[59]:
| block_id | cluster_id | node_id | node_index | prevalence | median_dist | mad_dist | quality | interest | k_genes | z_median | z_mad | z_scale | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 21 | 7 | 0 | 21 | 21 | 1.0 | 3.609869 | 1.693648 | 0.004974 | 0.004974 | 15 | 3.609869 | 1.693648 | 2.511002 |
| 23 | 8 | 0 | 23 | 23 | 1.0 | 3.275327 | 1.256047 | 0.010766 | 0.010766 | 15 | 3.275327 | 1.256047 | 1.862216 |
| 16 | 5 | 0 | 16 | 16 | 1.0 | 3.077620 | 1.237488 | 0.013365 | 0.013365 | 15 | 3.077620 | 1.237488 | 1.834699 |
| 26 | 9 | 0 | 26 | 26 | 1.0 | 3.195635 | 1.227180 | 0.012000 | 0.012000 | 15 | 3.195635 | 1.227180 | 1.819417 |
| 5 | 1 | 0 | 5 | 5 | 1.0 | 4.973354 | 1.050387 | 0.002421 | 0.002421 | 15 | 4.973354 | 1.050387 | 1.557303 |
| 13 | 4 | 0 | 13 | 13 | 1.0 | 4.723527 | 0.958816 | 0.003406 | 0.003406 | 15 | 4.723527 | 0.958816 | 1.421540 |
| 10 | 3 | 0 | 10 | 10 | 1.0 | 5.474510 | 0.894123 | 0.001715 | 0.001715 | 15 | 5.474510 | 0.894123 | 1.325627 |
| 34 | 11 | 0 | 35 | 35 | 1.0 | 2.700770 | 0.753236 | 0.031619 | 0.031619 | 24 | 2.700770 | 0.753236 | 1.116748 |
| 30 | 10 | 0 | 30 | 30 | 1.0 | 3.227608 | 0.726557 | 0.019175 | 0.019175 | 18 | 3.227608 | 0.726557 | 1.077193 |
| 7 | 2 | 0 | 7 | 7 | 1.0 | 6.275040 | 0.642391 | 0.000990 | 0.000990 | 15 | 6.275040 | 0.642391 | 0.952408 |
In [61]:
id = node_score_df.index[21]
# Take the top scored node
cluster_id = int(node_score_df.iloc[id]['cluster_id'])
node_id = int(node_score_df.iloc[id]['node_id'])
block_id = int(dag_stats_df[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['node_id'] == node_id)]['block_id'].values[0])
print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
fig, ax, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(score_list[id],
show_values=False, value_fontsize=10, bold_labels=True, figsize=(8,7), filter_zero_correlation=True, zero_corr_threshold=0.05)
# save
fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/corr_heatmap_cluster{cluster_id}_node_{node_id}.png", dpi=300)
# Find out the most frequent center in the same block
freq_node_center = dag_stats_df.loc[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['block_id'] == block_id)].sort_values(by='num_spds', ascending=False).node_id.values[0]
index_handle = dag_dict[cluster_id]
#L_mean = dag_dict[cluster_id].nodes[freq_node_center].metadata.representative_mean
L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
Top scored node is in cluster 0, node 21, block 7 Module of size 6: mean abs corr = 0.2030 Adding module of size 6 with mean abs corr = 0.2030 Module of size 5: mean abs corr = 0.2500 Adding module of size 5 with mean abs corr = 0.2500 Module of size 4: mean abs corr = 0.3630 Adding module of size 4 with mean abs corr = 0.3630 Using 3 modules for heatmap plotting.
In [62]:
# Differential LE score for all tiles for module 2
module_id = 2
scores_tiles_wrt_ref, info = index.tile_module_scores_from_reference(data, score_list[id]['modules'][module_id], index_handle, block_id, L_mean, cluster_id)
# assign module scores to spots
spot_score = plotting.assign_module_score_to_spots(data, scores_tiles_wrt_ref)
scores = np.array(list(spot_score.values()))
# fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
# # save
# fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_module{module_id}_score_cluster{cluster_id}_node{node_id}.png", dpi=300)
In [63]:
fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
In [46]:
from collections import defaultdict
def block_dag_edges_consecutive(nodes, block_id_from_members=True):
    """
    Group DAG nodes into layers by `block_index` and count the edges
    between CONSECUTIVE layers only, where an edge (u, v, c) means `c`
    SPD ids are assigned to node u in one layer and node v in the next.

    When `block_id_from_members` is True (default), a member (spd_id,
    blk_id) only counts toward a layer if its blk_id tag matches that
    layer; otherwise all members count.

    Returns (layers, ordered_by_layer, edges) where ordered_by_layer maps
    each layer to its nodes sorted by (order_id, global_node_id).
    """
    grouped = defaultdict(list)
    for node in nodes:
        grouped[node.block_index].append(node)
    layers = sorted(grouped)
    # Stable within-layer order: (order_id, global_node_id)
    ordered_by_layer = {}
    for layer in layers:
        ordered_by_layer[layer] = sorted(
            grouped[layer], key=lambda n: (n.order_id, n.global_node_id)
        )
    # Per layer: spd_id -> owning node's global id
    owner = {layer: {} for layer in layers}
    for layer in layers:
        for node in ordered_by_layer[layer]:
            for spd_id, blk_id in node.metadata.members:
                if blk_id == layer or not block_id_from_members:
                    owner[layer][spd_id] = node.global_node_id
    # Count shared SPD ids between each pair of consecutive layers
    edges = []  # list of (u_id, v_id, count)
    for lo, hi in zip(layers, layers[1:]):
        counts = defaultdict(int)
        for spd_id in owner[lo].keys() & owner[hi].keys():
            counts[(owner[lo][spd_id], owner[hi][spd_id])] += 1
        edges.extend((u, v, c) for (u, v), c in counts.items())
    return layers, ordered_by_layer, edges
def block_dag_to_dot(layers, ordered_by_layer, edges, node_label_fn=None, edge_label_fn=None):
    """
    Serialize a layered block DAG to Graphviz DOT text.

    Parameters
    ----------
    layers : list of layer (block) indices, drawn top to bottom.
    ordered_by_layer : dict mapping layer -> list of node objects; each
        node must expose `global_node_id`.
    edges : iterable of (u_id, v_id, count) tuples.
    node_label_fn : callable(node) -> str; defaults to str(global_node_id).
    edge_label_fn : callable(count) -> str; defaults to str(count).

    Returns
    -------
    str : the complete DOT source ("digraph G { ... }").
    """
    if node_label_fn is None:
        node_label_fn = lambda n: str(n.global_node_id)
    if edge_label_fn is None:
        edge_label_fn = lambda c: str(c)
    # (A node_id -> layer map was previously built here but never used; removed.)
    lines = []
    lines.append("digraph G {")
    lines.append(' graph [rankdir=TB, splines=true, nodesep=0.25, ranksep=0.5];')
    lines.append(' node [shape=circle, fixedsize=true, width=0.28, fontsize=10, penwidth=1];')
    lines.append(' edge [fontsize=9, penwidth=1, arrowsize=0.6];')
    # --- ranks (each layer on one horizontal row)
    for b in layers:
        ids = [n.global_node_id for n in ordered_by_layer[b]]
        lines.append(f" subgraph cluster_rank_{b} {{")
        lines.append(" rank=same;")
        # invisible box for rank grouping label if you want later
        for nid in ids:
            lines.append(f' "{nid}";')
        lines.append(" }")
    # --- node labels (double quotes escaped so labels cannot break DOT syntax)
    for b in layers:
        for n in ordered_by_layer[b]:
            nid = n.global_node_id
            label = node_label_fn(n).replace('"', '\\"')
            lines.append(f' "{nid}" [label="{label}"];')
    # --- edges with labels (counts)
    for u, v, c in edges:
        elab = edge_label_fn(c).replace('"', '\\"')
        lines.append(f' "{u}" -> "{v}" [label="{elab}"];')
    lines.append("}")
    return "\n".join(lines)
def render_dot(dot_str, out_path, fmt="png"):
    """
    Render a DOT string to an image file via Graphviz.

    Requires:
      - system graphviz (dot)
      - python package: graphviz (pip install graphviz)

    Parameters
    ----------
    dot_str : str
        DOT source text (e.g. from block_dag_to_dot).
    out_path : str
        Desired output path; the extension is stripped because graphviz
        appends ".<fmt>" itself when rendering.
    fmt : str
        Output format passed to Graphviz ("png", "svg", "pdf", ...).

    Returns
    -------
    str : path of the rendered file, as reported by graphviz's render().
    """
    from graphviz import Source
    src = Source(dot_str, format=fmt)
    # graphviz adds extension automatically; keep clean name
    filename = out_path.rsplit(".", 1)[0]
    # cleanup=True removes the intermediate DOT source file after rendering
    return src.render(filename=filename, cleanup=True)
In [47]:
layers, ordered_by_layer, edges = block_dag_edges_consecutive(dag_dict[0].nodes)
dot = block_dag_to_dot(
layers,
ordered_by_layer,
edges,
node_label_fn=lambda n: f"{n.order_id}", # or cluster id, or short name
edge_label_fn=lambda c: str(c), # true SPD counts
)
# save dot text
with open("block_dag.dot", "w") as f:
f.write(dot)
# render (optional)
render_dot(dot, "block_dag.png", fmt="png")
Out[47]:
'block_dag.png'
In [ ]:
In [ ]:
In [48]:
from graphviz import Source
In [49]:
Source(dot)
Out[49]:
In [ ]: