In [6]:
# Set up sys.path so that 'src/spindle_dev' is importable as 'spindle_dev'
import sys
import importlib
from pathlib import Path

project_root = '/data/sarkar_lab/Projects/spindle_dev'
src_path = Path(project_root) / 'src'
src_entry = str(src_path)
if src_entry not in sys.path:
    sys.path.insert(0, src_entry)

import spindle_dev
# Reload so edits to the package are picked up without a kernel restart
importlib.reload(spindle_dev)
Out[6]:
<module 'spindle_dev' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/__init__.py'>
In [7]:
import scanpy as sc
In [8]:
adata = sc.read_h5ad("/data/sarkar_lab/zenodo_data/2023_xenium_human_breast_cancer/adata.h5ad")
In [9]:
adata = adata[adata.obs.loc[adata.obs.Cluster != "Unlabeled"].index, :].copy()
In [10]:
sc.pl.spatial(adata, color=["Cluster"], spot_size=13)
/tmp/ipykernel_3030422/1942974121.py:1: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
  sc.pl.spatial(adata, color=["Cluster"], spot_size=13)
No description has been provided for this image
In [11]:
# Reload spindle_dev and its submodules to pick up local code edits
# without restarting the kernel.
import importlib
import spindle_dev
import spindle_dev.metrics as metrics
import spindle_dev.index as index
import spindle_dev.preprocessing as preprocessing
import spindle_dev.plotting as plotting
import spindle_dev.typing as typing
import spindle_dev.test as test
import spindle_dev.search as search

# Reload in dependency order (leaf modules first, then the package)
# so new symbols are available to dependents.
for _mod in (metrics, index, preprocessing, plotting, spindle_dev, typing, test):
    importlib.reload(_mod)
# Keep this last so the reloaded module is the cell's displayed output.
importlib.reload(search)
Out[11]:
<module 'spindle_dev.search' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/search.py'>
In [12]:
def prepare_to_index(adata, resolution=0.2, min_final_size=20, max_final_size=100):
    """Tile the tissue, build per-tile covariance SPDs, and prepare them for indexing.

    Parameters
    ----------
    adata : AnnData with spatial coordinates in ``obsm['spatial']``.
    resolution : Leiden resolution for clustering the SPD matrices.
    min_final_size : lower size guard for the adaptive block runs.
    max_final_size : upper size guard for the adaptive block runs
        (previously hard-coded to 100; default preserves old behavior).

    Returns
    -------
    (tiles, tile_covs, genes_work, data, out_dict)
    """
    coords = adata.obsm["spatial"]
    # Quadtree tiling: split until each tile holds at most 200 spots.
    tiles = preprocessing.build_quadtree_tiles(coords, max_pts=200, min_side=0.0, max_depth=40)
    # G = n_vars keeps every gene, so the top-variance selection is a pass-through here.
    genes_work, gene_idx = preprocessing.topvar_genes(adata, G=adata.n_vars)
    tile_covs = preprocessing.build_tile_covs_full(adata, tiles, gene_idx, n_jobs=8, eps=1e-6)
    data = index.ProcessedData(tiles, tile_covs, genes_work, adata.n_obs)
    data.reduce_dim(num_pca_components=30, n_components=2, do_umap=True)
    data.cluster_spds(cluster_distance="tree", cluster_method="leiden", resolution=resolution)
    data.assign_label_to_spots()
    data.get_corr_mean_by_cluster()
    out_dict = data.get_adaptive_runs(
        find_blocks=True,
        with_size_guard=True,
        min_final_size=min_final_size,
        max_final_size=max_final_size,
    )
    return tiles, tile_covs, genes_work, data, out_dict
In [13]:
tiles, tile_covs, genes_work, data, out_dict = prepare_to_index(adata, min_final_size=15)
[2026-01-19 12:59:14,378] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance.
[2026-01-19 12:59:14,393] INFO spindle_dev.index: Building ultrametric features from SPD matrices.
[2026-01-19 12:59:25,299] INFO spindle_dev.index: Computing latent features from the tree representations.
[2026-01-19 12:59:25,964] INFO spindle_dev.index: Reducing latent features to 30 dimensions using PCA.
[2026-01-19 12:59:35,508] INFO spindle_dev.index: Explained variance ratios by PCA components: [0.06878107 0.05380314 0.02646384 0.01915205 0.01300586 0.01025453
 0.00781812 0.00735731 0.00665263 0.00645236 0.00500298 0.00465103
 0.00423426 0.00416654 0.00382042 0.00364907 0.00355717 0.00348549
 0.00334294 0.00319313 0.00303316 0.00301945 0.00300855 0.00294676
 0.00288963 0.00281065 0.00275823 0.00274145 0.00271152 0.00265672]
[2026-01-19 12:59:35,510] INFO spindle_dev.index: Reducing latent features to 2 dimensions using UMAP.
/panfs/accrepfs.vampire/home/sarkah1/miniforge3/envs/spatial/lib/python3.10/site-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(
[2026-01-19 12:59:47,595] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance.
[2026-01-19 12:59:47,596] INFO spindle_dev.index: Clustering SPD matrices using Leiden clustering with resolution 0.20.
[2026-01-19 12:59:47,814] INFO spindle_dev.index: Since clustering method is tree, I am going to find global order per cluster
[2026-01-19 12:59:47,815] INFO spindle_dev.index: Finding consensus tree for cluster 0
[2026-01-19 12:59:47,916] INFO spindle_dev.index: Finding consensus tree for cluster 1
[2026-01-19 12:59:47,996] INFO spindle_dev.index: Finding consensus tree for cluster 2
[2026-01-19 12:59:48,068] INFO spindle_dev.index: Finding consensus tree for cluster 3
[2026-01-19 12:59:48,607] INFO spindle_dev.index: Computing mean correlation matrix for cluster 0
[2026-01-19 12:59:49,415] INFO spindle_dev.index: Computing mean correlation matrix for cluster 1
[2026-01-19 12:59:50,019] INFO spindle_dev.index: Computing mean correlation matrix for cluster 2
[2026-01-19 12:59:50,508] INFO spindle_dev.index: Computing mean correlation matrix for cluster 3
[2026-01-19 12:59:50,776] INFO spindle_dev.index: Finding adaptive block runs for cluster 0
[2026-01-19 12:59:51,045] INFO spindle_dev.index:  Chose t=0.7638580524895158 resulting in 200 blocks instead of 61 blocks would have gotten by default
[2026-01-19 12:59:51,076] INFO spindle_dev.index:  Final block runs for cluster 0: 16 blocks.
[2026-01-19 12:59:51,077] INFO spindle_dev.index: Finding adaptive block runs for cluster 1
[2026-01-19 12:59:51,351] INFO spindle_dev.index:  Chose t=0.8451053062066296 resulting in 159 blocks instead of 37 blocks would have gotten by default
[2026-01-19 12:59:51,370] INFO spindle_dev.index:  Final block runs for cluster 1: 14 blocks.
[2026-01-19 12:59:51,371] INFO spindle_dev.index: Finding adaptive block runs for cluster 2
[2026-01-19 12:59:51,641] INFO spindle_dev.index:  Chose t=0.7649764855224312 resulting in 188 blocks instead of 40 blocks would have gotten by default
[2026-01-19 12:59:51,669] INFO spindle_dev.index:  Final block runs for cluster 2: 16 blocks.
[2026-01-19 12:59:51,669] INFO spindle_dev.index: Finding adaptive block runs for cluster 3
[2026-01-19 12:59:51,939] INFO spindle_dev.index:  Chose t=0.8187994098657192 resulting in 220 blocks instead of 98 blocks would have gotten by default
[2026-01-19 12:59:51,977] INFO spindle_dev.index:  Final block runs for cluster 3: 15 blocks.
In [14]:
import matplotlib.pylab as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
In [15]:
import numpy as np
# Color each spot by its learned SPD-cluster label (data.spot_label: spot -> cluster id)
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
# NOTE(review): `labels` is not used in this cell.
labels = data.labels
# Cast keys to int for positional coordinate lookup — assumes keys index into
# adata.obsm['spatial'] rows; TODO confirm key dtype.
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']
plt.scatter(coords[indices,0], coords[indices,1], s=1, c=point_colors, lw=0, alpha=0.8)
# Overlay the quadtree tile boundaries as unfilled rectangles.
patches = []
for t in tiles:
    bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin,ymin,xmax,ymax)
    x0, y0, x1, y1 = bbox
    patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
pc = PatchCollection(patches, match_original=True, linewidths=0.2, edgecolors='b', alpha=0.3)
# add collection
ax = plt.gca()
ax.add_collection(pc)
# Flip y so the orientation matches the image-style spatial coordinates.
ax.invert_yaxis()
ax.axis('off')

plt.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_plot_with_cluster.png", dpi=300)
plt.show()
No description has been provided for this image
In [237]:
# Highlight cluster 3 spatially; all other spots are greyed out.
# NOTE(review): execution count (In[237]) is out of order — this cell was run
# after later cells; verify under Restart & Run All.
import numpy as np
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
labels = data.labels
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']

# Create new color array - grey for all except cluster 3
# NOTE(review): `idx` from the zip is unused; iterating values alone would do.
new_point_colors = []
for idx, lab in zip(indices, data.spot_label.values()):
    if lab == 3:
        new_point_colors.append(palette[lab])
    else:
        new_point_colors.append("#E9E6E6")  # grey

plt.scatter(coords[indices,0], coords[indices,1], s=1, c=new_point_colors, lw=0, alpha=0.8)
ax = plt.gca()
# Overlay the quadtree tile boundaries (faint, unfilled rectangles).
patches = []
for t in tiles:
    bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin,ymin,xmax,ymax)
    x0, y0, x1, y1 = bbox
    patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
pc = PatchCollection(patches, match_original=True, linewidths=0.01, edgecolors='b', alpha=0.1)
# add collection
ax.add_collection(pc)
ax.invert_yaxis()
ax.axis('off')

plt.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_plot_cluster3_only.png", dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [16]:
# Create colors for clusters
import numpy as np
import matplotlib.pyplot as plt
n_clusters = len(set(data.labels))
# Use Scanpy's default_20 palette for consistency
palette = sc.pl.palettes.default_20
cluster_colors = [palette[lab] for lab in data.labels]
# UMAP embedding of the SPD latent features, colored by cluster.
plt.scatter(data.latent['umap'][:,0], data.latent['umap'][:,1], s=6, lw=0, alpha=0.8, c=cluster_colors)
# Write label on the plot
# with background circle for better visibility
# and bold font
for i in range(n_clusters):
    # NOTE(review): boolean-mask indexing assumes data.labels is a numpy array — confirm.
    cluster_points = data.latent['umap'][data.labels == i]
    if len(cluster_points) == 0:
        continue
    x_mean = np.mean(cluster_points[:,0])
    y_mean = np.mean(cluster_points[:,1])
    # Place the cluster id at the cluster centroid, inside a white circle.
    plt.text(x_mean, y_mean, str(i), color='black', fontsize=10, ha='center', va='center', fontweight='bold',
             bbox=dict(facecolor='white', edgecolor='black', boxstyle='circle,pad=0.5', alpha=0.7))
# take away top and right border
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# remove axis ticks
ax.set_xticks([])
ax.set_yticks([])
plt.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/umap_plot_with_cluster_labels.png", dpi=300)
plt.show()
No description has been provided for this image
In [17]:
# Choose adaptive epsilons per cluster for the epsilon-net index:
# a per-block (elbow) estimate and a single cluster-level value.
# NOTE(review): `eps_per_block` is unpacked but never used afterwards.
epsilon_block_wise_dict = {}
epsilon_dict = {}
for cluster_id in set(data.labels):
    eps_per_block, eps_elbow_per_block, eps = index.choose_adaptive_epsilons(data, cluster_id, k_target_per_block=64)
    epsilon_block_wise_dict[int(cluster_id)] = eps_elbow_per_block
    epsilon_dict[int(cluster_id)] = eps
In [ ]:
 
In [18]:
# Configure the index: per-block elbow epsilons with epsilon-net clustering.
config = typing.IndexConfig()
config.epsilon_dict = epsilon_dict
config.epsilon_block_wise_dict = epsilon_block_wise_dict
config.threshold_type = 'block_wise'
config.kmean_method = 'epsilon_net'
In [19]:
epsilon_block_wise_dict
Out[19]:
{0: {0: 7.551487154783578,
  1: 7.688574605978839,
  2: 8.348619550689628,
  3: 8.12886229785936,
  4: 6.941772257682238,
  5: 7.11207711085268,
  6: 7.4634645865588745,
  7: 8.12941780999703,
  8: 7.394765319810765,
  9: 6.714566808424555,
  10: 7.174724333051149,
  11: 6.700368214926148,
  12: 6.223964337518237,
  13: 5.151625529484852,
  14: 4.577674131656347,
  15: 9.7037491104398},
 1: {0: 7.34932238515324,
  1: 8.493449641575141,
  2: 8.901136453589094,
  3: 7.8762835362284145,
  4: 7.151354147881101,
  5: 7.800650669737451,
  6: 8.425077404859794,
  7: 8.975881255696738,
  8: 8.588472145359653,
  9: 5.793292280974387,
  10: 9.691527557373046,
  11: 9.951465035398689,
  12: 6.811198569674318,
  13: 4.351012706756592},
 2: {0: 6.799333509719286,
  1: 8.557316793247027,
  2: 8.452427434089639,
  3: 7.588833091125586,
  4: 6.785369982212529,
  5: 5.5096939776630975,
  6: 5.248055444220603,
  7: 5.746564254670582,
  8: 5.975592355011169,
  9: 3.1322101267631437,
  10: 4.466462934560956,
  11: 8.84131131853376,
  12: 6.445534355120787,
  13: 5.415287845182453,
  14: 6.687208104027927,
  15: 7.126931829888523},
 3: {0: 7.388558401037387,
  1: 7.783292293548584,
  2: 8.289245743649037,
  3: 7.992608086706223,
  4: 7.854193064200638,
  5: 8.034181459182257,
  6: 7.878430512484119,
  7: 8.066423324555597,
  8: 9.167032732265142,
  9: 7.850585689461382,
  10: 8.363619363357321,
  11: 6.642502405394938,
  12: 10.199246521661854,
  13: 8.335221526620284,
  14: 8.539956410725912}}
In [20]:
# Build the per-cluster block-DAG index from the processed data and config.
dag_dict, stat, dist_list = index.index_spds(data, config=config)
[2026-01-19 12:59:59,265] INFO spindle_dev.index: Processing cluster 0
[2026-01-19 12:59:59,266] INFO spindle_dev.index: Building SPD index with epsilon=7.570320670118333
[2026-01-19 12:59:59,267] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-19 12:59:59,300] INFO spindle_dev.index: Cluster 0: 733 SPDs, 16 blocks
[2026-01-19 12:59:59,301] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-19 12:59:59,356] INFO spindle_dev.index:  Finished block 0 in 0.05 seconds, found 2 clusters.
[2026-01-19 12:59:59,356] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-19 12:59:59,412] INFO spindle_dev.index:  Finished block 1 in 0.06 seconds, found 5 clusters.
[2026-01-19 12:59:59,413] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-19 12:59:59,458] INFO spindle_dev.index:  Finished block 2 in 0.05 seconds, found 3 clusters.
[2026-01-19 12:59:59,459] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-19 12:59:59,506] INFO spindle_dev.index:  Finished block 3 in 0.05 seconds, found 3 clusters.
[2026-01-19 12:59:59,507] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-19 12:59:59,555] INFO spindle_dev.index:  Finished block 4 in 0.05 seconds, found 3 clusters.
[2026-01-19 12:59:59,556] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-19 12:59:59,605] INFO spindle_dev.index:  Finished block 5 in 0.05 seconds, found 3 clusters.
[2026-01-19 12:59:59,606] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-19 12:59:59,664] INFO spindle_dev.index:  Finished block 6 in 0.06 seconds, found 2 clusters.
[2026-01-19 12:59:59,665] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-19 12:59:59,708] INFO spindle_dev.index:  Finished block 7 in 0.04 seconds, found 2 clusters.
[2026-01-19 12:59:59,709] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-19 12:59:59,758] INFO spindle_dev.index:  Finished block 8 in 0.05 seconds, found 3 clusters.
[2026-01-19 12:59:59,759] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-19 12:59:59,814] INFO spindle_dev.index:  Finished block 9 in 0.06 seconds, found 4 clusters.
[2026-01-19 12:59:59,815] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-19 12:59:59,883] INFO spindle_dev.index:  Finished block 10 in 0.07 seconds, found 5 clusters.
[2026-01-19 12:59:59,884] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-19 12:59:59,954] INFO spindle_dev.index:  Finished block 11 in 0.07 seconds, found 2 clusters.
[2026-01-19 12:59:59,954] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-19 13:00:00,029] INFO spindle_dev.index:  Finished block 12 in 0.07 seconds, found 2 clusters.
[2026-01-19 13:00:00,030] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-19 13:00:00,076] INFO spindle_dev.index:  Finished block 13 in 0.05 seconds, found 2 clusters.
[2026-01-19 13:00:00,077] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-19 13:00:00,128] INFO spindle_dev.index:  Finished block 14 in 0.05 seconds, found 3 clusters.
[2026-01-19 13:00:00,129] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-19 13:00:00,338] INFO spindle_dev.index:  Finished block 15 in 0.21 seconds, found 1 clusters.
[2026-01-19 13:00:00,339] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-19 13:00:00,339] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-19 13:00:00,339] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-19 13:00:00,340] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-19 13:00:00,340] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-19 13:00:00,346] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-19 13:00:00,347] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-19 13:00:00,347] INFO spindle_dev.index: Processing cluster 1
[2026-01-19 13:00:00,347] INFO spindle_dev.index: Building SPD index with epsilon=8.204116141691717
[2026-01-19 13:00:00,348] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-19 13:00:00,375] INFO spindle_dev.index: Cluster 1: 567 SPDs, 14 blocks
[2026-01-19 13:00:00,375] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-19 13:00:00,416] INFO spindle_dev.index:  Finished block 0 in 0.04 seconds, found 4 clusters.
[2026-01-19 13:00:00,417] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-19 13:00:00,457] INFO spindle_dev.index:  Finished block 1 in 0.04 seconds, found 4 clusters.
[2026-01-19 13:00:00,458] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-19 13:00:00,490] INFO spindle_dev.index:  Finished block 2 in 0.03 seconds, found 2 clusters.
[2026-01-19 13:00:00,491] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-19 13:00:00,538] INFO spindle_dev.index:  Finished block 3 in 0.05 seconds, found 5 clusters.
[2026-01-19 13:00:00,538] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-19 13:00:00,622] INFO spindle_dev.index:  Finished block 4 in 0.08 seconds, found 10 clusters.
[2026-01-19 13:00:00,622] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-19 13:00:00,679] INFO spindle_dev.index:  Finished block 5 in 0.06 seconds, found 3 clusters.
[2026-01-19 13:00:00,679] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-19 13:00:00,794] INFO spindle_dev.index:  Finished block 6 in 0.11 seconds, found 3 clusters.
[2026-01-19 13:00:00,795] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-19 13:00:00,862] INFO spindle_dev.index:  Finished block 7 in 0.07 seconds, found 2 clusters.
[2026-01-19 13:00:00,862] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-19 13:00:00,932] INFO spindle_dev.index:  Finished block 8 in 0.07 seconds, found 3 clusters.
[2026-01-19 13:00:00,933] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-19 13:00:00,976] INFO spindle_dev.index:  Finished block 9 in 0.04 seconds, found 4 clusters.
[2026-01-19 13:00:00,977] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-19 13:00:01,036] INFO spindle_dev.index:  Finished block 10 in 0.06 seconds, found 2 clusters.
[2026-01-19 13:00:01,036] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-19 13:00:01,115] INFO spindle_dev.index:  Finished block 11 in 0.08 seconds, found 2 clusters.
[2026-01-19 13:00:01,116] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-19 13:00:01,196] INFO spindle_dev.index:  Finished block 12 in 0.08 seconds, found 6 clusters.
[2026-01-19 13:00:01,196] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-19 13:00:01,255] INFO spindle_dev.index:  Finished block 13 in 0.06 seconds, found 7 clusters.
[2026-01-19 13:00:01,256] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-19 13:00:01,256] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-19 13:00:01,256] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-19 13:00:01,257] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-19 13:00:01,257] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-19 13:00:01,261] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-19 13:00:01,262] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-19 13:00:01,262] INFO spindle_dev.index: Processing cluster 2
[2026-01-19 13:00:01,262] INFO spindle_dev.index: Building SPD index with epsilon=6.784064466997531
[2026-01-19 13:00:01,263] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-19 13:00:01,286] INFO spindle_dev.index: Cluster 2: 504 SPDs, 16 blocks
[2026-01-19 13:00:01,286] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-19 13:00:01,335] INFO spindle_dev.index:  Finished block 0 in 0.05 seconds, found 5 clusters.
[2026-01-19 13:00:01,336] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-19 13:00:01,364] INFO spindle_dev.index:  Finished block 1 in 0.03 seconds, found 2 clusters.
[2026-01-19 13:00:01,364] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-19 13:00:01,393] INFO spindle_dev.index:  Finished block 2 in 0.03 seconds, found 2 clusters.
[2026-01-19 13:00:01,394] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-19 13:00:01,424] INFO spindle_dev.index:  Finished block 3 in 0.03 seconds, found 2 clusters.
[2026-01-19 13:00:01,424] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-19 13:00:01,464] INFO spindle_dev.index:  Finished block 4 in 0.04 seconds, found 3 clusters.
[2026-01-19 13:00:01,465] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-19 13:00:01,507] INFO spindle_dev.index:  Finished block 5 in 0.04 seconds, found 5 clusters.
[2026-01-19 13:00:01,508] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-19 13:00:01,552] INFO spindle_dev.index:  Finished block 6 in 0.04 seconds, found 4 clusters.
[2026-01-19 13:00:01,552] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-19 13:00:01,583] INFO spindle_dev.index:  Finished block 7 in 0.03 seconds, found 2 clusters.
[2026-01-19 13:00:01,584] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-19 13:00:01,627] INFO spindle_dev.index:  Finished block 8 in 0.04 seconds, found 5 clusters.
[2026-01-19 13:00:01,627] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-19 13:00:01,682] INFO spindle_dev.index:  Finished block 9 in 0.05 seconds, found 8 clusters.
[2026-01-19 13:00:01,682] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-19 13:00:01,754] INFO spindle_dev.index:  Finished block 10 in 0.07 seconds, found 4 clusters.
[2026-01-19 13:00:01,755] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-19 13:00:01,904] INFO spindle_dev.index:  Finished block 11 in 0.15 seconds, found 4 clusters.
[2026-01-19 13:00:01,905] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-19 13:00:01,952] INFO spindle_dev.index:  Finished block 12 in 0.05 seconds, found 4 clusters.
[2026-01-19 13:00:01,953] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-19 13:00:02,001] INFO spindle_dev.index:  Finished block 13 in 0.05 seconds, found 5 clusters.
[2026-01-19 13:00:02,001] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-19 13:00:02,043] INFO spindle_dev.index:  Finished block 14 in 0.04 seconds, found 4 clusters.
[2026-01-19 13:00:02,044] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-19 13:00:02,083] INFO spindle_dev.index:  Finished block 15 in 0.04 seconds, found 3 clusters.
[2026-01-19 13:00:02,084] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-19 13:00:02,084] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-19 13:00:02,085] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-19 13:00:02,085] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-19 13:00:02,085] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-19 13:00:02,090] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-19 13:00:02,090] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-19 13:00:02,090] INFO spindle_dev.index: Processing cluster 3
[2026-01-19 13:00:02,091] INFO spindle_dev.index: Building SPD index with epsilon=8.425444379709184
[2026-01-19 13:00:02,091] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-19 13:00:02,105] INFO spindle_dev.index: Cluster 3: 277 SPDs, 15 blocks
[2026-01-19 13:00:02,105] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-19 13:00:02,125] INFO spindle_dev.index:  Finished block 0 in 0.02 seconds, found 3 clusters.
[2026-01-19 13:00:02,125] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-19 13:00:02,142] INFO spindle_dev.index:  Finished block 1 in 0.02 seconds, found 3 clusters.
[2026-01-19 13:00:02,143] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-19 13:00:02,157] INFO spindle_dev.index:  Finished block 2 in 0.01 seconds, found 2 clusters.
[2026-01-19 13:00:02,158] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-19 13:00:02,179] INFO spindle_dev.index:  Finished block 3 in 0.02 seconds, found 3 clusters.
[2026-01-19 13:00:02,179] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-19 13:00:02,197] INFO spindle_dev.index:  Finished block 4 in 0.02 seconds, found 3 clusters.
[2026-01-19 13:00:02,197] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-19 13:00:02,215] INFO spindle_dev.index:  Finished block 5 in 0.02 seconds, found 3 clusters.
[2026-01-19 13:00:02,216] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-19 13:00:02,242] INFO spindle_dev.index:  Finished block 6 in 0.03 seconds, found 4 clusters.
[2026-01-19 13:00:02,243] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-19 13:00:02,259] INFO spindle_dev.index:  Finished block 7 in 0.02 seconds, found 2 clusters.
[2026-01-19 13:00:02,260] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-19 13:00:02,276] INFO spindle_dev.index:  Finished block 8 in 0.02 seconds, found 2 clusters.
[2026-01-19 13:00:02,277] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-19 13:00:02,298] INFO spindle_dev.index:  Finished block 9 in 0.02 seconds, found 2 clusters.
[2026-01-19 13:00:02,299] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-19 13:00:02,316] INFO spindle_dev.index:  Finished block 10 in 0.02 seconds, found 2 clusters.
[2026-01-19 13:00:02,317] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-19 13:00:02,336] INFO spindle_dev.index:  Finished block 11 in 0.02 seconds, found 3 clusters.
[2026-01-19 13:00:02,336] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-19 13:00:02,412] INFO spindle_dev.index:  Finished block 12 in 0.08 seconds, found 2 clusters.
[2026-01-19 13:00:02,412] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-19 13:00:02,441] INFO spindle_dev.index:  Finished block 13 in 0.03 seconds, found 2 clusters.
[2026-01-19 13:00:02,441] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-19 13:00:02,487] INFO spindle_dev.index:  Finished block 14 in 0.05 seconds, found 2 clusters.
[2026-01-19 13:00:02,487] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-19 13:00:02,488] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-19 13:00:02,488] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-19 13:00:02,488] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-19 13:00:02,489] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-19 13:00:02,491] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-19 13:00:02,492] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
In [21]:
search_cfg = search.SearchConfig(max_results=2, debug=False, max_failed_starts=5, max_failed_paths=10)
In [22]:
test_results = test.run_sanity_search(data, dag_dict, config, search_cfg, max_queries=2000, skip_baseline=True)
[2026-01-19 13:00:02,893] INFO spindle_dev.test: Built ground-truth paths for 733 SPDs in cluster 0 across 16 blocks.
[2026-01-19 13:00:03,058] INFO spindle_dev.test: Built ground-truth paths for 567 SPDs in cluster 1 across 14 blocks.
[2026-01-19 13:00:03,208] INFO spindle_dev.test: Built ground-truth paths for 504 SPDs in cluster 2 across 16 blocks.
[2026-01-19 13:00:03,253] INFO spindle_dev.test: Built ground-truth paths for 277 SPDs in cluster 3 across 15 blocks.
[2026-01-19 13:00:03,254] INFO spindle_dev.test: Created ground-truth paths for all clusters.
[2026-01-19 13:00:03,254] INFO spindle_dev.test: Search config: SearchConfig(max_results=2, max_failed_starts=5, max_failed_paths=10, total_paths_limit=1000, deterministic=DeterministicConfig(seed=0), debug=False)
[2026-01-19 13:00:03,255] INFO spindle_dev.test: Running sanity search with 2000 queries.
100%|██████████| 2000/2000 [04:41<00:00,  7.11it/s]
[2026-01-19 13:04:44,476] INFO spindle_dev.test: Sanity search: 2000 queries, 2000 exact path matches, 2000 leaf matches, mean search_time=0.1399s
In [181]:
# Sankey view of the block DAG for cluster 2.
# NOTE(review): execution count (In[181]) is out of order relative to neighbors.
fig = plotting.visualize_block_dag_sankey_scaled(dag_dict[2].nodes, height=800, thickness=20.0)
fig
In [24]:
#fig.write_image("/data/sarkar_lab/Projects/spindle_dev/ISMB_notebook/figures/breast_cluster_0_block_dag_sankey.png",  engine="kaleido", width=2000, height=1200, scale=2)
In [25]:
import pandas as pd
def get_dag_stats(dag_dict):
    """Collect per-node statistics from each cluster's block DAG.

    Parameters
    ----------
    dag_dict : mapping of cluster id -> DAG object exposing a ``nodes`` list.
        Each node must provide ``global_node_id``, ``block_index``,
        ``block_cluster_id`` and a ``metadata`` object with ``members``
        (member SPD indices), ``mean`` (square matrix) and ``radius``.

    Returns
    -------
    pd.DataFrame with one row per DAG node.
    """
    records = []
    for cluster_id, cluster_dag in dag_dict.items():
        for node in cluster_dag.nodes:
            records.append({
                "cluster_id": cluster_id,
                "node_id": node.global_node_id,
                "block_id": node.block_index,
                "block_cluster_id": node.block_cluster_id,
                "num_spds": len(node.metadata.members),
                "radius": node.metadata.radius,
                # side length of the (square) mean matrix stored on the node
                "block_size": node.metadata.mean.shape[0],
            })
    return pd.DataFrame(records)
In [26]:
dag_stats_df = get_dag_stats(dag_dict)
In [117]:
importlib.reload(index)
Out[117]:
<module 'spindle_dev.index' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/index.py'>
In [118]:
# Score every DAG node, cluster by cluster (uses representative means as references).
# NOTE(review): `cluster_dag` is assigned but unused in this loop.
score_list = []
for cluster_id in set(data.labels):
    cluster_dag = dag_dict[cluster_id]
    cluster_score = index.score_nodes_from_a_cluster(data, cluster_id, dag_dict, take_representative_mean=True)
    score_list.extend(cluster_score)
100%|██████████| 45/45 [00:00<00:00, 326.93it/s]
100%|██████████| 57/57 [00:00<00:00, 228.34it/s]
100%|██████████| 62/62 [00:00<00:00, 243.52it/s]
100%|██████████| 38/38 [00:00<00:00, 282.96it/s]
In [119]:
# Flatten the per-node score records into a DataFrame for ranking.
node_scores = [
    {
        "cluster_id": s["cluster_id"],
        "block_id": s["block_id"],
        "node_id": s["node_id"],
        "node_score": s["node_score"],
        "num_spds": s["num_spds"],
        "block_size": s["block_size"],
        "E": s["E"],
        "Q": s["Q"],
        "S": s["S"],
        "num_modules": len(s["modules"]),
    }
    for s in score_list
]
node_score_df = pd.DataFrame(node_scores)
In [121]:
node_score_df.sort_values(by='node_score', ascending=False).head(20)
Out[121]:
cluster_id block_id node_id node_score num_spds block_size E Q S num_modules
75 1 6 30 0.239966 1 38 1.932651 0.254274 0.511693 3
146 2 11 44 0.165974 1 49 1.627859 0.164071 0.378574 4
5 0 1 5 0.159690 91 15 0.774484 0.390674 0.472224 4
94 1 12 49 0.158631 13 26 1.273377 0.231421 0.461699 4
74 1 6 29 0.156417 5 38 1.526276 0.186053 0.449176 6
156 2 13 54 0.150369 2 18 0.935547 0.288826 0.443511 5
60 1 4 15 0.148257 397 21 0.976389 0.243900 0.377444 7
145 2 11 43 0.148110 1 49 2.225433 0.125890 0.471341 4
89 1 12 44 0.140268 543 26 1.321063 0.188080 0.435467 3
173 3 3 9 0.136184 49 20 0.794279 0.282274 0.392594 4
80 1 8 35 0.132119 1 27 1.412164 0.179984 0.480190 4
79 1 8 34 0.131210 2 27 1.464330 0.189470 0.527084 4
198 3 13 34 0.125699 266 26 0.862653 0.251575 0.420804 3
201 3 14 37 0.113385 11 36 1.961125 0.121871 0.525596 3
52 1 1 7 0.105444 91 15 0.571309 0.322258 0.427276 5
86 1 10 41 0.104808 3 25 1.835981 0.124215 0.540434 3
131 2 8 29 0.104168 42 15 0.792322 0.248719 0.471405 3
77 1 7 32 0.101813 3 28 2.014848 0.139933 0.638893 4
91 1 12 46 0.101234 1 26 1.344811 0.142968 0.473469 5
97 1 13 52 0.100772 1 16 1.536831 0.159211 0.588153 2
In [156]:
importlib.reload(plotting)
Out[156]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [217]:
def get_module_info(id):
    """
    Return metadata + L_mean/index_handle WITHOUT making any plots.

    Looks up the `id`-th row of ``node_score_df``, resolves its block via
    ``dag_stats_df``, and fetches the representative log-mean reference for
    that block from the per-cluster index.

    Returns (cluster_id, node_id, block_id, L_mean, index_handle).
    Relies on notebook globals: node_score_df, dag_stats_df, dag_dict, index.
    """
    row = node_score_df.iloc[id]
    cluster_id = int(row["cluster_id"])
    node_id = int(row["node_id"])

    node_mask = (dag_stats_df["cluster_id"] == cluster_id) & (dag_stats_df["node_id"] == node_id)
    block_id = int(dag_stats_df.loc[node_mask, "block_id"].values[0])

    print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")

    # Most frequent center (kept for your debug / parity, not strictly needed
    # for the L_mean retrieval below).
    block_mask = (dag_stats_df["cluster_id"] == cluster_id) & (dag_stats_df["block_id"] == block_id)
    _ = (
        dag_stats_df.loc[block_mask]
        .sort_values(by="num_spds", ascending=False)
        .node_id.values[0]
    )

    index_handle = dag_dict[cluster_id]

    # Representative log-mean reference for this block
    L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)

    return cluster_id, node_id, block_id, L_mean, index_handle
def _fig_to_rgb_array(fig):
    """
    Rasterize a matplotlib figure to an RGB uint8 array.

    Renders the figure to an in-memory PNG and decodes it with PIL,
    dropping the alpha channel if present.

    Parameters
    ----------
    fig : matplotlib.figure.Figure

    Returns
    -------
    np.ndarray of shape (H, W, 3).
    """
    from io import BytesIO
    from PIL import Image

    # Ensure figure is rendered; if the backend cannot draw yet, savefig
    # below will trigger the draw itself.
    try:
        fig.canvas.draw()
    except Exception:
        # Was a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit; narrow it to Exception.
        pass

    buf = BytesIO()
    fig.savefig(buf, format='png', dpi=100, bbox_inches='tight', pad_inches=0.05)
    buf.seek(0)
    img = np.array(Image.open(buf))
    # Guard on ndim too: the original indexed shape[2] unconditionally and
    # would raise on a grayscale (2-D) decode. RGBA -> RGB.
    if img.ndim == 3 and img.shape[2] == 4:
        img = img[..., :3]
    buf.close()
    return img

def _as_figure(fig_or_tuple):
    # Accept fig, or (fig, ax), or (fig, axs), etc.
    if isinstance(fig_or_tuple, tuple):
        return fig_or_tuple[0]
    return fig_or_tuple

def plot_module_heatmap_plus_spatial(id, module_ids=(0, 1), zero_corr_threshold=0.05):
    """
    One panel:
      - Left: corr heatmap with modules (auto-sized based on L_mean.shape[0])
      - Right: spatial module score plots for module_ids (stacked, constant size)

    Parameters
    ----------
    id : positional row in ``node_score_df`` selecting the node to plot.
        NOTE(review): shadows the ``id`` builtin; kept for caller compatibility.
    module_ids : module indices drawn in the right column. Exactly two axes
        are created, so ``zip`` silently ignores any ids beyond the first two.
    zero_corr_threshold : passed through to the heatmap's zero-correlation filter.

    Returns
    -------
    (scores_final, fig) : spot scores of the LAST module in ``module_ids``
        only, and the combined matplotlib figure.

    Relies on notebook globals: node_score_df, dag_stats_df, dag_dict,
    score_list, data, tiles, adata, index, plotting, np, plt.
    """
    # Get metadata: resolve (cluster, node) from the score table, then the
    # node's block from dag_stats_df.
    cluster_id = int(node_score_df.iloc[id]["cluster_id"])
    node_id = int(node_score_df.iloc[id]["node_id"])
    block_id = int(
        dag_stats_df.loc[
            (dag_stats_df["cluster_id"] == cluster_id) & (dag_stats_df["node_id"] == node_id),
            "block_id",
        ].values[0]
    )
    index_handle = dag_dict[cluster_id]
    # Representative log-mean reference for this block; its length p drives
    # the heatmap panel sizing below.
    L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
    p = int(L_mean.shape[0])

    print(f"Creating combined figure for cluster {cluster_id}, node {node_id}, block {block_id}")

    # Make the heatmap in a throwaway figure, rasterize it, and embed the
    # pixels in the combined figure (avoids re-plotting into a sub-axis).
    fig_hm, ax_hm, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(
        score_list[id],
        show_values=False,
        value_fontsize=10,
        bold_labels=True,
        figsize=(8, 7),
        filter_zero_correlation=True,
        zero_corr_threshold=zero_corr_threshold,
    )
    hm_img = _fig_to_rgb_array(fig_hm)
    plt.close(fig_hm)  # free the intermediate figure immediately

    # Build the combined figure; panel sizes scale with gene count p,
    # clamped to sane minimums.
    heat_w = max(6.5, 0.25 * p)
    heat_h = max(5.0, 0.21 * p)
    spatial_w = 5.5
    total_w = heat_w + spatial_w
    total_h = max(heat_h, 6.5)

    fig = plt.figure(figsize=(total_w, total_h), constrained_layout=False)
    gs = fig.add_gridspec(
        nrows=2, ncols=2,
        width_ratios=[heat_w, spatial_w],
        height_ratios=[1, 1],
        wspace=0.05, hspace=0.10
    )

    # Left column spans both rows (heatmap); right column holds the two
    # stacked spatial plots.
    ax_heat = fig.add_subplot(gs[:, 0])
    ax_sp0  = fig.add_subplot(gs[0, 1])
    ax_sp1  = fig.add_subplot(gs[1, 1])

    # Place heatmap image
    ax_heat.imshow(hm_img)
    ax_heat.set_axis_off()
    ax_heat.set_title(f"Cluster {cluster_id} | Node {node_id} | Block {block_id}", fontsize=10, pad=6)

    # Create spatial plots directly into axes (NO intermediate figures!)
    scores_final = None
    for ax_sp, mid in zip([ax_sp0, ax_sp1], module_ids):
        # print(f"Getting module {mid} score for cluster {cluster_id}, node {node_id}, block {block_id}")

        # Score all tiles for this module against the block reference L_mean.
        scores_tiles_wrt_ref, info = index.tile_module_scores_from_reference(
            data,
            score_list[id]["modules"][mid],
            index_handle,
            block_id,
            L_mean,
            cluster_id,
        )

        spot_score = plotting.assign_module_score_to_spots(data, scores_tiles_wrt_ref)
        scores = np.array(list(spot_score.values()))
        # Each iteration overwrites scores_final: only the last module's
        # scores are returned to the caller.
        scores_final = scores

        # Plot directly into the axis - no intermediate figure!
        plotting.plot_spot_module_scores(
            spot_score, tiles, adata,
            ax=ax_sp,  # Pass the axis directly!
            grid=False,
            label=f"Module {mid}",
        )
        ax_sp.set_title(f"Module {mid}", fontsize=10, pad=4)

    return scores_final, fig
In [142]:
# This function is deprecated - use plot_module_heatmap_plus_spatial instead
# def get_module_info(id):
#     # Take the top scored node
#     cluster_id = int(node_score_df.iloc[id]['cluster_id'])
#     node_id = int(node_score_df.iloc[id]['node_id'])
#     block_id = int(dag_stats_df[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['node_id'] == node_id)]['block_id'].values[0])
#     print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
#     fig, ax, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(score_list[id], 
#         show_values=False, value_fontsize=10, bold_labels=True, figsize=(8,7), filter_zero_correlation=True, zero_corr_threshold=0.05)
#     # save
#     # fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/corr_heatmap_cluster{cluster_id}_node_{node_id}.png", dpi=300)
# 
#     # Find out the most frequent center in the same block
#     freq_node_center =  dag_stats_df.loc[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['block_id'] == block_id)].sort_values(by='num_spds', ascending=False).node_id.values[0]
#     index_handle = dag_dict[cluster_id]
#     #L_mean = dag_dict[cluster_id].nodes[freq_node_center].metadata.representative_mean
#     L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
#     return cluster_id, node_id, block_id, L_mean, index_handle
In [219]:
scores_146, fig_146 = plot_module_heatmap_plus_spatial(146)
fig_146.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/module_146_heatmap_spatial.png", dpi=300)
Creating combined figure for cluster 2, node 44, block 11
No description has been provided for this image
In [220]:
out_146 = go_score.enrich_modules_with_gseapy(score_list[146]['modules'])
  Module 0: enrichment completed with 401 terms
  Module 1: enrichment completed with 340 terms
  Module 2: enrichment completed with 270 terms
  Module 3: enrichment completed with 206 terms
In [ ]:
 
In [227]:
# Per-library GO/pathway enrichment bars for module 1 of node 146 only;
# the other modules in `out_146` are skipped by the guard below. Figures are
# saved into the go_enrichment results directory by the helper.
for module_id, module_genes, res in out_146:
    if module_id == 1:
        # Show the gene list being tested before plotting.
        print(module_genes)
        fig, axes = go_score.plot_module_enrichment_libraries(
            module_id=module_id,
            module_genes=module_genes,
            res=res,
            top_n=7,
            ncols=2,
            bar_color="#D41515EE",   # pick your color
            tick_fontsize=10,
            label_fontsize=10,
            title_fontsize=8,
            compact_height_per_term=0.2,
            min_fig_h=6,
            save_dir="/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/go_enrichment/"
        )
        if fig is not None:
            plt.show()
['ADAM9', 'CCDC6', 'CCND1', 'CD9', 'DSP', 'ELF3', 'EPCAM', 'JUP', 'KRT7', 'KRT8', 'S100A14', 'SERPINA3', 'TACSTD2', 'TRAF4']
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
No description has been provided for this image
In [158]:
scores_156, fig_156 = plot_module_heatmap_plus_spatial(156)
Creating combined figure for cluster 2, node 54, block 13
Creating combined figure for cluster 2, node 54, block 13
No description has been provided for this image
In [175]:
# Create a dictionary of tile_id -> {block_index: block_cluster_id}.
# Clusters do not share tile ids across them, so a single flat dict is safe;
# cluster_to_tile records which tiles belong to each cluster.
from collections import defaultdict

tile_to_block_dict = {}
cluster_to_tile = defaultdict(set)
for cluster_id, cluster_dag in dag_dict.items():
    # (Removed unused local `block_to_node = cluster_dag.block_to_node_indices`.)
    for node in cluster_dag.nodes:
        # node.metadata.members is a list of (tile_id, block_id) pairs;
        # only the tile id is needed to key the mapping.
        for tile_id, _blk in node.metadata.members:
            # setdefault replaces the original `if not tile_id in ...` dance.
            tile_to_block_dict.setdefault(tile_id, {})[node.block_index] = node.block_cluster_id
            cluster_to_tile[cluster_id].add(tile_id)
In [193]:
importlib.reload(plotting)
Out[193]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [196]:
def get_spot_score_dict(block_id, cluster_id, tile_to_block_dict, grid=False):
    """
    Color every spot of a cluster by its block-cluster assignment at the
    given block level, then draw the discrete spatial map.

    NOTE: despite the name, this returns the (fig, ax) of the plot rather
    than the score dict. Relies on notebook globals: cluster_to_tile, data,
    plotting, tiles, adata.
    """
    spot_score = {}
    for tid in cluster_to_tile[cluster_id]:
        if tid not in tile_to_block_dict:
            continue
        assignment = tile_to_block_dict[tid][block_id]
        tile = data.metadata['tiles'][tid]
        # Propagate the tile-level assignment to every spot in the tile.
        for spot_idx in tile.idx:
            spot_score[spot_idx] = assignment
    fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata, color='discrete', grid=grid)
    return fig, ax
In [207]:
fig, ax = get_spot_score_dict(5, 2, tile_to_block_dict, grid=False)
No description has been provided for this image
In [208]:
node_score_df.loc[(node_score_df['cluster_id'] == 2) & (node_score_df['block_id'] == 5)]
Out[208]:
cluster_id block_id node_id node_score num_spds block_size E Q S num_modules
116 2 5 14 0.001398 350 15 1.661521 0.010595 0.920589 2
117 2 5 15 0.003958 2 15 0.563854 0.051842 0.864585 4
118 2 5 16 0.001554 22 15 0.287832 0.029581 0.817513 5
119 2 5 17 0.010833 67 15 0.351048 0.100392 0.692624 3
120 2 5 18 0.007216 63 15 0.419383 0.075795 0.772996 4
In [214]:
scores_116, fig_116 = plot_module_heatmap_plus_spatial(116)
Creating combined figure for cluster 2, node 14, block 5
No description has been provided for this image
In [215]:
fig_116.savefig("/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/module_heatmap_plus_spatial_node_116.png", dpi=300)
In [ ]:
 
In [ ]:
# Fixed: the original call omitted the required `id` argument and would raise
# TypeError; the variable names indicate row 102 of node_score_df was intended.
scores_102, fig_102 = plot_module_heatmap_plus_spatial(102)
In [211]:
out = go_score.enrich_modules_with_gseapy(score_list[116]['modules'])
  Module 0: enrichment completed with 239 terms
  Module 1: skipped (only 2 genes)
In [212]:
# Per-library GO/pathway enrichment bars for every module of node 116.
# NOTE(review): near-duplicate of the other enrichment loops in this notebook
# (only fontsize/min_fig_h and save_dir differ) — consider a shared helper.
for module_id, module_genes, res in out:
    fig, axes = go_score.plot_module_enrichment_libraries(
        module_id=module_id,
        module_genes=module_genes,
        res=res,
        top_n=7,
        ncols=2,
        bar_color="#D41515EE",   # pick your color
        tick_fontsize=10,
        label_fontsize=8,
        title_fontsize=8,
        compact_height_per_term=0.2,
        min_fig_h=8,
        #save_dir="/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/go_enrichment/"
    )
    if fig is not None:
        plt.show()
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [168]:
importlib.reload(plotting)
Out[168]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [ ]:
 
In [165]:
len(tile_to_block_dict)
Out[165]:
2081
In [ ]:
 
In [166]:
len(tiles)
Out[166]:
2081
In [ ]:
 
In [162]:
dag_stats_df[['cluster_id', 'block_id', 'block_cluster_id', ]]
Out[162]:
cluster_id block_id block_cluster_id
0 0 0 0
1 0 0 1
2 0 1 0
3 0 1 1
4 0 1 2
... ... ... ...
197 3 12 1
198 3 13 0
199 3 13 1
200 3 14 0
201 3 14 1

202 rows × 3 columns

In [ ]:
 
In [123]:
node_score_df.sort_values(by='node_score', ascending=False).head(20)
Out[123]:
cluster_id block_id node_id node_score num_spds block_size E Q S num_modules
75 1 6 30 0.239966 1 38 1.932651 0.254274 0.511693 3
146 2 11 44 0.165974 1 49 1.627859 0.164071 0.378574 4
5 0 1 5 0.159690 91 15 0.774484 0.390674 0.472224 4
94 1 12 49 0.158631 13 26 1.273377 0.231421 0.461699 4
74 1 6 29 0.156417 5 38 1.526276 0.186053 0.449176 6
156 2 13 54 0.150369 2 18 0.935547 0.288826 0.443511 5
60 1 4 15 0.148257 397 21 0.976389 0.243900 0.377444 7
145 2 11 43 0.148110 1 49 2.225433 0.125890 0.471341 4
89 1 12 44 0.140268 543 26 1.321063 0.188080 0.435467 3
173 3 3 9 0.136184 49 20 0.794279 0.282274 0.392594 4
80 1 8 35 0.132119 1 27 1.412164 0.179984 0.480190 4
79 1 8 34 0.131210 2 27 1.464330 0.189470 0.527084 4
198 3 13 34 0.125699 266 26 0.862653 0.251575 0.420804 3
201 3 14 37 0.113385 11 36 1.961125 0.121871 0.525596 3
52 1 1 7 0.105444 91 15 0.571309 0.322258 0.427276 5
86 1 10 41 0.104808 3 25 1.835981 0.124215 0.540434 3
131 2 8 29 0.104168 42 15 0.792322 0.248719 0.471405 3
77 1 7 32 0.101813 3 28 2.014848 0.139933 0.638893 4
91 1 12 46 0.101234 1 26 1.344811 0.142968 0.473469 5
97 1 13 52 0.100772 1 16 1.536831 0.159211 0.588153 2
In [ ]:
 
In [97]:
cluster_id, node_id, block_id, L_mean, index_handle = get_module_info(146)
Top scored node is in cluster 2, node 44, block 11
Module of size 20: mean abs corr = 0.2477
Adding module of size 20 with mean abs corr = 0.2477
Module of size 14: mean abs corr = 0.4084
Adding module of size 14 with mean abs corr = 0.4084
Module of size 8: mean abs corr = 0.2126
Adding module of size 8 with mean abs corr = 0.2126
Module of size 7: mean abs corr = 0.2540
Adding module of size 7 with mean abs corr = 0.2540
Using 4 modules for heatmap plotting.
No description has been provided for this image
In [101]:
module_id = 1
scores_146, fig_spatial = get_module_score_plot_spatial(146, module_id, cluster_id, node_id, block_id, L_mean, index_handle)
Getting module 1 score for cluster 2, node 44, block 11
No description has been provided for this image
In [102]:
cluster_id, node_id, block_id, L_mean, index_handle = get_module_info(145)
Top scored node is in cluster 2, node 43, block 11
Module of size 28: mean abs corr = 0.2114
Adding module of size 28 with mean abs corr = 0.2114
Module of size 18: mean abs corr = 0.5462
Adding module of size 18 with mean abs corr = 0.5462
Module of size 2: mean abs corr = 0.5234
Adding module of size 2 with mean abs corr = 0.5234
Using 3 modules for heatmap plotting.
No description has been provided for this image
In [ ]:
 
In [103]:
module_id = 1
scores_145, fig_spatial = get_module_score_plot_spatial(145, module_id, cluster_id, node_id, block_id, L_mean, index_handle)
Getting module 1 score for cluster 2, node 43, block 11
No description has been provided for this image
In [113]:
plt.scatter(scores_145, scores_146, s=1, alpha=0.1)
plt.ylabel(f"Cluster{cluster_id} Module 1 Score in Node 43")
plt.xlabel(f"Cluster{cluster_id} Module 1 Score in Node 44")
Out[113]:
Text(0.5, 0, 'Cluster2 Module 1 Score in Node 44')
No description has been provided for this image
In [108]:
plt.hist(scores_145, bins=50, alpha=0.5);
plt.hist(scores_146, bins=50, alpha=0.5);
No description has been provided for this image
In [ ]:
 
In [89]:
#id = node_score_df.sort_values(by='node_score', ascending=False).index[1]
# NOTE(review): `id` shadows the builtin and is read by later cells; renaming
# here would require updating those cells too.
id = 146
# Take the top scored node
cluster_id = int(node_score_df.iloc[id]['cluster_id'])
node_id = int(node_score_df.iloc[id]['node_id'])
block_id = int(dag_stats_df[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['node_id'] == node_id)]['block_id'].values[0])
print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
fig, ax, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(score_list[id], 
    show_values=False, value_fontsize=10, bold_labels=True, figsize=(8,7), filter_zero_correlation=True, zero_corr_threshold=0.05)
# save
# NOTE(review): saves under results/hbreast_10X/ while other cells use
# results/hbreast_10X_wo_unlabeled/ — confirm the intended directory.
fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/corr_heatmap_cluster{cluster_id}_node_{node_id}.png", dpi=300)

# Find out the most frequent center in the same block
# (computed for debugging/parity; unused below — L_mean comes from the
# block-level helper instead)
freq_node_center =  dag_stats_df.loc[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['block_id'] == block_id)].sort_values(by='num_spds', ascending=False).node_id.values[0]
index_handle = dag_dict[cluster_id]
#L_mean = dag_dict[cluster_id].nodes[freq_node_center].metadata.representative_mean
L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
Top scored node is in cluster 2, node 44, block 11
Module of size 20: mean abs corr = 0.2477
Adding module of size 20 with mean abs corr = 0.2477
Module of size 14: mean abs corr = 0.4084
Adding module of size 14 with mean abs corr = 0.4084
Module of size 8: mean abs corr = 0.2126
Adding module of size 8 with mean abs corr = 0.2126
Module of size 7: mean abs corr = 0.2540
Adding module of size 7 with mean abs corr = 0.2540
Using 4 modules for heatmap plotting.
No description has been provided for this image
In [ ]:
 
In [90]:
importlib.reload(plotting)
Out[90]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [91]:
importlib.reload(index)
Out[91]:
<module 'spindle_dev.index' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/index.py'>
In [92]:
# Differential LE score for all tiles, module index 1 (the original comment
# said "module 2"; plots below label it "Module 2 Score").
module_id = 1
# Score every tile of the cluster against the block-level reference L_mean.
scores_tiles_wrt_ref, info = index.tile_module_scores_from_reference(data, score_list[id]['modules'][module_id], index_handle, block_id, L_mean, cluster_id)
# assign module scores to spots
spot_score = plotting.assign_module_score_to_spots(data, scores_tiles_wrt_ref)
scores = np.array(list(spot_score.values()))
# fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
# # save
# fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_module{module_id}_score_cluster{cluster_id}_node{node_id}.png", dpi=300)
In [93]:
importlib.reload(plotting)
Out[93]:
<module 'spindle_dev.plotting' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/plotting.py'>
In [94]:
fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
No description has been provided for this image
In [88]:
fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
No description has been provided for this image
In [71]:
from spindle_dev import go_score
In [ ]:
out = go_score.enrich_modules_with_gseapy(score_list[146]['modules'])
  Module 0: enrichment completed with 401 terms
  Module 1: enrichment completed with 340 terms
  Module 2: enrichment completed with 270 terms
  Module 3: enrichment completed with 206 terms
In [76]:
for module_id, module_genes, res in out:
    fig, axes = go_score.plot_module_enrichment_libraries(
        module_id=module_id,
        module_genes=module_genes,
        res=res,
        top_n=7,
        ncols=2,
        bar_color="#D41515EE",   # pick your color
        tick_fontsize=10,
        label_fontsize=8,
        title_fontsize=8,
        compact_height_per_term=0.2,
        min_fig_h=8,
        #save_dir="/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/go_enrichment/"
    )
    if fig is not None:
        plt.show()
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
No description has been provided for this image
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
No description has been provided for this image
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
No description has been provided for this image
['GO_Biological_Process_2023', 'KEGG_2021_Human', 'MSigDB_Hallmark_2020', 'Reactome_2022']
No description has been provided for this image
In [39]:
from spindle_dev import index
importlib.reload(index)
Out[39]:
<module 'spindle_dev.index' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/index.py'>
In [40]:
import pandas as pd
In [41]:
cluster_id = 0
index_handle = dag_dict[cluster_id]
node_rows, per_node_tile_scores, tile_z = index.node_scores_from_reference(data, index_handle, cluster_id, representative_mean=True)
    
In [42]:
import pandas as pd
In [58]:
node_df = pd.DataFrame(node_rows).sort_values('z_scale', ascending=False)
In [60]:
node_score_df.loc[(node_score_df.cluster_id == cluster_id) & (node_score_df.node_id == 21)]
Out[60]:
cluster_id node_id node_score num_spds block_size E Q S num_modules
21 0 21 0.052074 659 15 0.758758 0.153221 0.552082 3
In [59]:
node_df.head(10)
Out[59]:
block_id cluster_id node_id node_index prevalence median_dist mad_dist quality interest k_genes z_median z_mad z_scale
21 7 0 21 21 1.0 3.609869 1.693648 0.004974 0.004974 15 3.609869 1.693648 2.511002
23 8 0 23 23 1.0 3.275327 1.256047 0.010766 0.010766 15 3.275327 1.256047 1.862216
16 5 0 16 16 1.0 3.077620 1.237488 0.013365 0.013365 15 3.077620 1.237488 1.834699
26 9 0 26 26 1.0 3.195635 1.227180 0.012000 0.012000 15 3.195635 1.227180 1.819417
5 1 0 5 5 1.0 4.973354 1.050387 0.002421 0.002421 15 4.973354 1.050387 1.557303
13 4 0 13 13 1.0 4.723527 0.958816 0.003406 0.003406 15 4.723527 0.958816 1.421540
10 3 0 10 10 1.0 5.474510 0.894123 0.001715 0.001715 15 5.474510 0.894123 1.325627
34 11 0 35 35 1.0 2.700770 0.753236 0.031619 0.031619 24 2.700770 0.753236 1.116748
30 10 0 30 30 1.0 3.227608 0.726557 0.019175 0.019175 18 3.227608 0.726557 1.077193
7 2 0 7 7 1.0 6.275040 0.642391 0.000990 0.000990 15 6.275040 0.642391 0.952408
In [61]:
# Inspect positional row 21 of node_score_df (printed below: cluster 0,
# node 21, block 7).
# NOTE(review): `id` shadows the builtin and is read by later cells.
id = node_score_df.index[21]
# Take the top scored node
cluster_id = int(node_score_df.iloc[id]['cluster_id'])
node_id = int(node_score_df.iloc[id]['node_id'])
block_id = int(dag_stats_df[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['node_id'] == node_id)]['block_id'].values[0])
print(f"Top scored node is in cluster {cluster_id}, node {node_id}, block {block_id}")
fig, ax, ordered_genes, gene_to_mod = plotting.plot_corr_heatmap_with_modules(score_list[id], 
    show_values=False, value_fontsize=10, bold_labels=True, figsize=(8,7), filter_zero_correlation=True, zero_corr_threshold=0.05)
# save
fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X/corr_heatmap_cluster{cluster_id}_node_{node_id}.png", dpi=300)

# Find out the most frequent center in the same block
# (computed for debugging/parity; unused below — L_mean comes from the
# block-level helper instead)
freq_node_center =  dag_stats_df.loc[(dag_stats_df['cluster_id'] == cluster_id) & (dag_stats_df['block_id'] == block_id)].sort_values(by='num_spds', ascending=False).node_id.values[0]
index_handle = dag_dict[cluster_id]
#L_mean = dag_dict[cluster_id].nodes[freq_node_center].metadata.representative_mean
L_mean = index.get_node_log_ref(index_handle, block_id, use_representative_mean=True)
Top scored node is in cluster 0, node 21, block 7
Module of size 6: mean abs corr = 0.2030
Adding module of size 6 with mean abs corr = 0.2030
Module of size 5: mean abs corr = 0.2500
Adding module of size 5 with mean abs corr = 0.2500
Module of size 4: mean abs corr = 0.3630
Adding module of size 4 with mean abs corr = 0.3630
Using 3 modules for heatmap plotting.
No description has been provided for this image
In [62]:
# Differential LE score for all tiles for module 2
module_id = 2
scores_tiles_wrt_ref, info = index.tile_module_scores_from_reference(data, score_list[id]['modules'][module_id], index_handle, block_id, L_mean, cluster_id)
# assign module scores to spots
spot_score = plotting.assign_module_score_to_spots(data, scores_tiles_wrt_ref)
scores = np.array(list(spot_score.values()))
# fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
# # save
# fig.savefig(f"/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_module{module_id}_score_cluster{cluster_id}_node{node_id}.png", dpi=300)
In [63]:
fig, ax = plotting.plot_spot_module_scores(spot_score, tiles, adata,grid=False, figsize=(6,3.5), label="Module 2 Score")
No description has been provided for this image
In [46]:
from collections import defaultdict

def block_dag_edges_consecutive(nodes, block_id_from_members=True):
    """
    Build a layered DAG view of `nodes`, layered by `block_index`.

    Within each layer, nodes are ordered by (order_id, global_node_id).
    Edges connect nodes in consecutive layers that share SPD member ids,
    weighted by the number of shared ids.

    Parameters
    ----------
    nodes : iterable of node objects exposing .block_index, .order_id,
        .global_node_id and .metadata.members (list of (spd_id, block_id)).
    block_id_from_members : when True, a member (spd_id, blk_id) only counts
        for the layer whose index equals blk_id.

    Returns
    -------
    (layers, ordered_by_layer, edges) where edges is a list of
    (source_node_id, target_node_id, shared_spd_count).
    """
    # Bucket nodes into layers by block index.
    by_layer = defaultdict(list)
    for node in nodes:
        by_layer[node.block_index].append(node)
    layers = sorted(by_layer)

    # Stable ordering inside each layer.
    ordered_by_layer = {}
    for layer in layers:
        ordered_by_layer[layer] = sorted(
            by_layer[layer], key=lambda n: (n.order_id, n.global_node_id)
        )

    # For each layer, map every SPD member id to the node that owns it.
    spd_to_node_in_layer = {}
    for layer in layers:
        owner = {}
        for node in ordered_by_layer[layer]:
            for spd_id, blk_id in node.metadata.members:
                if (not block_id_from_members) or (blk_id == layer):
                    owner[spd_id] = node.global_node_id
        spd_to_node_in_layer[layer] = owner

    # Count shared SPD ids between every pair of consecutive layers.
    edges = []
    for lower, upper in zip(layers, layers[1:]):
        lower_map = spd_to_node_in_layer[lower]
        upper_map = spd_to_node_in_layer[upper]

        pair_counts = defaultdict(int)
        for spd_id in lower_map.keys() & upper_map.keys():
            pair_counts[(lower_map[spd_id], upper_map[spd_id])] += 1

        edges.extend((u, v, c) for (u, v), c in pair_counts.items())

    return layers, ordered_by_layer, edges


def block_dag_to_dot(layers, ordered_by_layer, edges, node_label_fn=None, edge_label_fn=None):
    """
    Serialize a layered block DAG to Graphviz DOT text.

    Parameters
    ----------
    layers : list of layer keys (block indices), in drawing order.
    ordered_by_layer : dict layer -> ordered list of nodes, each exposing
        .global_node_id.
    edges : iterable of (u_id, v_id, count) tuples.
    node_label_fn : callable(node) -> str; defaults to str(global_node_id).
    edge_label_fn : callable(count) -> str; defaults to str(count).

    Returns
    -------
    str : the DOT source.
    """
    if node_label_fn is None:
        node_label_fn = lambda n: str(n.global_node_id)
    if edge_label_fn is None:
        edge_label_fn = lambda c: str(c)

    # (Removed dead code: a node_id -> layer map was built here but never read.)

    lines = []
    lines.append("digraph G {")
    lines.append('  graph [rankdir=TB, splines=true, nodesep=0.25, ranksep=0.5];')
    lines.append('  node  [shape=circle, fixedsize=true, width=0.28, fontsize=10, penwidth=1];')
    lines.append('  edge  [fontsize=9, penwidth=1, arrowsize=0.6];')

    # --- ranks (each layer on one horizontal row)
    # NOTE(review): the "cluster_" name prefix makes Graphviz treat each
    # subgraph as a boxed cluster; rename to e.g. "rank_{b}" if no box is wanted.
    for b in layers:
        ids = [n.global_node_id for n in ordered_by_layer[b]]
        lines.append(f"  subgraph cluster_rank_{b} {{")
        lines.append("    rank=same;")
        for nid in ids:
            lines.append(f'    "{nid}";')
        lines.append("  }")

    # --- node labels (quotes escaped so labels cannot break the DOT syntax)
    for b in layers:
        for n in ordered_by_layer[b]:
            nid = n.global_node_id
            label = node_label_fn(n).replace('"', '\\"')
            lines.append(f'  "{nid}" [label="{label}"];')

    # --- edges with labels (counts)
    for u, v, c in edges:
        elab = edge_label_fn(c).replace('"', '\\"')
        lines.append(f'  "{u}" -> "{v}" [label="{elab}"];')

    lines.append("}")
    return "\n".join(lines)


def render_dot(dot_str, out_path, fmt="png"):
    """
    Render DOT source to an image file via Graphviz.

    Requires:
      - system graphviz (dot)
      - python package: graphviz  (pip install graphviz)

    Returns the path of the rendered file.
    """
    from graphviz import Source
    source = Source(dot_str, format=fmt)
    # graphviz appends the format extension itself, so strip any extension
    # from the requested output path to avoid names like "out.png.png".
    base_name = out_path.rsplit(".", 1)[0]
    return source.render(filename=base_name, cleanup=True)
In [47]:
# Build the layered block DAG for cluster 0 and serialize it to DOT.
layers, ordered_by_layer, edges = block_dag_edges_consecutive(dag_dict[0].nodes)

dot = block_dag_to_dot(
    layers,
    ordered_by_layer,
    edges,
    node_label_fn=lambda n: f"{n.order_id}",   # or cluster id, or short name
    edge_label_fn=lambda c: str(c),            # true SPD counts
)

# save dot text
# NOTE(review): relative paths — these land in the notebook's CWD, unlike the
# absolute results paths used elsewhere; confirm this is intended.
with open("block_dag.dot", "w") as f:
    f.write(dot)

# render (optional)
render_dot(dot, "block_dag.png", fmt="png")
Out[47]:
'block_dag.png'
In [ ]:
 
In [ ]:
 
In [48]:
from graphviz import Source
In [49]:
Source(dot)
Out[49]:
No description has been provided for this image
In [ ]: