In [1]:
# Ensure the project's src/ directory is on sys.path so that
# 'spindle_dev' can be imported directly (idempotent: added once).
import sys
import importlib
from pathlib import Path

project_root = '/data/sarkar_lab/Projects/spindle_dev'
src_path = Path(project_root) / 'src'
src_str = str(src_path)
if src_str not in sys.path:
    sys.path.insert(0, src_str)

import spindle_dev

# Reload so the kernel picks up source edits without a restart.
importlib.reload(spindle_dev)
Out[1]:
<module 'spindle_dev' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/__init__.py'>
In [2]:
# Third-party imports: scanpy for AnnData I/O, glob for dataset discovery.
import scanpy as sc
import glob
In [3]:
# Collect all Xenium demo .h5ad files.
# TODO: move the absolute path into a configurable DATA_DIR constant.
h5ad_files = glob.glob("/data/sarkar_lab/insitupy_demo_data_xenium/*.h5ad")
In [4]:
# Project submodules used by the cells below.
# NOTE(review): 'import spindle_dev' was already executed in the first
# cell; repeating it here is harmless but redundant.
import spindle_dev
import spindle_dev.metrics as metrics
import spindle_dev.index as index
import spindle_dev.preprocessing as preprocessing
import spindle_dev.plotting as plotting
import spindle_dev.test as test
import spindle_dev.search as search
import spindle_dev.typing as typing
import time
In [5]:
# Additional stdlib / third-party helpers.
# NOTE(review): argparse, ThreadPoolExecutor/as_completed, and
# Parallel/delayed are not used in any visible cell — candidates for
# removal if they are unused in the rest of the notebook.
from pathlib import Path
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from joblib import Parallel, delayed
import argparse
import pandas as pd
In [6]:
# NOTE(review): exact duplicate of the earlier h5ad_files glob cell —
# same pattern, same result; consider deleting one of the two.
h5ad_files = glob.glob("/data/sarkar_lab/insitupy_demo_data_xenium/*.h5ad")
In [ ]:
# NOTE(review): this pattern contains no wildcard, so glob returns at
# most the single directory path itself — presumably a '*' pattern
# (e.g. '*_index') was intended; TODO confirm. The cell was never
# executed (In [ ]) and index_files is unused in any visible cell.
index_files = glob.glob("/data/sarkar_lab/Projects/spindle_dev/")
In [63]:
import os
import pickle  # NOTE(review): unused here — index.load_index handles deserialization; kept to avoid breaking later cells

BASE = "/data/sarkar_lab/insitupy_demo_data_xenium"

# -----------------------------
# Index-level stats
# -----------------------------
# Walk each per-experiment index directory, load its saved spindle
# index, and record the number of DAG clusters per experiment.
index_dirs = glob.glob(os.path.join(BASE, "xenium_*_index"))
index_rows = []
for d in index_dirs:
    experiment = os.path.basename(d).replace("_index", "")
    # Skip the 5k-panel datasets (e.g. xenium_human_lymph_node_5k).
    if '5k' in experiment:
        continue
    spindle_path = os.path.join(d, "spindle.pkl")
    index_handle = index.load_index(spindle_path)
    num_clusters = len(index_handle.dag_dict)
    # Fix: index_rows was created but never populated — collect the
    # per-experiment stats so they can become a DataFrame downstream.
    index_rows.append({"experiment": experiment, "num_clusters": num_clusters})
    print(experiment, num_clusters)
xenium_human_skin_melanoma 6 xenium_human_kidney_nondiseased 6 xenium_human_breast_cancer 6 xenium_human_pancreatic_cancer 4 xenium_human_lymph_node 6 xenium_human_brain_cancer 8 xenium_human_lung_cancer 7
In [61]:
# Number of DAG clusters in the most recently loaded index.
# NOTE(review): relies on index_handle leaking from the loop cell above —
# it refers to whichever index directory the glob yielded last.
len(index_handle.dag_dict)
Out[61]:
6
In [7]:
# Rich display of the discovered .h5ad file paths.
h5ad_files
Out[7]:
['/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_skin_melanoma.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_brain_cancer.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_kidney_nondiseased.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_lung_cancer.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_lymph_node.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_lymph_node_5k.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_pancreatic_cancer.h5ad', '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_breast_cancer.h5ad']
In [54]:
# NOTE(review): execution counts show this cell (In[54]) ran before the
# pipeline cell (In[71]) that defines `data` — on a fresh
# Restart-&-Run-All this raises NameError. Move it below that cell.
len(data.spd_matrices)
Out[54]:
2188
In [9]:
# Load the breast-cancer Xenium dataset into an AnnData object.
# TODO: select the path from the already-collected h5ad_files list
# instead of repeating the absolute path.
adata = sc.read_h5ad('/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_breast_cancer.h5ad')
In [70]:
# Drop cells whose Cluster annotation is "Unlabeled"; .copy() detaches
# the result from the parent AnnData view.
keep = adata.obs["Cluster"] != "Unlabeled"
adata = adata[keep].copy()
In [71]:
# End-to-end index-building pipeline for the loaded AnnData:
# quadtree tiling -> per-tile covariance (SPD) matrices -> SPD
# clustering -> adaptive epsilon selection -> DAG index construction.
resolution=0.5     # Leiden resolution for SPD clustering
min_final_size=10  # lower bound passed to get_adaptive_runs
top_vars=200       # number of top-variance genes used when all_genes is False
n_queries = 2000   # NOTE(review): not used in this cell — TODO confirm it is needed
all_genes=True     # use every gene instead of the top_vars subset
# start time
# Create index directory if it doesn't exist
#Path(index_path).mkdir(parents=True, exist_ok=True)
start_time = time.time()  # NOTE(review): never read back — no elapsed-time report below
coords = adata.obsm["spatial"]
# Recursively tile spatial coordinates; each leaf tile holds <= 200 points.
tiles = preprocessing.build_quadtree_tiles(coords, max_pts=200, min_side=0.0, max_depth=40)
# remove tiles with less than 5 spots
tiles = [tile for tile in tiles if len(tile.idx) >= 5]
tiles = preprocessing.reindex_tiles(tiles)
if all_genes:
    num_genes = adata.n_vars
else:
    num_genes = top_vars
# Select the working gene set and build one covariance matrix per tile.
genes_work, gene_idx = spindle_dev.preprocessing.topvar_genes(adata, G=num_genes)
tile_covs = spindle_dev.preprocessing.build_tile_covs_full_serial(adata, tiles, gene_idx, eps=1e-6)
data = index.ProcessedData(tiles, tile_covs, genes_work, adata.n_obs)
if 'pca' not in data.latent:
    # 30-component PCA followed by a 2-D UMAP embedding (see log output).
    data.reduce_dim(num_pca_components=30, n_components=2, do_umap=True)
data.cluster_spds(cluster_distance="tree", cluster_method="leiden", resolution=resolution)
data.assign_label_to_spots()
data.get_corr_mean_by_cluster()
out_dict = data.get_adaptive_runs(find_blocks=True, with_size_guard=True,min_final_size=min_final_size,max_final_size=100)
# Choose per-cluster epsilons for the epsilon-net indexing step.
epsilon_block_wise_dict = {}
epsilon_dict = {}
for cluster_id in set(data.labels):
    eps_per_block, eps_elbow_per_block, eps = index.choose_adaptive_epsilons(data, cluster_id, k_target_per_block=64)
    epsilon_block_wise_dict[int(cluster_id)] = eps_elbow_per_block
    epsilon_dict[int(cluster_id)] = eps
# Create indices config
config = typing.IndexConfig()
config.epsilon_dict = epsilon_dict
config.epsilon_block_wise_dict = epsilon_block_wise_dict
config.threshold_type = 'constant'
config.kmean_method = 'epsilon_net'
# Build the per-cluster DAG index over the SPD matrices.
dag_dict, stat, dist_list = index.index_spds(data, config=config)
[2026-01-20 20:45:32,891] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance. [2026-01-20 20:45:32,891] INFO spindle_dev.index: Building ultrametric features from SPD matrices. [2026-01-20 20:45:49,433] INFO spindle_dev.index: Computing latent features from the tree representations. [2026-01-20 20:45:49,975] INFO spindle_dev.index: Reducing latent features to 30 dimensions using PCA. [2026-01-20 20:46:00,096] INFO spindle_dev.index: Explained variance ratios by PCA components: [0.06845865 0.05360552 0.02648471 0.01915467 0.01301552 0.01026255 0.00782341 0.00736292 0.0066578 0.00645745 0.00500687 0.00465036 0.00423697 0.00417093 0.00382233 0.00365148 0.00355614 0.00349192 0.00334309 0.0031902 0.00303663 0.00302339 0.0029695 0.00293939 0.0029074 0.00282528 0.00278963 0.00277179 0.00273333 0.00270156] [2026-01-20 20:46:00,097] INFO spindle_dev.index: Reducing latent features to 2 dimensions using UMAP. /panfs/accrepfs.vampire/home/sarkah1/miniforge3/envs/spatial/lib/python3.10/site-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism. warn( [2026-01-20 20:46:05,615] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance. [2026-01-20 20:46:05,616] INFO spindle_dev.index: Clustering SPD matrices using Leiden clustering with resolution 0.50. 
[2026-01-20 20:46:05,850] INFO spindle_dev.index: Since clustering method is tree, I am going to find global order per cluster [2026-01-20 20:46:05,851] INFO spindle_dev.index: Finding consensus tree for cluster 0 [2026-01-20 20:46:05,959] INFO spindle_dev.index: Finding consensus tree for cluster 1 [2026-01-20 20:46:06,060] INFO spindle_dev.index: Finding consensus tree for cluster 2 [2026-01-20 20:46:06,142] INFO spindle_dev.index: Finding consensus tree for cluster 3 [2026-01-20 20:46:06,196] INFO spindle_dev.index: Finding consensus tree for cluster 4 [2026-01-20 20:46:06,250] INFO spindle_dev.index: Finding consensus tree for cluster 5 [2026-01-20 20:46:06,537] INFO spindle_dev.index: Computing mean correlation matrix for cluster 0 [2026-01-20 20:46:24,210] INFO spindle_dev.index: Computing mean correlation matrix for cluster 1 [2026-01-20 20:46:36,276] INFO spindle_dev.index: Computing mean correlation matrix for cluster 2 [2026-01-20 20:46:47,256] INFO spindle_dev.index: Computing mean correlation matrix for cluster 3 [2026-01-20 20:46:54,296] INFO spindle_dev.index: Computing mean correlation matrix for cluster 4 [2026-01-20 20:47:02,638] INFO spindle_dev.index: Computing mean correlation matrix for cluster 5 [2026-01-20 20:47:07,318] INFO spindle_dev.index: Finding adaptive block runs for cluster 0 [2026-01-20 20:47:07,824] INFO spindle_dev.index: Chose t=0.762928003964036 resulting in 189 blocks instead of 40 blocks would have gotten by default [2026-01-20 20:47:07,872] INFO spindle_dev.index: Final block runs for cluster 0: 22 blocks. [2026-01-20 20:47:07,872] INFO spindle_dev.index: Finding adaptive block runs for cluster 1 [2026-01-20 20:47:08,347] INFO spindle_dev.index: Chose t=0.7942023685368615 resulting in 192 blocks instead of 76 blocks would have gotten by default [2026-01-20 20:47:08,397] INFO spindle_dev.index: Final block runs for cluster 1: 21 blocks. 
[2026-01-20 20:47:08,398] INFO spindle_dev.index: Finding adaptive block runs for cluster 2 [2026-01-20 20:47:08,874] INFO spindle_dev.index: Chose t=0.8351684028224012 resulting in 164 blocks instead of 31 blocks would have gotten by default [2026-01-20 20:47:08,910] INFO spindle_dev.index: Final block runs for cluster 2: 21 blocks. [2026-01-20 20:47:08,911] INFO spindle_dev.index: Finding adaptive block runs for cluster 3 [2026-01-20 20:47:09,366] INFO spindle_dev.index: Chose t=0.719141883507654 resulting in 207 blocks instead of 46 blocks would have gotten by default [2026-01-20 20:47:09,423] INFO spindle_dev.index: Final block runs for cluster 3: 24 blocks. [2026-01-20 20:47:09,423] INFO spindle_dev.index: Finding adaptive block runs for cluster 4 [2026-01-20 20:47:09,891] INFO spindle_dev.index: Chose t=0.8060507957776382 resulting in 223 blocks instead of 94 blocks would have gotten by default [2026-01-20 20:47:09,956] INFO spindle_dev.index: Final block runs for cluster 4: 23 blocks. [2026-01-20 20:47:09,956] INFO spindle_dev.index: Finding adaptive block runs for cluster 5 [2026-01-20 20:47:10,426] INFO spindle_dev.index: Chose t=0.8924332101367483 resulting in 133 blocks instead of 68 blocks would have gotten by default [2026-01-20 20:47:10,450] INFO spindle_dev.index: Final block runs for cluster 5: 20 blocks. [2026-01-20 20:47:19,004] INFO spindle_dev.index: Processing cluster 0 [2026-01-20 20:47:19,005] INFO spindle_dev.index: Building SPD index with epsilon=6.917388844497137 [2026-01-20 20:47:19,005] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-20 20:47:19,083] INFO spindle_dev.index: Cluster 0: 499 SPDs, 22 blocks [2026-01-20 20:47:19,084] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-20 20:47:19,136] INFO spindle_dev.index: Finished block 0 in 0.05 seconds, found 4 clusters. 
[2026-01-20 20:47:19,137] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-20 20:47:19,223] INFO spindle_dev.index: Finished block 1 in 0.09 seconds, found 9 clusters. [2026-01-20 20:47:19,223] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-20 20:47:19,318] INFO spindle_dev.index: Finished block 2 in 0.10 seconds, found 11 clusters. [2026-01-20 20:47:19,319] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-20 20:47:19,406] INFO spindle_dev.index: Finished block 3 in 0.09 seconds, found 10 clusters. [2026-01-20 20:47:19,407] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-20 20:47:19,465] INFO spindle_dev.index: Finished block 4 in 0.06 seconds, found 5 clusters. [2026-01-20 20:47:19,466] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-20 20:47:19,524] INFO spindle_dev.index: Finished block 5 in 0.06 seconds, found 5 clusters. [2026-01-20 20:47:19,524] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-20 20:47:19,585] INFO spindle_dev.index: Finished block 6 in 0.06 seconds, found 4 clusters. [2026-01-20 20:47:19,585] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-20 20:47:19,633] INFO spindle_dev.index: Finished block 7 in 0.05 seconds, found 2 clusters. [2026-01-20 20:47:19,633] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-20 20:47:19,668] INFO spindle_dev.index: Finished block 8 in 0.03 seconds, found 1 clusters. [2026-01-20 20:47:19,668] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-20 20:47:19,708] INFO spindle_dev.index: Finished block 9 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:19,708] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-20 20:47:19,752] INFO spindle_dev.index: Finished block 10 in 0.04 seconds, found 2 clusters. 
[2026-01-20 20:47:19,752] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-20 20:47:19,801] INFO spindle_dev.index: Finished block 11 in 0.05 seconds, found 1 clusters. [2026-01-20 20:47:19,801] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-20 20:47:19,839] INFO spindle_dev.index: Finished block 12 in 0.04 seconds, found 1 clusters. [2026-01-20 20:47:19,839] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-20 20:47:19,875] INFO spindle_dev.index: Finished block 13 in 0.04 seconds, found 1 clusters. [2026-01-20 20:47:19,875] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-20 20:47:19,911] INFO spindle_dev.index: Finished block 14 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:19,911] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-20 20:47:19,943] INFO spindle_dev.index: Finished block 15 in 0.03 seconds, found 1 clusters. [2026-01-20 20:47:19,943] INFO spindle_dev.index: Using epsilon-net clustering for block 16 [2026-01-20 20:47:20,140] INFO spindle_dev.index: Finished block 16 in 0.20 seconds, found 7 clusters. [2026-01-20 20:47:20,140] INFO spindle_dev.index: Using epsilon-net clustering for block 17 [2026-01-20 20:47:20,194] INFO spindle_dev.index: Finished block 17 in 0.05 seconds, found 3 clusters. [2026-01-20 20:47:20,195] INFO spindle_dev.index: Using epsilon-net clustering for block 18 [2026-01-20 20:47:20,252] INFO spindle_dev.index: Finished block 18 in 0.06 seconds, found 4 clusters. [2026-01-20 20:47:20,252] INFO spindle_dev.index: Using epsilon-net clustering for block 19 [2026-01-20 20:47:20,309] INFO spindle_dev.index: Finished block 19 in 0.06 seconds, found 5 clusters. [2026-01-20 20:47:20,309] INFO spindle_dev.index: Using epsilon-net clustering for block 20 [2026-01-20 20:47:20,346] INFO spindle_dev.index: Finished block 20 in 0.04 seconds, found 2 clusters. 
[2026-01-20 20:47:20,347] INFO spindle_dev.index: Using epsilon-net clustering for block 21 [2026-01-20 20:47:20,394] INFO spindle_dev.index: Finished block 21 in 0.05 seconds, found 1 clusters. [2026-01-20 20:47:20,394] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-20 20:47:20,395] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-20 20:47:20,395] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-20 20:47:20,395] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-20 20:47:20,396] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-20 20:47:20,408] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-20 20:47:20,408] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. [2026-01-20 20:47:20,408] INFO spindle_dev.index: Processing cluster 1 [2026-01-20 20:47:20,409] INFO spindle_dev.index: Building SPD index with epsilon=7.061656880525277 [2026-01-20 20:47:20,409] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-20 20:47:20,941] INFO spindle_dev.index: Cluster 1: 470 SPDs, 21 blocks [2026-01-20 20:47:20,942] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-20 20:47:20,978] INFO spindle_dev.index: Finished block 0 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:20,978] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-20 20:47:21,049] INFO spindle_dev.index: Finished block 1 in 0.07 seconds, found 8 clusters. [2026-01-20 20:47:21,050] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-20 20:47:21,111] INFO spindle_dev.index: Finished block 2 in 0.06 seconds, found 7 clusters. 
[2026-01-20 20:47:21,112] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-20 20:47:21,180] INFO spindle_dev.index: Finished block 3 in 0.07 seconds, found 8 clusters. [2026-01-20 20:47:21,181] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-20 20:47:21,239] INFO spindle_dev.index: Finished block 4 in 0.06 seconds, found 7 clusters. [2026-01-20 20:47:21,240] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-20 20:47:21,288] INFO spindle_dev.index: Finished block 5 in 0.05 seconds, found 5 clusters. [2026-01-20 20:47:21,288] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-20 20:47:21,351] INFO spindle_dev.index: Finished block 6 in 0.06 seconds, found 8 clusters. [2026-01-20 20:47:21,351] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-20 20:47:21,406] INFO spindle_dev.index: Finished block 7 in 0.05 seconds, found 6 clusters. [2026-01-20 20:47:21,406] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-20 20:47:21,439] INFO spindle_dev.index: Finished block 8 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:21,440] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-20 20:47:21,474] INFO spindle_dev.index: Finished block 9 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:21,474] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-20 20:47:21,509] INFO spindle_dev.index: Finished block 10 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:21,510] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-20 20:47:21,539] INFO spindle_dev.index: Finished block 11 in 0.03 seconds, found 1 clusters. [2026-01-20 20:47:21,540] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-20 20:47:21,574] INFO spindle_dev.index: Finished block 12 in 0.03 seconds, found 2 clusters. 
[2026-01-20 20:47:21,575] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-20 20:47:21,806] INFO spindle_dev.index: Finished block 13 in 0.23 seconds, found 10 clusters. [2026-01-20 20:47:21,807] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-20 20:47:21,845] INFO spindle_dev.index: Finished block 14 in 0.04 seconds, found 1 clusters. [2026-01-20 20:47:21,845] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-20 20:47:21,885] INFO spindle_dev.index: Finished block 15 in 0.04 seconds, found 1 clusters. [2026-01-20 20:47:21,885] INFO spindle_dev.index: Using epsilon-net clustering for block 16 [2026-01-20 20:47:21,968] INFO spindle_dev.index: Finished block 16 in 0.08 seconds, found 4 clusters. [2026-01-20 20:47:21,969] INFO spindle_dev.index: Using epsilon-net clustering for block 17 [2026-01-20 20:47:22,020] INFO spindle_dev.index: Finished block 17 in 0.05 seconds, found 2 clusters. [2026-01-20 20:47:22,021] INFO spindle_dev.index: Using epsilon-net clustering for block 18 [2026-01-20 20:47:22,084] INFO spindle_dev.index: Finished block 18 in 0.06 seconds, found 3 clusters. [2026-01-20 20:47:22,084] INFO spindle_dev.index: Using epsilon-net clustering for block 19 [2026-01-20 20:47:22,136] INFO spindle_dev.index: Finished block 19 in 0.05 seconds, found 5 clusters. [2026-01-20 20:47:22,136] INFO spindle_dev.index: Using epsilon-net clustering for block 20 [2026-01-20 20:47:22,185] INFO spindle_dev.index: Finished block 20 in 0.05 seconds, found 4 clusters. [2026-01-20 20:47:22,186] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-20 20:47:22,186] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-20 20:47:22,186] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-20 20:47:22,187] INFO spindle_dev.index: We will use triangle inequality to order clusters. 
[2026-01-20 20:47:22,187] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-20 20:47:22,196] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-20 20:47:22,197] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. [2026-01-20 20:47:22,197] INFO spindle_dev.index: Processing cluster 2 [2026-01-20 20:47:22,197] INFO spindle_dev.index: Building SPD index with epsilon=7.971775058313972 [2026-01-20 20:47:22,198] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-20 20:47:22,231] INFO spindle_dev.index: Cluster 2: 413 SPDs, 21 blocks [2026-01-20 20:47:22,232] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-20 20:47:22,260] INFO spindle_dev.index: Finished block 0 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:22,260] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-20 20:47:22,293] INFO spindle_dev.index: Finished block 1 in 0.03 seconds, found 3 clusters. [2026-01-20 20:47:22,293] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-20 20:47:22,327] INFO spindle_dev.index: Finished block 2 in 0.03 seconds, found 3 clusters. [2026-01-20 20:47:22,327] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-20 20:47:22,365] INFO spindle_dev.index: Finished block 3 in 0.04 seconds, found 4 clusters. [2026-01-20 20:47:22,365] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-20 20:47:22,399] INFO spindle_dev.index: Finished block 4 in 0.03 seconds, found 3 clusters. [2026-01-20 20:47:22,400] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-20 20:47:22,438] INFO spindle_dev.index: Finished block 5 in 0.04 seconds, found 4 clusters. 
[2026-01-20 20:47:22,438] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-20 20:47:22,473] INFO spindle_dev.index: Finished block 6 in 0.04 seconds, found 3 clusters. [2026-01-20 20:47:22,474] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-20 20:47:22,509] INFO spindle_dev.index: Finished block 7 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:22,510] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-20 20:47:22,545] INFO spindle_dev.index: Finished block 8 in 0.04 seconds, found 3 clusters. [2026-01-20 20:47:22,545] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-20 20:47:22,577] INFO spindle_dev.index: Finished block 9 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:22,577] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-20 20:47:22,614] INFO spindle_dev.index: Finished block 10 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:22,614] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-20 20:47:22,687] INFO spindle_dev.index: Finished block 11 in 0.07 seconds, found 4 clusters. [2026-01-20 20:47:22,687] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-20 20:47:22,721] INFO spindle_dev.index: Finished block 12 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:22,721] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-20 20:47:22,755] INFO spindle_dev.index: Finished block 13 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:22,755] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-20 20:47:22,790] INFO spindle_dev.index: Finished block 14 in 0.04 seconds, found 1 clusters. [2026-01-20 20:47:22,791] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-20 20:47:22,816] INFO spindle_dev.index: Finished block 15 in 0.03 seconds, found 1 clusters. 
[2026-01-20 20:47:22,817] INFO spindle_dev.index: Using epsilon-net clustering for block 16 [2026-01-20 20:47:22,843] INFO spindle_dev.index: Finished block 16 in 0.03 seconds, found 1 clusters. [2026-01-20 20:47:22,844] INFO spindle_dev.index: Using epsilon-net clustering for block 17 [2026-01-20 20:47:22,869] INFO spindle_dev.index: Finished block 17 in 0.03 seconds, found 1 clusters. [2026-01-20 20:47:22,870] INFO spindle_dev.index: Using epsilon-net clustering for block 18 [2026-01-20 20:47:23,328] INFO spindle_dev.index: Finished block 18 in 0.46 seconds, found 58 clusters. [2026-01-20 20:47:23,329] INFO spindle_dev.index: Using epsilon-net clustering for block 19 [2026-01-20 20:47:23,365] INFO spindle_dev.index: Finished block 19 in 0.04 seconds, found 1 clusters. [2026-01-20 20:47:23,365] INFO spindle_dev.index: Using epsilon-net clustering for block 20 [2026-01-20 20:47:23,442] INFO spindle_dev.index: Finished block 20 in 0.08 seconds, found 3 clusters. [2026-01-20 20:47:23,443] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-20 20:47:23,443] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-20 20:47:23,443] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-20 20:47:23,443] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-20 20:47:23,444] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-20 20:47:23,451] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-20 20:47:23,452] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. 
[2026-01-20 20:47:23,452] INFO spindle_dev.index: Processing cluster 3 [2026-01-20 20:47:23,452] INFO spindle_dev.index: Building SPD index with epsilon=6.034811805274417 [2026-01-20 20:47:23,453] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-20 20:47:23,475] INFO spindle_dev.index: Cluster 3: 270 SPDs, 24 blocks [2026-01-20 20:47:23,476] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-20 20:47:23,509] INFO spindle_dev.index: Finished block 0 in 0.03 seconds, found 7 clusters. [2026-01-20 20:47:23,510] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-20 20:47:23,559] INFO spindle_dev.index: Finished block 1 in 0.05 seconds, found 12 clusters. [2026-01-20 20:47:23,560] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-20 20:47:23,630] INFO spindle_dev.index: Finished block 2 in 0.07 seconds, found 19 clusters. [2026-01-20 20:47:23,631] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-20 20:47:23,699] INFO spindle_dev.index: Finished block 3 in 0.07 seconds, found 18 clusters. [2026-01-20 20:47:23,699] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-20 20:47:23,772] INFO spindle_dev.index: Finished block 4 in 0.07 seconds, found 20 clusters. [2026-01-20 20:47:23,773] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-20 20:47:23,834] INFO spindle_dev.index: Finished block 5 in 0.06 seconds, found 16 clusters. [2026-01-20 20:47:23,835] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-20 20:47:23,867] INFO spindle_dev.index: Finished block 6 in 0.03 seconds, found 6 clusters. [2026-01-20 20:47:23,867] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-20 20:47:23,890] INFO spindle_dev.index: Finished block 7 in 0.02 seconds, found 3 clusters. 
[2026-01-20 20:47:23,890] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-20 20:47:23,910] INFO spindle_dev.index: Finished block 8 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:23,910] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-20 20:47:23,942] INFO spindle_dev.index: Finished block 9 in 0.03 seconds, found 6 clusters. [2026-01-20 20:47:23,943] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-20 20:47:23,970] INFO spindle_dev.index: Finished block 10 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:23,971] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-20 20:47:23,994] INFO spindle_dev.index: Finished block 11 in 0.02 seconds, found 1 clusters. [2026-01-20 20:47:23,995] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-20 20:47:24,027] INFO spindle_dev.index: Finished block 12 in 0.03 seconds, found 6 clusters. [2026-01-20 20:47:24,028] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-20 20:47:24,053] INFO spindle_dev.index: Finished block 13 in 0.03 seconds, found 4 clusters. [2026-01-20 20:47:24,054] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-20 20:47:24,071] INFO spindle_dev.index: Finished block 14 in 0.02 seconds, found 1 clusters. [2026-01-20 20:47:24,071] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-20 20:47:24,095] INFO spindle_dev.index: Finished block 15 in 0.02 seconds, found 3 clusters. [2026-01-20 20:47:24,096] INFO spindle_dev.index: Using epsilon-net clustering for block 16 [2026-01-20 20:47:24,116] INFO spindle_dev.index: Finished block 16 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:24,116] INFO spindle_dev.index: Using epsilon-net clustering for block 17 [2026-01-20 20:47:24,148] INFO spindle_dev.index: Finished block 17 in 0.03 seconds, found 4 clusters. 
[2026-01-20 20:47:24,148] INFO spindle_dev.index: Using epsilon-net clustering for block 18 [2026-01-20 20:47:24,168] INFO spindle_dev.index: Finished block 18 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:24,168] INFO spindle_dev.index: Using epsilon-net clustering for block 19 [2026-01-20 20:47:24,185] INFO spindle_dev.index: Finished block 19 in 0.02 seconds, found 1 clusters. [2026-01-20 20:47:24,186] INFO spindle_dev.index: Using epsilon-net clustering for block 20 [2026-01-20 20:47:24,213] INFO spindle_dev.index: Finished block 20 in 0.03 seconds, found 1 clusters. [2026-01-20 20:47:24,213] INFO spindle_dev.index: Using epsilon-net clustering for block 21 [2026-01-20 20:47:24,231] INFO spindle_dev.index: Finished block 21 in 0.02 seconds, found 1 clusters. [2026-01-20 20:47:24,231] INFO spindle_dev.index: Using epsilon-net clustering for block 22 [2026-01-20 20:47:24,248] INFO spindle_dev.index: Finished block 22 in 0.02 seconds, found 1 clusters. [2026-01-20 20:47:24,248] INFO spindle_dev.index: Using epsilon-net clustering for block 23 [2026-01-20 20:47:24,380] INFO spindle_dev.index: Finished block 23 in 0.13 seconds, found 11 clusters. [2026-01-20 20:47:24,380] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-20 20:47:24,380] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-20 20:47:24,381] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-20 20:47:24,381] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-20 20:47:24,381] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-20 20:47:24,389] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-20 20:47:24,389] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. 
[2026-01-20 20:47:24,389] INFO spindle_dev.index: Processing cluster 4 [2026-01-20 20:47:24,390] INFO spindle_dev.index: Building SPD index with epsilon=7.355224122437038 [2026-01-20 20:47:24,390] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-20 20:47:24,411] INFO spindle_dev.index: Cluster 4: 266 SPDs, 23 blocks [2026-01-20 20:47:24,412] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-20 20:47:24,434] INFO spindle_dev.index: Finished block 0 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:24,434] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-20 20:47:24,458] INFO spindle_dev.index: Finished block 1 in 0.02 seconds, found 4 clusters. [2026-01-20 20:47:24,459] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-20 20:47:24,486] INFO spindle_dev.index: Finished block 2 in 0.03 seconds, found 5 clusters. [2026-01-20 20:47:24,486] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-20 20:47:24,513] INFO spindle_dev.index: Finished block 3 in 0.03 seconds, found 5 clusters. [2026-01-20 20:47:24,513] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-20 20:47:24,543] INFO spindle_dev.index: Finished block 4 in 0.03 seconds, found 6 clusters. [2026-01-20 20:47:24,544] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-20 20:47:24,577] INFO spindle_dev.index: Finished block 5 in 0.03 seconds, found 7 clusters. [2026-01-20 20:47:24,578] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-20 20:47:24,612] INFO spindle_dev.index: Finished block 6 in 0.03 seconds, found 7 clusters. [2026-01-20 20:47:24,613] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-20 20:47:24,644] INFO spindle_dev.index: Finished block 7 in 0.03 seconds, found 6 clusters. 
[2026-01-20 20:47:24,644] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-20 20:47:24,673] INFO spindle_dev.index: Finished block 8 in 0.03 seconds, found 5 clusters. [2026-01-20 20:47:24,673] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-20 20:47:24,710] INFO spindle_dev.index: Finished block 9 in 0.04 seconds, found 7 clusters. [2026-01-20 20:47:24,711] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-20 20:47:24,736] INFO spindle_dev.index: Finished block 10 in 0.03 seconds, found 4 clusters. [2026-01-20 20:47:24,736] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-20 20:47:24,759] INFO spindle_dev.index: Finished block 11 in 0.02 seconds, found 3 clusters. [2026-01-20 20:47:24,759] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-20 20:47:24,788] INFO spindle_dev.index: Finished block 12 in 0.03 seconds, found 5 clusters. [2026-01-20 20:47:24,788] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-20 20:47:24,810] INFO spindle_dev.index: Finished block 13 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:24,810] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-20 20:47:24,839] INFO spindle_dev.index: Finished block 14 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:24,839] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-20 20:47:24,862] INFO spindle_dev.index: Finished block 15 in 0.02 seconds, found 3 clusters. [2026-01-20 20:47:24,862] INFO spindle_dev.index: Using epsilon-net clustering for block 16 [2026-01-20 20:47:24,879] INFO spindle_dev.index: Finished block 16 in 0.02 seconds, found 1 clusters. [2026-01-20 20:47:24,879] INFO spindle_dev.index: Using epsilon-net clustering for block 17 [2026-01-20 20:47:24,898] INFO spindle_dev.index: Finished block 17 in 0.02 seconds, found 2 clusters. 
[2026-01-20 20:47:24,899] INFO spindle_dev.index: Using epsilon-net clustering for block 18 [2026-01-20 20:47:25,013] INFO spindle_dev.index: Finished block 18 in 0.11 seconds, found 9 clusters. [2026-01-20 20:47:25,014] INFO spindle_dev.index: Using epsilon-net clustering for block 19 [2026-01-20 20:47:25,043] INFO spindle_dev.index: Finished block 19 in 0.03 seconds, found 3 clusters. [2026-01-20 20:47:25,043] INFO spindle_dev.index: Using epsilon-net clustering for block 20 [2026-01-20 20:47:25,069] INFO spindle_dev.index: Finished block 20 in 0.03 seconds, found 4 clusters. [2026-01-20 20:47:25,070] INFO spindle_dev.index: Using epsilon-net clustering for block 21 [2026-01-20 20:47:25,114] INFO spindle_dev.index: Finished block 21 in 0.04 seconds, found 5 clusters. [2026-01-20 20:47:25,114] INFO spindle_dev.index: Using epsilon-net clustering for block 22 [2026-01-20 20:47:25,141] INFO spindle_dev.index: Finished block 22 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:25,142] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-20 20:47:25,142] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-20 20:47:25,142] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-20 20:47:25,143] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-20 20:47:25,143] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-20 20:47:25,149] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-20 20:47:25,149] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances. 
[2026-01-20 20:47:25,150] INFO spindle_dev.index: Processing cluster 5 [2026-01-20 20:47:25,150] INFO spindle_dev.index: Building SPD index with epsilon=7.572297895556834 [2026-01-20 20:47:25,150] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices. [2026-01-20 20:47:25,160] INFO spindle_dev.index: Cluster 5: 162 SPDs, 20 blocks [2026-01-20 20:47:25,161] INFO spindle_dev.index: Using epsilon-net clustering for block 0 [2026-01-20 20:47:25,173] INFO spindle_dev.index: Finished block 0 in 0.01 seconds, found 2 clusters. [2026-01-20 20:47:25,173] INFO spindle_dev.index: Using epsilon-net clustering for block 1 [2026-01-20 20:47:25,187] INFO spindle_dev.index: Finished block 1 in 0.01 seconds, found 3 clusters. [2026-01-20 20:47:25,187] INFO spindle_dev.index: Using epsilon-net clustering for block 2 [2026-01-20 20:47:25,202] INFO spindle_dev.index: Finished block 2 in 0.02 seconds, found 4 clusters. [2026-01-20 20:47:25,202] INFO spindle_dev.index: Using epsilon-net clustering for block 3 [2026-01-20 20:47:25,216] INFO spindle_dev.index: Finished block 3 in 0.01 seconds, found 3 clusters. [2026-01-20 20:47:25,216] INFO spindle_dev.index: Using epsilon-net clustering for block 4 [2026-01-20 20:47:25,231] INFO spindle_dev.index: Finished block 4 in 0.02 seconds, found 4 clusters. [2026-01-20 20:47:25,232] INFO spindle_dev.index: Using epsilon-net clustering for block 5 [2026-01-20 20:47:25,247] INFO spindle_dev.index: Finished block 5 in 0.02 seconds, found 3 clusters. [2026-01-20 20:47:25,247] INFO spindle_dev.index: Using epsilon-net clustering for block 6 [2026-01-20 20:47:25,263] INFO spindle_dev.index: Finished block 6 in 0.02 seconds, found 4 clusters. [2026-01-20 20:47:25,263] INFO spindle_dev.index: Using epsilon-net clustering for block 7 [2026-01-20 20:47:25,277] INFO spindle_dev.index: Finished block 7 in 0.01 seconds, found 3 clusters. 
[2026-01-20 20:47:25,277] INFO spindle_dev.index: Using epsilon-net clustering for block 8 [2026-01-20 20:47:25,291] INFO spindle_dev.index: Finished block 8 in 0.01 seconds, found 3 clusters. [2026-01-20 20:47:25,292] INFO spindle_dev.index: Using epsilon-net clustering for block 9 [2026-01-20 20:47:25,306] INFO spindle_dev.index: Finished block 9 in 0.01 seconds, found 3 clusters. [2026-01-20 20:47:25,306] INFO spindle_dev.index: Using epsilon-net clustering for block 10 [2026-01-20 20:47:25,330] INFO spindle_dev.index: Finished block 10 in 0.02 seconds, found 5 clusters. [2026-01-20 20:47:25,330] INFO spindle_dev.index: Using epsilon-net clustering for block 11 [2026-01-20 20:47:25,347] INFO spindle_dev.index: Finished block 11 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:25,348] INFO spindle_dev.index: Using epsilon-net clustering for block 12 [2026-01-20 20:47:25,390] INFO spindle_dev.index: Finished block 12 in 0.04 seconds, found 2 clusters. [2026-01-20 20:47:25,391] INFO spindle_dev.index: Using epsilon-net clustering for block 13 [2026-01-20 20:47:25,409] INFO spindle_dev.index: Finished block 13 in 0.02 seconds, found 2 clusters. [2026-01-20 20:47:25,409] INFO spindle_dev.index: Using epsilon-net clustering for block 14 [2026-01-20 20:47:25,422] INFO spindle_dev.index: Finished block 14 in 0.01 seconds, found 2 clusters. [2026-01-20 20:47:25,422] INFO spindle_dev.index: Using epsilon-net clustering for block 15 [2026-01-20 20:47:25,435] INFO spindle_dev.index: Finished block 15 in 0.01 seconds, found 2 clusters. [2026-01-20 20:47:25,435] INFO spindle_dev.index: Using epsilon-net clustering for block 16 [2026-01-20 20:47:25,464] INFO spindle_dev.index: Finished block 16 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:25,464] INFO spindle_dev.index: Using epsilon-net clustering for block 17 [2026-01-20 20:47:25,482] INFO spindle_dev.index: Finished block 17 in 0.02 seconds, found 4 clusters. 
[2026-01-20 20:47:25,483] INFO spindle_dev.index: Using epsilon-net clustering for block 18 [2026-01-20 20:47:25,513] INFO spindle_dev.index: Finished block 18 in 0.03 seconds, found 2 clusters. [2026-01-20 20:47:25,514] INFO spindle_dev.index: Using epsilon-net clustering for block 19 [2026-01-20 20:47:25,534] INFO spindle_dev.index: Finished block 19 in 0.02 seconds, found 4 clusters. [2026-01-20 20:47:25,535] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters. [2026-01-20 20:47:25,535] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by [2026-01-20 20:47:25,535] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them? [2026-01-20 20:47:25,535] INFO spindle_dev.index: We will use triangle inequality to order clusters. [2026-01-20 20:47:25,536] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs. [2026-01-20 20:47:25,539] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list [2026-01-20 20:47:25,539] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
In [65]:
# Create colors for clusters and plot the UMAP embedding, labeling each
# cluster at its centroid.
import numpy as np
import matplotlib.pyplot as plt
n_clusters = len(set(data.labels))
# Use Scanpy's default_20 palette for consistency.
palette = sc.pl.palettes.default_20
# BUGFIX: wrap around the palette so labels >= 20 no longer raise IndexError;
# colors repeat past 20 clusters, which is acceptable for visual inspection.
cluster_colors = [palette[lab % len(palette)] for lab in data.labels]
plt.scatter(data.latent['umap'][:,0], data.latent['umap'][:,1], s=6, lw=0, alpha=0.8, c=cluster_colors)
# Write each cluster id at its centroid, with a background circle for
# visibility and bold font.
for i in range(n_clusters):
    cluster_points = data.latent['umap'][data.labels == i]
    if len(cluster_points) == 0:
        continue
    x_mean = np.mean(cluster_points[:,0])
    y_mean = np.mean(cluster_points[:,1])
    plt.text(x_mean, y_mean, str(i), color='black', fontsize=10, ha='center', va='center', fontweight='bold',
             bbox=dict(facecolor='white', edgecolor='black', boxstyle='circle,pad=0.5', alpha=0.7))
# Remove top/right spines and axis ticks for a cleaner figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
#plt.savefig(f"{result_dir}/umap_plot_with_cluster_labels.png", dpi=300)
plt.show()
In [67]:
import matplotlib.pylab as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
In [69]:
# Spatial scatter of spots colored by cluster, with tile bounding boxes
# overlaid as thin outlines.
import numpy as np
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
labels = data.labels
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']
plt.scatter(coords[indices,0], coords[indices,1], s=1, c=point_colors, lw=0, alpha=0.8)
# Collect one rectangle per tile; bbox is (xmin, ymin, xmax, ymax).
patches = []
for t in tiles:
    bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]
    x0, y0, x1, y1 = bbox
    patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
# BUGFIX: match_original=True made PatchCollection copy edge colors and line
# widths from the Rectangle objects, silently ignoring the
# linewidths/edgecolors keyword arguments passed here. Drop it (and keep the
# rectangles unfilled via facecolors='none') so the intended thin blue
# outlines are actually applied.
pc = PatchCollection(patches, linewidths=0.2, edgecolors='b', facecolors='none', alpha=0.3)
ax = plt.gca()
ax.add_collection(pc)
# Image coordinates: flip y so the plot matches the tissue orientation.
ax.invert_yaxis()
ax.axis('off')
#ax.set_aspect('equal')
#plt.savefig(f"{result_dir}/spatial_plot_with_cluster.png", dpi=300)
plt.show()
In [76]:
# Save spatial plot to results directory.
# Alternative: pass save="_dcis_groups.png" to sc.pl.spatial, which writes to
# scanpy's default figures directory:
# sc.pl.spatial(adata, color="Cluster", groups=["DCIS_1", "DCIS_2"], spot_size=12, frameon=False,
#               save="_dcis_groups.png")
# Here we save to an explicit location instead.
import matplotlib.pyplot as plt
# NOTE(review): sc.pl.spatial emits a FutureWarning (upstream suggests
# squidpy.pl.spatial_scatter); migrate once squidpy is a project dependency.
sc.pl.spatial(adata, color="Cluster", groups=["DCIS_1", "DCIS_2"], spot_size=12, frameon=False, show=False)
plt.savefig('/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_dcis_groups.png', dpi=300, bbox_inches='tight')
plt.show()
/tmp/ipykernel_101252/2522203223.py:8: FutureWarning: Use `squidpy.pl.spatial_scatter` instead. sc.pl.spatial(adata, color="Cluster", groups=["DCIS_1", "DCIS_2"], spot_size=12, frameon=False, show=False)
In [ ]:
# Spatial scatter of spots colored by cluster, without the tile-outline
# overlay (intentionally disabled in this variant).
import numpy as np
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
labels = data.labels
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']
plt.scatter(coords[indices,0], coords[indices,1], s=1, c=point_colors, lw=0, alpha=0.8)
# BUGFIX: the original cell still called ax.add_collection(pc) even though
# the construction of `pc` below was commented out, so it only "worked" by
# reusing a stale `pc` left in the kernel by an earlier cell (and fails on
# Restart & Run All). The add_collection call is disabled together with the
# construction.
# patches = []
# for t in tiles:
#     bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin,ymin,xmax,ymax)
#     x0, y0, x1, y1 = bbox
#     patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
# pc = PatchCollection(patches, match_original=True, linewidths=0.2, edgecolors='b', alpha=0.3)
ax = plt.gca()
# ax.add_collection(pc)
ax.invert_yaxis()
ax.axis('off')
#ax.set_aspect('equal')
#plt.savefig(f"{result_dir}/spatial_plot_with_cluster.png", dpi=300)
plt.show()
In [ ]:
# Number of nodes in dag_dict
In [ ]:
# BUGFIX: the original cell read dag_dict.keys() BEFORE reassigning dag_dict
# at the bottom, so the cluster filter only worked off stale kernel state and
# breaks on Restart & Run All. Build the index (and ground-truth paths)
# first, then sample queries from it.
dag_dict, stat, dist_list = index.index_spds(data, config=config)
gt_paths = test.create_ground_truth_paths(dag_dict)
n_queries = 10
seed = 40
rng = np.random.default_rng(seed)  # fixed seed for reproducible sampling
all_indices = np.arange(len(data.spd_matrices))
# Restrict candidates to clusters that actually have a built index.
valid_clusters = list(dag_dict.keys())
mask = np.isin(data.labels, valid_clusters)
candidate_indices = all_indices[mask]
query_indices = rng.choice(candidate_indices, size=n_queries, replace=False)
query_matrices = [data.spd_matrices[i] for i in query_indices]
In [12]:
# Hot-reload the metrics module to pick up source edits without restarting the kernel.
import spindle_dev.metrics as metrics
importlib.reload(metrics)
Out[12]:
<module 'spindle_dev.metrics' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/metrics.py'>
In [14]:
gt_paths = test.create_ground_truth_paths(dag_dict)
[2026-01-20 17:50:11,714] INFO spindle_dev.test: Built ground-truth paths for 760 SPDs in cluster 0 across 22 blocks. [2026-01-20 17:50:12,020] INFO spindle_dev.test: Built ground-truth paths for 515 SPDs in cluster 1 across 22 blocks. [2026-01-20 17:50:12,258] INFO spindle_dev.test: Built ground-truth paths for 431 SPDs in cluster 2 across 23 blocks. [2026-01-20 17:50:12,354] INFO spindle_dev.test: Built ground-truth paths for 260 SPDs in cluster 3 across 24 blocks. [2026-01-20 17:50:12,386] INFO spindle_dev.test: Built ground-truth paths for 156 SPDs in cluster 4 across 19 blocks. [2026-01-20 17:50:12,395] INFO spindle_dev.test: Built ground-truth paths for 66 SPDs in cluster 5 across 25 blocks.
In [20]:
# Sample query SPDs for the noise-robustness experiment.
seed = 40  # fixed seed for reproducible sampling
n_queries_noise = 200  # number of queries to perturb
rng = np.random.default_rng(seed)
all_indices = np.arange(len(data.spd_matrices))
# Restrict candidates to clusters that actually have a built index.
valid_clusters = list(dag_dict.keys())
mask = np.isin(data.labels, valid_clusters)
candidate_indices = all_indices[mask]
query_indices = rng.choice(candidate_indices, size=n_queries_noise, replace=False)
# Keep the clean (noise-free) query matrices; noisy variants are sampled later.
query_matrices_clean = [data.spd_matrices[i] for i in query_indices]
In [21]:
import tqdm
In [22]:
# Cache eigenvalues/eigenvectors of the clean query matrices for reuse when
# sampling noisy variants (avoids re-running eigh once per noise level).
def _as_array(matrix):
    """Coerce input to a float64 numpy array."""
    return np.asarray(matrix, dtype=np.float64)

eig_floor = 1e-8  # same eigenvalue clamp used before, for numerical stability
query_cache = []
# BUGFIX: wrap the list (not the enumerate generator) in tqdm so the bar
# knows the total and can show progress percentage / ETA.
for j, A in enumerate(tqdm.tqdm(query_matrices_clean)):
    A = _as_array(A)
    A = 0.5 * (A + A.T)  # enforce exact symmetry before eigh
    w, V = np.linalg.eigh(A)
    w = np.maximum(w, eig_floor)  # guard against tiny/negative eigenvalues
    query_cache.append({
        "idx": int(query_indices[j]),  # original index in data.spd_matrices (optional)
        "V": V,                        # eigenvectors
        "logw": np.log(w),             # cached log-eigenvalues
        "p": A.shape[0],               # matrix dimension
    })
200it [00:28, 6.96it/s]
In [23]:
def sample_noisy_from_cache_with_le(cache, noise_level=0.1, seed=None):
    """Draw one noisy SPD matrix per cached eigendecomposition.

    Gaussian noise with standard deviation ``noise_level`` is added to the
    cached log-eigenvalues, so every perturbed matrix keeps the eigenvectors
    of its source. Because the eigenbasis is unchanged, the log-Euclidean
    distance to the original is exactly ``||eps|| / sqrt(p)`` (dimension
    normalized).

    Parameters
    ----------
    cache : list of dict
        Entries with keys "V" (eigenvectors), "logw" (log-eigenvalues),
        and "p" (matrix dimension).
    noise_level : float
        Std of the Gaussian perturbation in log-eigenvalue space.
    seed : int or None
        Seed for the random generator (None -> nondeterministic).

    Returns
    -------
    (list of ndarray, ndarray)
        Noisy SPD matrices and their LE distances from the originals.
    """
    rng = np.random.default_rng(seed)
    noisy_mats, le_dists = [], []
    for entry in cache:
        eps = rng.normal(0.0, noise_level, size=entry["logw"].shape)
        perturbed_w = np.exp(entry["logw"] + eps)
        basis = entry["V"]
        mat = basis @ (perturbed_w[:, None] * basis.T)  # V diag(w') V^T
        mat = 0.5 * (mat + mat.T)  # re-symmetrize against round-off
        noisy_mats.append(mat)
        le_dists.append(float(np.linalg.norm(eps) / np.sqrt(entry["p"])))
    return noisy_mats, np.array(le_dists, dtype=float)
In [24]:
true_clusters = [int(data.labels[i]) for i in query_indices]
In [27]:
true_paths = [gt_paths.get(true_cluster, {}).get(qid) for qid, true_cluster in zip(query_indices, true_clusters)]
In [35]:
# Test robustness with noisy query matrices: add increasing noise in
# log-eigenvalue space and check whether cluster assignment and index search
# still recover the ground truth.
noise_levels = [0.0, 0.05, 0.1, 0.2, 0.5, 1.0]
# NOTE(review): the original cell also set n_queries_noise = 10 here, but the
# value was never read (the run iterates over all cached queries); the
# misleading dead assignment has been removed.
search_cfg = search.SearchConfig(max_results=2, debug=False, max_failed_starts=5, max_failed_paths=10)
results_by_noise = []
for noise_level in noise_levels:
    print(f"\nTesting with noise level: {noise_level}")
    noise_start = time.time()
    # --- Generate noisy matrices (reusing cached eigendecompositions) ---
    noise_gen_start = time.time()
    if noise_level > 0:
        query_matrices_noisy, le_distances = sample_noisy_from_cache_with_le(
            query_cache, noise_level=noise_level, seed=seed
        )
        print(f" Generated LE distances - min: {le_distances.min():.4f}, max: {le_distances.max():.4f}, mean: {le_distances.mean():.4f}")
    else:
        # Zero noise: reuse the clean matrices directly.
        query_matrices_noisy = query_matrices_clean
        le_distances = np.array([0.0] * len(query_matrices_clean))
    noise_gen_time = time.time() - noise_gen_start
    print(f" Time to generate noisy matrices: {noise_gen_time:.3f}s")
    # --- Predict clusters using the noisy queries ---
    cluster_pred_start = time.time()
    predicted_clusters = search.assign_clusters_to_new_spds(query_matrices_noisy, data)
    cluster_pred_time = time.time() - cluster_pred_start
    print(f" Time to predict clusters: {cluster_pred_time:.3f}s")
    # --- Search the index for each query ---
    search_start = time.time()
    matched_list = []
    matched_leaf_list = []
    budget_list = []
    matched_budget_list = []
    for j, cluster_id in enumerate(predicted_clusters):
        true_cluster = true_clusters[j]
        gt_path = true_paths[j]
        index_handle = dag_dict[cluster_id]
        num_blocks = len(index_handle.sorted_blocks)
        epsilon = config.epsilon_dict[cluster_id]
        # Search budget scales with the per-cluster epsilon and block count.
        budget = float(epsilon) * float(num_blocks) * 2
        q_spd = query_matrices_noisy[j]
        # Permute the query into the cluster's canonical block order.
        perm = data.perm_list[cluster_id]
        q_spd_perm = q_spd[np.ix_(perm, perm)]
        query_block_runs = data.block_dict[cluster_id]
        # NOTE(review): the original recomputed num_blocks from
        # query_block_runs here, but the value was never read afterwards;
        # the dead assignment has been removed.
        results = search.search_index(
            index_handle,
            q_spd_perm,
            [],
            query_block_runs,
            budget,
            config=search_cfg,
        )
        if cluster_id == true_cluster:
            matched = False
            matched_leaf = False
            matched_budget = None
            # First pass: exact ground-truth path match.
            for path in results.paths:
                if path.node_path == gt_path:
                    matched = True
                    matched_leaf = True
                    matched_budget = path.total_distance
                    break
            # If no exact path match, see if any result shares the same leaf.
            if not matched:
                gt_leaf_node = gt_path[-1]
                for path in results.paths:
                    if path.node_path and path.node_path[-1] == gt_leaf_node:
                        matched_leaf = True
                        matched_budget = path.total_distance
                        break
        else:
            # Wrong cluster predicted: record the first path's cost, if any.
            budget_used = results.paths[0].total_distance if results.paths else None
            matched = False
            matched_leaf = False
            matched_budget = budget_used
        matched_list += [matched]
        matched_leaf_list += [matched_leaf]
        budget_list += [budget]
        matched_budget_list += [matched_budget]
    search_time = time.time() - search_start
    print(f" Time to search index: {search_time:.3f}s ({search_time/len(predicted_clusters):.4f}s per query)")
    # Store individual results for each query.
    # NOTE(review): 'budget' holds the matched path's distance while
    # 'query_budget' holds the search budget — names kept as-is for
    # downstream compatibility despite being confusing.
    for q_idx, le_dist, true_clust, pred_clust, matched, matched_leaf, budget, matched_budget in zip(
        query_indices, le_distances, true_clusters, predicted_clusters,
        matched_list, matched_leaf_list, budget_list, matched_budget_list
    ):
        results_by_noise.append({
            'noise_level': noise_level,
            'query_index': int(q_idx),
            'le_distance': float(le_dist),
            'true_cluster': true_clust,
            'predicted_cluster': pred_clust,
            'correct': true_clust == pred_clust,
            "matched": matched,
            "matched_leaf": matched_leaf,
            "budget": matched_budget,
            "query_budget": budget
        })
    # Print summary for this noise level.
    correct = sum(1 for t, p in zip(true_clusters, predicted_clusters) if t == p)
    accuracy = correct / len(true_clusters)
    avg_le_distance = float(le_distances.mean())
    total_time = time.time() - noise_start
    print(f" Accuracy: {accuracy:.3f} ({correct}/{len(true_clusters)})")
    print(f" Avg LE distance: {avg_le_distance:.4f}")
    print(f" Total time for noise level: {total_time:.3f}s")
Testing with noise level: 0.0 Time to generate noisy matrices: 0.000s Time to predict clusters: 5.870s Time to search index: 25.987s (0.1299s per query) Accuracy: 0.990 (198/200) Avg LE distance: 0.0000 Total time for noise level: 31.859s Testing with noise level: 0.05 Generated LE distances - min: 0.0444, max: 0.0566, mean: 0.0499 Time to generate noisy matrices: 3.989s Time to predict clusters: 6.112s Time to search index: 24.988s (0.1249s per query) Accuracy: 0.990 (198/200) Avg LE distance: 0.0499 Total time for noise level: 35.091s Testing with noise level: 0.1 Generated LE distances - min: 0.0888, max: 0.1131, mean: 0.0998 Time to generate noisy matrices: 9.051s Time to predict clusters: 7.363s Time to search index: 24.335s (0.1217s per query) Accuracy: 0.990 (198/200) Avg LE distance: 0.0998 Total time for noise level: 40.750s Testing with noise level: 0.2 Generated LE distances - min: 0.1776, max: 0.2263, mean: 0.1997 Time to generate noisy matrices: 1.333s Time to predict clusters: 1.867s Time to search index: 25.278s (0.1264s per query) Accuracy: 0.980 (196/200) Avg LE distance: 0.1997 Total time for noise level: 28.480s Testing with noise level: 0.5 Generated LE distances - min: 0.4441, max: 0.5657, mean: 0.4992 Time to generate noisy matrices: 1.669s Time to predict clusters: 1.884s Time to search index: 23.828s (0.1191s per query) Accuracy: 0.900 (180/200) Avg LE distance: 0.4992 Total time for noise level: 27.382s Testing with noise level: 1.0 Generated LE distances - min: 0.8882, max: 1.1315, mean: 0.9985 Time to generate noisy matrices: 5.120s Time to predict clusters: 1.718s Time to search index: 17.994s (0.0900s per query) Accuracy: 0.665 (133/200) Avg LE distance: 0.9985 Total time for noise level: 24.833s
In [ ]:
In [36]:
# Compile per-query results (all noise levels) into a single DataFrame
results_df = pd.DataFrame(results_by_noise)
In [37]:
results_df
Out[37]:
| noise_level | query_index | le_distance | true_cluster | predicted_cluster | correct | matched | matched_leaf | budget | query_budget | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 501 | 0.000000 | 0 | 0 | True | True | True | 67.695243 | 320.917748 |
| 1 | 0.0 | 845 | 0.000000 | 0 | 0 | True | True | True | 70.041807 | 320.917748 |
| 2 | 0.0 | 1281 | 0.000000 | 1 | 1 | True | True | True | 72.495370 | 283.410554 |
| 3 | 0.0 | 1394 | 0.000000 | 1 | 1 | True | True | True | 63.988327 | 283.410554 |
| 4 | 0.0 | 2036 | 0.000000 | 3 | 3 | True | True | True | 93.941560 | 350.702153 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1195 | 1.0 | 269 | 1.011084 | 0 | 0 | True | True | True | 84.290473 | 320.917748 |
| 1196 | 1.0 | 670 | 0.967996 | 0 | 2 | False | False | False | 78.951327 | 330.083902 |
| 1197 | 1.0 | 626 | 0.953492 | 4 | 2 | False | False | False | 93.235828 | 330.083902 |
| 1198 | 1.0 | 1874 | 1.026542 | 3 | 1 | False | False | False | 106.153718 | 283.410554 |
| 1199 | 1.0 | 1315 | 0.974375 | 2 | 2 | True | True | True | 77.521555 | 330.083902 |
1200 rows × 10 columns
In [53]:
# Simple bar plot: accuracy fraction by noise level
import matplotlib.pyplot as plt
import seaborn as sns
# Per-noise-level accuracy (mean of the boolean `correct` column).
accuracy_summary = results_df.groupby('noise_level')['correct'].mean().reset_index()
accuracy_summary.columns = ['noise_level', 'accuracy']
plt.figure(figsize=(4, 5))
# Categorical bar positions so unevenly spaced noise levels are equidistant.
x_pos = range(len(accuracy_summary))
# BUGFIX: the original cell drew the bars twice — plt.bar followed by
# sns.barplot on the same axes — overlaying two conflicting bar series.
# Keep the single styled plt.bar call.
plt.bar(x_pos, accuracy_summary['accuracy'],
        alpha=0.7, color='steelblue', edgecolor='black', linewidth=1)
plt.xticks(x_pos, accuracy_summary['noise_level'])
plt.xlabel('Noise Level', fontsize=12)
plt.ylabel('Accuracy (Fraction Correct)', fontsize=12)
plt.title('')
plt.ylim(0, 1)
#plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
sns.despine()
plt.savefig('/data/sarkar_lab/Projects/spindle_dev/results/plots/cross_cluster_noise.png', dpi=300, bbox_inches='tight')
plt.show()
In [44]:
# Plain-text summary table of accuracy per noise level.
print("\nAccuracy by Noise Level:")
print(accuracy_summary.to_string(index=False))
Accuracy by Noise Level:
noise_level accuracy
0.00 0.990
0.05 0.990
0.10 0.990
0.20 0.980
0.50 0.900
1.00 0.665
In [38]:
# Cases where the correct cluster was predicted but the ground-truth leaf
# node was never reached by any returned search path.
results_df.query("correct and not matched_leaf")
Out[38]:
| noise_level | query_index | le_distance | true_cluster | predicted_cluster | correct | matched | matched_leaf | budget | query_budget |
|---|
In [ ]:
# Create linearized DataFrame with individual query results
# NOTE(review): this duplicates results_df built earlier from the same
# results_by_noise list — presumably kept for standalone re-runs; verify.
noise_df = pd.DataFrame(results_by_noise)
print(f"Total records: {len(noise_df)}")
print(f"\nSummary by noise level:")
summary = noise_df.groupby('noise_level').agg({
    'correct': ['sum', 'count', 'mean'],
    'le_distance': ['mean', 'std']
}).round(4)
# Flatten the MultiIndex columns produced by the mixed agg spec.
summary.columns = ['correct', 'total', 'accuracy', 'avg_le_distance', 'std_le_distance']
print(summary)
# Rich display of the first rows (last expression in the cell).
noise_df.head(15)
In [ ]:
# Plot accuracy vs LE distance
import matplotlib.pyplot as plt
# Aggregate per noise level for plotting: mean accuracy plus the mean/std of
# the realized log-Euclidean perturbation distances.
plot_df = noise_df.groupby('noise_level').agg({
    'correct': 'mean',
    'le_distance': ['mean', 'std']
}).reset_index()
# Flatten the MultiIndex columns produced by the mixed agg spec.
plot_df.columns = ['noise_level', 'accuracy', 'avg_le_distance', 'std_le_distance']
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
# Plot 1: Accuracy vs Noise Level
ax1.plot(plot_df['noise_level'], plot_df['accuracy'], marker='o', linewidth=2, markersize=8, color='steelblue')
ax1.set_xlabel('Noise Level (std dev in log-eigenvalue space)', fontsize=11)
ax1.set_ylabel('Cluster Assignment Accuracy', fontsize=11)
ax1.set_title('Accuracy vs Noise Level', fontsize=12, fontweight='bold')
ax1.grid(True, alpha=0.3)
ax1.set_ylim([0, 1.05])
# Plot 2: Accuracy vs Average LE Distance (x error bars = distance std)
ax2.errorbar(plot_df['avg_le_distance'], plot_df['accuracy'],
             xerr=plot_df['std_le_distance'],
             marker='o', linewidth=2, markersize=8, capsize=5, color='darkorange')
ax2.set_xlabel('Average LE Distance from Original', fontsize=11)
ax2.set_ylabel('Cluster Assignment Accuracy', fontsize=11)
ax2.set_title('Accuracy vs LE Distance', fontsize=12, fontweight='bold')
ax2.grid(True, alpha=0.3)
ax2.set_ylim([0, 1.05])
plt.tight_layout()
plt.show()
In [ ]:
# End-to-end sanity run: rebuild the index, sample queries, predict clusters,
# and display predicted vs. true cluster labels.
# BUGFIX 1: the original cell ended with `return predicted_df`, which is a
# SyntaxError at notebook top level (return outside a function); end the cell
# with the bare expression instead so the DataFrame renders.
# BUGFIX 2: the original read dag_dict.keys() BEFORE reassigning dag_dict,
# relying on stale kernel state; build the index first.
dag_dict, stat, dist_list = index.index_spds(data, config=config)
gt_paths = test.create_ground_truth_paths(dag_dict)
seed = 40
rng = np.random.default_rng(seed)
all_indices = np.arange(len(data.spd_matrices))
# Restrict candidates to clusters that actually have a built index.
valid_clusters = list(dag_dict.keys())
mask = np.isin(data.labels, valid_clusters)
candidate_indices = all_indices[mask]
query_indices = rng.choice(candidate_indices, size=n_queries, replace=False)
query_matrices = [data.spd_matrices[i] for i in query_indices]
true_clusters = [int(data.labels[i]) for i in query_indices]
predicted_clusters = search.assign_clusters_to_new_spds(query_matrices, data)
predicted_df = pd.DataFrame({'True Cluster': true_clusters, 'Predicted Cluster': predicted_clusters})
print(f"Done with search")
predicted_df