In [1]:
# Make the project's src/ tree importable so `import spindle_dev`
# resolves to the local working copy rather than an installed package.
import sys
import importlib
from pathlib import Path

project_root = '/data/sarkar_lab/Projects/spindle_dev'
src_path = Path(project_root) / 'src'
src_str = str(src_path)
if src_str not in sys.path:
    sys.path.insert(0, src_str)

import spindle_dev
importlib.reload(spindle_dev)  # refresh after source edits without restarting the kernel
Out[1]:
<module 'spindle_dev' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/__init__.py'>
In [2]:
import scanpy as sc
import glob
In [3]:
h5ad_files = glob.glob("/data/sarkar_lab/insitupy_demo_data_xenium/*.h5ad")
In [4]:
# Project sub-modules used throughout this notebook.
import spindle_dev
import spindle_dev.metrics as metrics
import spindle_dev.index as index
import spindle_dev.preprocessing as preprocessing
import spindle_dev.plotting as plotting
import spindle_dev.test as test
import spindle_dev.search as search
# NOTE(review): this alias shadows the stdlib `typing` module for the rest of
# the kernel session — consider a non-colliding alias (e.g. `spd_typing`).
import spindle_dev.typing as typing
import time
In [5]:
from pathlib import Path
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from joblib import Parallel, delayed
import argparse
import pandas as pd
In [6]:
h5ad_files = glob.glob("/data/sarkar_lab/insitupy_demo_data_xenium/*.h5ad")
In [ ]:
index_files = glob.glob("/data/sarkar_lab/Projects/spindle_dev/")
In [63]:
import os
import pickle

# Root directory holding one "<experiment>_index" folder per Xenium dataset.
BASE = "/data/sarkar_lab/insitupy_demo_data_xenium"

# -----------------------------
# Index-level stats
# -----------------------------
index_dirs = glob.glob(os.path.join(BASE, "xenium_*_index"))

index_rows = []  # NOTE(review): never populated in this cell
for index_dir in index_dirs:
    # Experiment name is the directory name minus its "_index" suffix.
    experiment = os.path.basename(index_dir).replace("_index", "")
    # Skip the 5k-panel dataset.
    if '5k' in experiment:
        continue

    # Load the persisted spindle index and report how many DAG clusters it holds.
    index_handle = index.load_index(os.path.join(index_dir, "spindle.pkl"))
    print(experiment, len(index_handle.dag_dict))
xenium_human_skin_melanoma 6
xenium_human_kidney_nondiseased 6
xenium_human_breast_cancer 6
xenium_human_pancreatic_cancer 4
xenium_human_lymph_node 6
xenium_human_brain_cancer 8
xenium_human_lung_cancer 7
In [61]:
len(index_handle.dag_dict)
Out[61]:
6
In [7]:
h5ad_files
Out[7]:
['/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_skin_melanoma.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_brain_cancer.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_kidney_nondiseased.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_lung_cancer.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_lymph_node.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_lymph_node_5k.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_pancreatic_cancer.h5ad',
 '/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_breast_cancer.h5ad']
In [54]:
len(data.spd_matrices)
Out[54]:
2188
In [9]:
adata = sc.read_h5ad('/data/sarkar_lab/insitupy_demo_data_xenium/xenium_human_breast_cancer.h5ad')
In [70]:
adata = adata[adata.obs.loc[adata.obs.Cluster != "Unlabeled"].index, :].copy()
In [71]:
# ---- Index-building parameters ----
resolution=0.5          # Leiden resolution used by cluster_spds below
min_final_size=10       # lower bound on block size passed to get_adaptive_runs
top_vars=200            # number of top-variance genes when all_genes is False
n_queries = 2000        # NOTE(review): unused in this cell — presumably consumed by a later search cell
all_genes=True          # if True, use the full gene panel instead of top_vars
# start time 

# Create index directory if it doesn't exist
#Path(index_path).mkdir(parents=True, exist_ok=True)

start_time = time.time()  # NOTE(review): never read in the visible cells — elapsed time is not reported


# Tile the tissue: quadtree over the spatial coordinates, at most 200 points per tile.
coords = adata.obsm["spatial"]
tiles = preprocessing.build_quadtree_tiles(coords, max_pts=200, min_side=0.0, max_depth=40)
# remove tiles with less than 5 spots
tiles = [tile for tile in tiles if len(tile.idx) >= 5]
tiles = preprocessing.reindex_tiles(tiles)
# Gene selection: the whole panel, or only the top-variance subset.
if all_genes:
    num_genes = adata.n_vars
else:
    num_genes = top_vars
genes_work, gene_idx = spindle_dev.preprocessing.topvar_genes(adata, G=num_genes)  
# Per-tile covariance matrices (the SPD matrices being indexed);
# eps=1e-6 is a small regularizer, presumably for numerical stability.
tile_covs = spindle_dev.preprocessing.build_tile_covs_full_serial(adata, tiles, gene_idx, eps=1e-6)
data = index.ProcessedData(tiles, tile_covs, genes_work, adata.n_obs)
# Latent features from tree representations: 30-component PCA plus 2-D UMAP,
# computed only once per ProcessedData instance.
if 'pca' not in data.latent:
    data.reduce_dim(num_pca_components=30, n_components=2, do_umap=True)
data.cluster_spds(cluster_distance="tree", cluster_method="leiden", resolution=resolution)
data.assign_label_to_spots()
data.get_corr_mean_by_cluster()
out_dict = data.get_adaptive_runs(find_blocks=True, with_size_guard=True,min_final_size=min_final_size,max_final_size=100)
# Choose a per-cluster epsilon (and per-block elbow epsilons) for the
# epsilon-net block clustering used when building the index.
epsilon_block_wise_dict = {}
epsilon_dict = {}
for cluster_id in set(data.labels):
    eps_per_block, eps_elbow_per_block, eps = index.choose_adaptive_epsilons(data, cluster_id, k_target_per_block=64)
    epsilon_block_wise_dict[int(cluster_id)] = eps_elbow_per_block
    epsilon_dict[int(cluster_id)] = eps

# Create indices config
config = typing.IndexConfig()
config.epsilon_dict = epsilon_dict
config.epsilon_block_wise_dict = epsilon_block_wise_dict
config.threshold_type = 'constant'
config.kmean_method = 'epsilon_net' 
dag_dict, stat, dist_list = index.index_spds(data, config=config)
[2026-01-20 20:45:32,891] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance.
[2026-01-20 20:45:32,891] INFO spindle_dev.index: Building ultrametric features from SPD matrices.
[2026-01-20 20:45:49,433] INFO spindle_dev.index: Computing latent features from the tree representations.
[2026-01-20 20:45:49,975] INFO spindle_dev.index: Reducing latent features to 30 dimensions using PCA.
[2026-01-20 20:46:00,096] INFO spindle_dev.index: Explained variance ratios by PCA components: [0.06845865 0.05360552 0.02648471 0.01915467 0.01301552 0.01026255
 0.00782341 0.00736292 0.0066578  0.00645745 0.00500687 0.00465036
 0.00423697 0.00417093 0.00382233 0.00365148 0.00355614 0.00349192
 0.00334309 0.0031902  0.00303663 0.00302339 0.0029695  0.00293939
 0.0029074  0.00282528 0.00278963 0.00277179 0.00273333 0.00270156]
[2026-01-20 20:46:00,097] INFO spindle_dev.index: Reducing latent features to 2 dimensions using UMAP.
/panfs/accrepfs.vampire/home/sarkah1/miniforge3/envs/spatial/lib/python3.10/site-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(
[2026-01-20 20:46:05,615] INFO spindle_dev.index: Clustering SPD-s using 'tree' distance.
[2026-01-20 20:46:05,616] INFO spindle_dev.index: Clustering SPD matrices using Leiden clustering with resolution 0.50.
[2026-01-20 20:46:05,850] INFO spindle_dev.index: Since clustering method is tree, I am going to find global order per cluster
[2026-01-20 20:46:05,851] INFO spindle_dev.index: Finding consensus tree for cluster 0
[2026-01-20 20:46:05,959] INFO spindle_dev.index: Finding consensus tree for cluster 1
[2026-01-20 20:46:06,060] INFO spindle_dev.index: Finding consensus tree for cluster 2
[2026-01-20 20:46:06,142] INFO spindle_dev.index: Finding consensus tree for cluster 3
[2026-01-20 20:46:06,196] INFO spindle_dev.index: Finding consensus tree for cluster 4
[2026-01-20 20:46:06,250] INFO spindle_dev.index: Finding consensus tree for cluster 5
[2026-01-20 20:46:06,537] INFO spindle_dev.index: Computing mean correlation matrix for cluster 0
[2026-01-20 20:46:24,210] INFO spindle_dev.index: Computing mean correlation matrix for cluster 1
[2026-01-20 20:46:36,276] INFO spindle_dev.index: Computing mean correlation matrix for cluster 2
[2026-01-20 20:46:47,256] INFO spindle_dev.index: Computing mean correlation matrix for cluster 3
[2026-01-20 20:46:54,296] INFO spindle_dev.index: Computing mean correlation matrix for cluster 4
[2026-01-20 20:47:02,638] INFO spindle_dev.index: Computing mean correlation matrix for cluster 5
[2026-01-20 20:47:07,318] INFO spindle_dev.index: Finding adaptive block runs for cluster 0
[2026-01-20 20:47:07,824] INFO spindle_dev.index:  Chose t=0.762928003964036 resulting in 189 blocks instead of 40 blocks would have gotten by default
[2026-01-20 20:47:07,872] INFO spindle_dev.index:  Final block runs for cluster 0: 22 blocks.
[2026-01-20 20:47:07,872] INFO spindle_dev.index: Finding adaptive block runs for cluster 1
[2026-01-20 20:47:08,347] INFO spindle_dev.index:  Chose t=0.7942023685368615 resulting in 192 blocks instead of 76 blocks would have gotten by default
[2026-01-20 20:47:08,397] INFO spindle_dev.index:  Final block runs for cluster 1: 21 blocks.
[2026-01-20 20:47:08,398] INFO spindle_dev.index: Finding adaptive block runs for cluster 2
[2026-01-20 20:47:08,874] INFO spindle_dev.index:  Chose t=0.8351684028224012 resulting in 164 blocks instead of 31 blocks would have gotten by default
[2026-01-20 20:47:08,910] INFO spindle_dev.index:  Final block runs for cluster 2: 21 blocks.
[2026-01-20 20:47:08,911] INFO spindle_dev.index: Finding adaptive block runs for cluster 3
[2026-01-20 20:47:09,366] INFO spindle_dev.index:  Chose t=0.719141883507654 resulting in 207 blocks instead of 46 blocks would have gotten by default
[2026-01-20 20:47:09,423] INFO spindle_dev.index:  Final block runs for cluster 3: 24 blocks.
[2026-01-20 20:47:09,423] INFO spindle_dev.index: Finding adaptive block runs for cluster 4
[2026-01-20 20:47:09,891] INFO spindle_dev.index:  Chose t=0.8060507957776382 resulting in 223 blocks instead of 94 blocks would have gotten by default
[2026-01-20 20:47:09,956] INFO spindle_dev.index:  Final block runs for cluster 4: 23 blocks.
[2026-01-20 20:47:09,956] INFO spindle_dev.index: Finding adaptive block runs for cluster 5
[2026-01-20 20:47:10,426] INFO spindle_dev.index:  Chose t=0.8924332101367483 resulting in 133 blocks instead of 68 blocks would have gotten by default
[2026-01-20 20:47:10,450] INFO spindle_dev.index:  Final block runs for cluster 5: 20 blocks.
[2026-01-20 20:47:19,004] INFO spindle_dev.index: Processing cluster 0
[2026-01-20 20:47:19,005] INFO spindle_dev.index: Building SPD index with epsilon=6.917388844497137
[2026-01-20 20:47:19,005] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-20 20:47:19,083] INFO spindle_dev.index: Cluster 0: 499 SPDs, 22 blocks
[2026-01-20 20:47:19,084] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-20 20:47:19,136] INFO spindle_dev.index:  Finished block 0 in 0.05 seconds, found 4 clusters.
[2026-01-20 20:47:19,137] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-20 20:47:19,223] INFO spindle_dev.index:  Finished block 1 in 0.09 seconds, found 9 clusters.
[2026-01-20 20:47:19,223] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-20 20:47:19,318] INFO spindle_dev.index:  Finished block 2 in 0.10 seconds, found 11 clusters.
[2026-01-20 20:47:19,319] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-20 20:47:19,406] INFO spindle_dev.index:  Finished block 3 in 0.09 seconds, found 10 clusters.
[2026-01-20 20:47:19,407] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-20 20:47:19,465] INFO spindle_dev.index:  Finished block 4 in 0.06 seconds, found 5 clusters.
[2026-01-20 20:47:19,466] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-20 20:47:19,524] INFO spindle_dev.index:  Finished block 5 in 0.06 seconds, found 5 clusters.
[2026-01-20 20:47:19,524] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-20 20:47:19,585] INFO spindle_dev.index:  Finished block 6 in 0.06 seconds, found 4 clusters.
[2026-01-20 20:47:19,585] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-20 20:47:19,633] INFO spindle_dev.index:  Finished block 7 in 0.05 seconds, found 2 clusters.
[2026-01-20 20:47:19,633] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-20 20:47:19,668] INFO spindle_dev.index:  Finished block 8 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:19,668] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-20 20:47:19,708] INFO spindle_dev.index:  Finished block 9 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:19,708] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-20 20:47:19,752] INFO spindle_dev.index:  Finished block 10 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:19,752] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-20 20:47:19,801] INFO spindle_dev.index:  Finished block 11 in 0.05 seconds, found 1 clusters.
[2026-01-20 20:47:19,801] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-20 20:47:19,839] INFO spindle_dev.index:  Finished block 12 in 0.04 seconds, found 1 clusters.
[2026-01-20 20:47:19,839] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-20 20:47:19,875] INFO spindle_dev.index:  Finished block 13 in 0.04 seconds, found 1 clusters.
[2026-01-20 20:47:19,875] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-20 20:47:19,911] INFO spindle_dev.index:  Finished block 14 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:19,911] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-20 20:47:19,943] INFO spindle_dev.index:  Finished block 15 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:19,943] INFO spindle_dev.index:  Using epsilon-net clustering for block 16
[2026-01-20 20:47:20,140] INFO spindle_dev.index:  Finished block 16 in 0.20 seconds, found 7 clusters.
[2026-01-20 20:47:20,140] INFO spindle_dev.index:  Using epsilon-net clustering for block 17
[2026-01-20 20:47:20,194] INFO spindle_dev.index:  Finished block 17 in 0.05 seconds, found 3 clusters.
[2026-01-20 20:47:20,195] INFO spindle_dev.index:  Using epsilon-net clustering for block 18
[2026-01-20 20:47:20,252] INFO spindle_dev.index:  Finished block 18 in 0.06 seconds, found 4 clusters.
[2026-01-20 20:47:20,252] INFO spindle_dev.index:  Using epsilon-net clustering for block 19
[2026-01-20 20:47:20,309] INFO spindle_dev.index:  Finished block 19 in 0.06 seconds, found 5 clusters.
[2026-01-20 20:47:20,309] INFO spindle_dev.index:  Using epsilon-net clustering for block 20
[2026-01-20 20:47:20,346] INFO spindle_dev.index:  Finished block 20 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:20,347] INFO spindle_dev.index:  Using epsilon-net clustering for block 21
[2026-01-20 20:47:20,394] INFO spindle_dev.index:  Finished block 21 in 0.05 seconds, found 1 clusters.
[2026-01-20 20:47:20,394] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-20 20:47:20,395] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-20 20:47:20,395] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-20 20:47:20,395] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-20 20:47:20,396] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-20 20:47:20,408] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-20 20:47:20,408] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-20 20:47:20,408] INFO spindle_dev.index: Processing cluster 1
[2026-01-20 20:47:20,409] INFO spindle_dev.index: Building SPD index with epsilon=7.061656880525277
[2026-01-20 20:47:20,409] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-20 20:47:20,941] INFO spindle_dev.index: Cluster 1: 470 SPDs, 21 blocks
[2026-01-20 20:47:20,942] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-20 20:47:20,978] INFO spindle_dev.index:  Finished block 0 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:20,978] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-20 20:47:21,049] INFO spindle_dev.index:  Finished block 1 in 0.07 seconds, found 8 clusters.
[2026-01-20 20:47:21,050] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-20 20:47:21,111] INFO spindle_dev.index:  Finished block 2 in 0.06 seconds, found 7 clusters.
[2026-01-20 20:47:21,112] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-20 20:47:21,180] INFO spindle_dev.index:  Finished block 3 in 0.07 seconds, found 8 clusters.
[2026-01-20 20:47:21,181] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-20 20:47:21,239] INFO spindle_dev.index:  Finished block 4 in 0.06 seconds, found 7 clusters.
[2026-01-20 20:47:21,240] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-20 20:47:21,288] INFO spindle_dev.index:  Finished block 5 in 0.05 seconds, found 5 clusters.
[2026-01-20 20:47:21,288] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-20 20:47:21,351] INFO spindle_dev.index:  Finished block 6 in 0.06 seconds, found 8 clusters.
[2026-01-20 20:47:21,351] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-20 20:47:21,406] INFO spindle_dev.index:  Finished block 7 in 0.05 seconds, found 6 clusters.
[2026-01-20 20:47:21,406] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-20 20:47:21,439] INFO spindle_dev.index:  Finished block 8 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:21,440] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-20 20:47:21,474] INFO spindle_dev.index:  Finished block 9 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:21,474] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-20 20:47:21,509] INFO spindle_dev.index:  Finished block 10 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:21,510] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-20 20:47:21,539] INFO spindle_dev.index:  Finished block 11 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:21,540] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-20 20:47:21,574] INFO spindle_dev.index:  Finished block 12 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:21,575] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-20 20:47:21,806] INFO spindle_dev.index:  Finished block 13 in 0.23 seconds, found 10 clusters.
[2026-01-20 20:47:21,807] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-20 20:47:21,845] INFO spindle_dev.index:  Finished block 14 in 0.04 seconds, found 1 clusters.
[2026-01-20 20:47:21,845] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-20 20:47:21,885] INFO spindle_dev.index:  Finished block 15 in 0.04 seconds, found 1 clusters.
[2026-01-20 20:47:21,885] INFO spindle_dev.index:  Using epsilon-net clustering for block 16
[2026-01-20 20:47:21,968] INFO spindle_dev.index:  Finished block 16 in 0.08 seconds, found 4 clusters.
[2026-01-20 20:47:21,969] INFO spindle_dev.index:  Using epsilon-net clustering for block 17
[2026-01-20 20:47:22,020] INFO spindle_dev.index:  Finished block 17 in 0.05 seconds, found 2 clusters.
[2026-01-20 20:47:22,021] INFO spindle_dev.index:  Using epsilon-net clustering for block 18
[2026-01-20 20:47:22,084] INFO spindle_dev.index:  Finished block 18 in 0.06 seconds, found 3 clusters.
[2026-01-20 20:47:22,084] INFO spindle_dev.index:  Using epsilon-net clustering for block 19
[2026-01-20 20:47:22,136] INFO spindle_dev.index:  Finished block 19 in 0.05 seconds, found 5 clusters.
[2026-01-20 20:47:22,136] INFO spindle_dev.index:  Using epsilon-net clustering for block 20
[2026-01-20 20:47:22,185] INFO spindle_dev.index:  Finished block 20 in 0.05 seconds, found 4 clusters.
[2026-01-20 20:47:22,186] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-20 20:47:22,186] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-20 20:47:22,186] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-20 20:47:22,187] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-20 20:47:22,187] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-20 20:47:22,196] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-20 20:47:22,197] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-20 20:47:22,197] INFO spindle_dev.index: Processing cluster 2
[2026-01-20 20:47:22,197] INFO spindle_dev.index: Building SPD index with epsilon=7.971775058313972
[2026-01-20 20:47:22,198] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-20 20:47:22,231] INFO spindle_dev.index: Cluster 2: 413 SPDs, 21 blocks
[2026-01-20 20:47:22,232] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-20 20:47:22,260] INFO spindle_dev.index:  Finished block 0 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:22,260] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-20 20:47:22,293] INFO spindle_dev.index:  Finished block 1 in 0.03 seconds, found 3 clusters.
[2026-01-20 20:47:22,293] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-20 20:47:22,327] INFO spindle_dev.index:  Finished block 2 in 0.03 seconds, found 3 clusters.
[2026-01-20 20:47:22,327] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-20 20:47:22,365] INFO spindle_dev.index:  Finished block 3 in 0.04 seconds, found 4 clusters.
[2026-01-20 20:47:22,365] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-20 20:47:22,399] INFO spindle_dev.index:  Finished block 4 in 0.03 seconds, found 3 clusters.
[2026-01-20 20:47:22,400] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-20 20:47:22,438] INFO spindle_dev.index:  Finished block 5 in 0.04 seconds, found 4 clusters.
[2026-01-20 20:47:22,438] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-20 20:47:22,473] INFO spindle_dev.index:  Finished block 6 in 0.04 seconds, found 3 clusters.
[2026-01-20 20:47:22,474] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-20 20:47:22,509] INFO spindle_dev.index:  Finished block 7 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:22,510] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-20 20:47:22,545] INFO spindle_dev.index:  Finished block 8 in 0.04 seconds, found 3 clusters.
[2026-01-20 20:47:22,545] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-20 20:47:22,577] INFO spindle_dev.index:  Finished block 9 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:22,577] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-20 20:47:22,614] INFO spindle_dev.index:  Finished block 10 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:22,614] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-20 20:47:22,687] INFO spindle_dev.index:  Finished block 11 in 0.07 seconds, found 4 clusters.
[2026-01-20 20:47:22,687] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-20 20:47:22,721] INFO spindle_dev.index:  Finished block 12 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:22,721] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-20 20:47:22,755] INFO spindle_dev.index:  Finished block 13 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:22,755] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-20 20:47:22,790] INFO spindle_dev.index:  Finished block 14 in 0.04 seconds, found 1 clusters.
[2026-01-20 20:47:22,791] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-20 20:47:22,816] INFO spindle_dev.index:  Finished block 15 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:22,817] INFO spindle_dev.index:  Using epsilon-net clustering for block 16
[2026-01-20 20:47:22,843] INFO spindle_dev.index:  Finished block 16 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:22,844] INFO spindle_dev.index:  Using epsilon-net clustering for block 17
[2026-01-20 20:47:22,869] INFO spindle_dev.index:  Finished block 17 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:22,870] INFO spindle_dev.index:  Using epsilon-net clustering for block 18
[2026-01-20 20:47:23,328] INFO spindle_dev.index:  Finished block 18 in 0.46 seconds, found 58 clusters.
[2026-01-20 20:47:23,329] INFO spindle_dev.index:  Using epsilon-net clustering for block 19
[2026-01-20 20:47:23,365] INFO spindle_dev.index:  Finished block 19 in 0.04 seconds, found 1 clusters.
[2026-01-20 20:47:23,365] INFO spindle_dev.index:  Using epsilon-net clustering for block 20
[2026-01-20 20:47:23,442] INFO spindle_dev.index:  Finished block 20 in 0.08 seconds, found 3 clusters.
[2026-01-20 20:47:23,443] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-20 20:47:23,443] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-20 20:47:23,443] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-20 20:47:23,443] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-20 20:47:23,444] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-20 20:47:23,451] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-20 20:47:23,452] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-20 20:47:23,452] INFO spindle_dev.index: Processing cluster 3
[2026-01-20 20:47:23,452] INFO spindle_dev.index: Building SPD index with epsilon=6.034811805274417
[2026-01-20 20:47:23,453] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-20 20:47:23,475] INFO spindle_dev.index: Cluster 3: 270 SPDs, 24 blocks
[2026-01-20 20:47:23,476] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-20 20:47:23,509] INFO spindle_dev.index:  Finished block 0 in 0.03 seconds, found 7 clusters.
[2026-01-20 20:47:23,510] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-20 20:47:23,559] INFO spindle_dev.index:  Finished block 1 in 0.05 seconds, found 12 clusters.
[2026-01-20 20:47:23,560] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-20 20:47:23,630] INFO spindle_dev.index:  Finished block 2 in 0.07 seconds, found 19 clusters.
[2026-01-20 20:47:23,631] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-20 20:47:23,699] INFO spindle_dev.index:  Finished block 3 in 0.07 seconds, found 18 clusters.
[2026-01-20 20:47:23,699] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-20 20:47:23,772] INFO spindle_dev.index:  Finished block 4 in 0.07 seconds, found 20 clusters.
[2026-01-20 20:47:23,773] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-20 20:47:23,834] INFO spindle_dev.index:  Finished block 5 in 0.06 seconds, found 16 clusters.
[2026-01-20 20:47:23,835] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-20 20:47:23,867] INFO spindle_dev.index:  Finished block 6 in 0.03 seconds, found 6 clusters.
[2026-01-20 20:47:23,867] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-20 20:47:23,890] INFO spindle_dev.index:  Finished block 7 in 0.02 seconds, found 3 clusters.
[2026-01-20 20:47:23,890] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-20 20:47:23,910] INFO spindle_dev.index:  Finished block 8 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:23,910] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-20 20:47:23,942] INFO spindle_dev.index:  Finished block 9 in 0.03 seconds, found 6 clusters.
[2026-01-20 20:47:23,943] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-20 20:47:23,970] INFO spindle_dev.index:  Finished block 10 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:23,971] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-20 20:47:23,994] INFO spindle_dev.index:  Finished block 11 in 0.02 seconds, found 1 clusters.
[2026-01-20 20:47:23,995] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-20 20:47:24,027] INFO spindle_dev.index:  Finished block 12 in 0.03 seconds, found 6 clusters.
[2026-01-20 20:47:24,028] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-20 20:47:24,053] INFO spindle_dev.index:  Finished block 13 in 0.03 seconds, found 4 clusters.
[2026-01-20 20:47:24,054] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-20 20:47:24,071] INFO spindle_dev.index:  Finished block 14 in 0.02 seconds, found 1 clusters.
[2026-01-20 20:47:24,071] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-20 20:47:24,095] INFO spindle_dev.index:  Finished block 15 in 0.02 seconds, found 3 clusters.
[2026-01-20 20:47:24,096] INFO spindle_dev.index:  Using epsilon-net clustering for block 16
[2026-01-20 20:47:24,116] INFO spindle_dev.index:  Finished block 16 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:24,116] INFO spindle_dev.index:  Using epsilon-net clustering for block 17
[2026-01-20 20:47:24,148] INFO spindle_dev.index:  Finished block 17 in 0.03 seconds, found 4 clusters.
[2026-01-20 20:47:24,148] INFO spindle_dev.index:  Using epsilon-net clustering for block 18
[2026-01-20 20:47:24,168] INFO spindle_dev.index:  Finished block 18 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:24,168] INFO spindle_dev.index:  Using epsilon-net clustering for block 19
[2026-01-20 20:47:24,185] INFO spindle_dev.index:  Finished block 19 in 0.02 seconds, found 1 clusters.
[2026-01-20 20:47:24,186] INFO spindle_dev.index:  Using epsilon-net clustering for block 20
[2026-01-20 20:47:24,213] INFO spindle_dev.index:  Finished block 20 in 0.03 seconds, found 1 clusters.
[2026-01-20 20:47:24,213] INFO spindle_dev.index:  Using epsilon-net clustering for block 21
[2026-01-20 20:47:24,231] INFO spindle_dev.index:  Finished block 21 in 0.02 seconds, found 1 clusters.
[2026-01-20 20:47:24,231] INFO spindle_dev.index:  Using epsilon-net clustering for block 22
[2026-01-20 20:47:24,248] INFO spindle_dev.index:  Finished block 22 in 0.02 seconds, found 1 clusters.
[2026-01-20 20:47:24,248] INFO spindle_dev.index:  Using epsilon-net clustering for block 23
[2026-01-20 20:47:24,380] INFO spindle_dev.index:  Finished block 23 in 0.13 seconds, found 11 clusters.
[2026-01-20 20:47:24,380] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-20 20:47:24,380] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-20 20:47:24,381] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-20 20:47:24,381] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-20 20:47:24,381] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-20 20:47:24,389] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-20 20:47:24,389] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-20 20:47:24,389] INFO spindle_dev.index: Processing cluster 4
[2026-01-20 20:47:24,390] INFO spindle_dev.index: Building SPD index with epsilon=7.355224122437038
[2026-01-20 20:47:24,390] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-20 20:47:24,411] INFO spindle_dev.index: Cluster 4: 266 SPDs, 23 blocks
[2026-01-20 20:47:24,412] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-20 20:47:24,434] INFO spindle_dev.index:  Finished block 0 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:24,434] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-20 20:47:24,458] INFO spindle_dev.index:  Finished block 1 in 0.02 seconds, found 4 clusters.
[2026-01-20 20:47:24,459] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-20 20:47:24,486] INFO spindle_dev.index:  Finished block 2 in 0.03 seconds, found 5 clusters.
[2026-01-20 20:47:24,486] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-20 20:47:24,513] INFO spindle_dev.index:  Finished block 3 in 0.03 seconds, found 5 clusters.
[2026-01-20 20:47:24,513] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-20 20:47:24,543] INFO spindle_dev.index:  Finished block 4 in 0.03 seconds, found 6 clusters.
[2026-01-20 20:47:24,544] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-20 20:47:24,577] INFO spindle_dev.index:  Finished block 5 in 0.03 seconds, found 7 clusters.
[2026-01-20 20:47:24,578] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-20 20:47:24,612] INFO spindle_dev.index:  Finished block 6 in 0.03 seconds, found 7 clusters.
[2026-01-20 20:47:24,613] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-20 20:47:24,644] INFO spindle_dev.index:  Finished block 7 in 0.03 seconds, found 6 clusters.
[2026-01-20 20:47:24,644] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-20 20:47:24,673] INFO spindle_dev.index:  Finished block 8 in 0.03 seconds, found 5 clusters.
[2026-01-20 20:47:24,673] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-20 20:47:24,710] INFO spindle_dev.index:  Finished block 9 in 0.04 seconds, found 7 clusters.
[2026-01-20 20:47:24,711] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-20 20:47:24,736] INFO spindle_dev.index:  Finished block 10 in 0.03 seconds, found 4 clusters.
[2026-01-20 20:47:24,736] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-20 20:47:24,759] INFO spindle_dev.index:  Finished block 11 in 0.02 seconds, found 3 clusters.
[2026-01-20 20:47:24,759] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-20 20:47:24,788] INFO spindle_dev.index:  Finished block 12 in 0.03 seconds, found 5 clusters.
[2026-01-20 20:47:24,788] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-20 20:47:24,810] INFO spindle_dev.index:  Finished block 13 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:24,810] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-20 20:47:24,839] INFO spindle_dev.index:  Finished block 14 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:24,839] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-20 20:47:24,862] INFO spindle_dev.index:  Finished block 15 in 0.02 seconds, found 3 clusters.
[2026-01-20 20:47:24,862] INFO spindle_dev.index:  Using epsilon-net clustering for block 16
[2026-01-20 20:47:24,879] INFO spindle_dev.index:  Finished block 16 in 0.02 seconds, found 1 clusters.
[2026-01-20 20:47:24,879] INFO spindle_dev.index:  Using epsilon-net clustering for block 17
[2026-01-20 20:47:24,898] INFO spindle_dev.index:  Finished block 17 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:24,899] INFO spindle_dev.index:  Using epsilon-net clustering for block 18
[2026-01-20 20:47:25,013] INFO spindle_dev.index:  Finished block 18 in 0.11 seconds, found 9 clusters.
[2026-01-20 20:47:25,014] INFO spindle_dev.index:  Using epsilon-net clustering for block 19
[2026-01-20 20:47:25,043] INFO spindle_dev.index:  Finished block 19 in 0.03 seconds, found 3 clusters.
[2026-01-20 20:47:25,043] INFO spindle_dev.index:  Using epsilon-net clustering for block 20
[2026-01-20 20:47:25,069] INFO spindle_dev.index:  Finished block 20 in 0.03 seconds, found 4 clusters.
[2026-01-20 20:47:25,070] INFO spindle_dev.index:  Using epsilon-net clustering for block 21
[2026-01-20 20:47:25,114] INFO spindle_dev.index:  Finished block 21 in 0.04 seconds, found 5 clusters.
[2026-01-20 20:47:25,114] INFO spindle_dev.index:  Using epsilon-net clustering for block 22
[2026-01-20 20:47:25,141] INFO spindle_dev.index:  Finished block 22 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:25,142] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-20 20:47:25,142] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-20 20:47:25,142] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-20 20:47:25,143] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-20 20:47:25,143] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-20 20:47:25,149] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-20 20:47:25,149] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
[2026-01-20 20:47:25,150] INFO spindle_dev.index: Processing cluster 5
[2026-01-20 20:47:25,150] INFO spindle_dev.index: Building SPD index with epsilon=7.572297895556834
[2026-01-20 20:47:25,150] INFO spindle_dev.index: Step 1: Cluster blocks within each class of SPD matrices.
[2026-01-20 20:47:25,160] INFO spindle_dev.index: Cluster 5: 162 SPDs, 20 blocks
[2026-01-20 20:47:25,161] INFO spindle_dev.index:  Using epsilon-net clustering for block 0
[2026-01-20 20:47:25,173] INFO spindle_dev.index:  Finished block 0 in 0.01 seconds, found 2 clusters.
[2026-01-20 20:47:25,173] INFO spindle_dev.index:  Using epsilon-net clustering for block 1
[2026-01-20 20:47:25,187] INFO spindle_dev.index:  Finished block 1 in 0.01 seconds, found 3 clusters.
[2026-01-20 20:47:25,187] INFO spindle_dev.index:  Using epsilon-net clustering for block 2
[2026-01-20 20:47:25,202] INFO spindle_dev.index:  Finished block 2 in 0.02 seconds, found 4 clusters.
[2026-01-20 20:47:25,202] INFO spindle_dev.index:  Using epsilon-net clustering for block 3
[2026-01-20 20:47:25,216] INFO spindle_dev.index:  Finished block 3 in 0.01 seconds, found 3 clusters.
[2026-01-20 20:47:25,216] INFO spindle_dev.index:  Using epsilon-net clustering for block 4
[2026-01-20 20:47:25,231] INFO spindle_dev.index:  Finished block 4 in 0.02 seconds, found 4 clusters.
[2026-01-20 20:47:25,232] INFO spindle_dev.index:  Using epsilon-net clustering for block 5
[2026-01-20 20:47:25,247] INFO spindle_dev.index:  Finished block 5 in 0.02 seconds, found 3 clusters.
[2026-01-20 20:47:25,247] INFO spindle_dev.index:  Using epsilon-net clustering for block 6
[2026-01-20 20:47:25,263] INFO spindle_dev.index:  Finished block 6 in 0.02 seconds, found 4 clusters.
[2026-01-20 20:47:25,263] INFO spindle_dev.index:  Using epsilon-net clustering for block 7
[2026-01-20 20:47:25,277] INFO spindle_dev.index:  Finished block 7 in 0.01 seconds, found 3 clusters.
[2026-01-20 20:47:25,277] INFO spindle_dev.index:  Using epsilon-net clustering for block 8
[2026-01-20 20:47:25,291] INFO spindle_dev.index:  Finished block 8 in 0.01 seconds, found 3 clusters.
[2026-01-20 20:47:25,292] INFO spindle_dev.index:  Using epsilon-net clustering for block 9
[2026-01-20 20:47:25,306] INFO spindle_dev.index:  Finished block 9 in 0.01 seconds, found 3 clusters.
[2026-01-20 20:47:25,306] INFO spindle_dev.index:  Using epsilon-net clustering for block 10
[2026-01-20 20:47:25,330] INFO spindle_dev.index:  Finished block 10 in 0.02 seconds, found 5 clusters.
[2026-01-20 20:47:25,330] INFO spindle_dev.index:  Using epsilon-net clustering for block 11
[2026-01-20 20:47:25,347] INFO spindle_dev.index:  Finished block 11 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:25,348] INFO spindle_dev.index:  Using epsilon-net clustering for block 12
[2026-01-20 20:47:25,390] INFO spindle_dev.index:  Finished block 12 in 0.04 seconds, found 2 clusters.
[2026-01-20 20:47:25,391] INFO spindle_dev.index:  Using epsilon-net clustering for block 13
[2026-01-20 20:47:25,409] INFO spindle_dev.index:  Finished block 13 in 0.02 seconds, found 2 clusters.
[2026-01-20 20:47:25,409] INFO spindle_dev.index:  Using epsilon-net clustering for block 14
[2026-01-20 20:47:25,422] INFO spindle_dev.index:  Finished block 14 in 0.01 seconds, found 2 clusters.
[2026-01-20 20:47:25,422] INFO spindle_dev.index:  Using epsilon-net clustering for block 15
[2026-01-20 20:47:25,435] INFO spindle_dev.index:  Finished block 15 in 0.01 seconds, found 2 clusters.
[2026-01-20 20:47:25,435] INFO spindle_dev.index:  Using epsilon-net clustering for block 16
[2026-01-20 20:47:25,464] INFO spindle_dev.index:  Finished block 16 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:25,464] INFO spindle_dev.index:  Using epsilon-net clustering for block 17
[2026-01-20 20:47:25,482] INFO spindle_dev.index:  Finished block 17 in 0.02 seconds, found 4 clusters.
[2026-01-20 20:47:25,483] INFO spindle_dev.index:  Using epsilon-net clustering for block 18
[2026-01-20 20:47:25,513] INFO spindle_dev.index:  Finished block 18 in 0.03 seconds, found 2 clusters.
[2026-01-20 20:47:25,514] INFO spindle_dev.index:  Using epsilon-net clustering for block 19
[2026-01-20 20:47:25,534] INFO spindle_dev.index:  Finished block 19 in 0.02 seconds, found 4 clusters.
[2026-01-20 20:47:25,535] INFO spindle_dev.index: Step 2: Build DAG connections between block clusters.
[2026-01-20 20:47:25,535] INFO spindle_dev.index: Step 2.1: For each layer order the block-clusters by
[2026-01-20 20:47:25,535] INFO spindle_dev.index: Not implemented: ordering block-clusters ? How to order them?
[2026-01-20 20:47:25,535] INFO spindle_dev.index: We will use triangle inequality to order clusters.
[2026-01-20 20:47:25,536] INFO spindle_dev.index: Step 2.2: Connect block-clusters between layers based on co-occurrence in SPDs.
[2026-01-20 20:47:25,539] INFO spindle_dev.index: Check if node global_node_id matches index in nodes list
[2026-01-20 20:47:25,539] INFO spindle_dev.index: Step 2.3: Ordering block-clusters within each layer using log-Euclidean distances.
In [65]:
# UMAP scatter colored by cluster, with a bold centroid label per cluster.
import numpy as np
import matplotlib.pyplot as plt

labels = np.asarray(data.labels)  # assumes integer cluster ids 0..k-1 — TODO confirm
n_clusters = len(set(labels.tolist()))
# Use Scanpy's default_20 palette for consistency; wrap with modulo so more
# than 20 clusters no longer raise IndexError.
palette = sc.pl.palettes.default_20
cluster_colors = [palette[int(lab) % len(palette)] for lab in labels]
plt.scatter(data.latent['umap'][:,0], data.latent['umap'][:,1], s=6, lw=0, alpha=0.8, c=cluster_colors)
# Write each cluster id at its centroid, on a white circular background with
# bold font for visibility.
for i in range(n_clusters):
    cluster_points = data.latent['umap'][labels == i]
    if len(cluster_points) == 0:
        continue
    x_mean = np.mean(cluster_points[:,0])
    y_mean = np.mean(cluster_points[:,1])
    plt.text(x_mean, y_mean, str(i), color='black', fontsize=10, ha='center', va='center', fontweight='bold',
             bbox=dict(facecolor='white', edgecolor='black', boxstyle='circle,pad=0.5', alpha=0.7))
# Remove top/right spines and axis ticks for a cleaner look.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
#plt.savefig(f"{result_dir}/umap_plot_with_cluster_labels.png", dpi=300)
plt.show()
No description has been provided for this image
In [67]:
# matplotlib.pylab is a discouraged compatibility shim (pollutes the
# namespace with numpy); import pyplot directly instead.
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
In [69]:
import numpy as np
# Color each spot by its cluster label (same palette as the UMAP plot).
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
labels = data.labels
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']
plt.scatter(coords[indices,0], coords[indices,1], s=1, c=point_colors, lw=0, alpha=0.8)
# Overlay tile bounding boxes as unfilled rectangles.
patches = []
for t in tiles:
    bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin,ymin,xmax,ymax)
    x0, y0, x1, y1 = bbox
    patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
# NOTE: match_original=True would make PatchCollection ignore the explicit
# edgecolors/linewidths below, so it is dropped; facecolors='none' keeps the
# rectangles unfilled with blue outlines as intended.
pc = PatchCollection(patches, facecolors='none', linewidths=0.2, edgecolors='b', alpha=0.3)
# add collection
ax = plt.gca()
ax.add_collection(pc)
ax.invert_yaxis()  # image-style coordinates: y increases downward
ax.axis('off')
#ax.set_aspect('equal')
#plt.savefig(f"{result_dir}/spatial_plot_with_cluster.png", dpi=300)
plt.show()
No description has been provided for this image
In [76]:
# Save spatial plot to results directory
# sc.pl.spatial(adata, color="Cluster", groups=["DCIS_1", "DCIS_2"], spot_size=12, frameon=False, 
#               save="_dcis_groups.png")
# This saves to scanpy's default figures directory

# Or save to a specific location:
# NOTE(review): sc.pl.spatial emits a FutureWarning (see output below) —
# scanpy recommends squidpy.pl.spatial_scatter as the replacement; migrate
# when squidpy is available in this environment.
import matplotlib.pyplot as plt
# show=False keeps the figure open so plt.savefig can capture it before display.
sc.pl.spatial(adata, color="Cluster", groups=["DCIS_1", "DCIS_2"], spot_size=12, frameon=False, show=False)
plt.savefig('/data/sarkar_lab/Projects/spindle_dev/results/hbreast_10X_wo_unlabeled/spatial_dcis_groups.png', dpi=300, bbox_inches='tight')
plt.show()
/tmp/ipykernel_101252/2522203223.py:8: FutureWarning: Use `squidpy.pl.spatial_scatter` instead.
  sc.pl.spatial(adata, color="Cluster", groups=["DCIS_1", "DCIS_2"], spot_size=12, frameon=False, show=False)
No description has been provided for this image
In [ ]:
import numpy as np
# Spot scatter only — the tile-rectangle overlay below is intentionally disabled.
palette = sc.pl.palettes.default_20
point_colors = [palette[lab] for lab in data.spot_label.values()]
labels = data.labels
indices = np.array([int(i) for i in data.spot_label.keys()])
coords = adata.obsm['spatial']
plt.scatter(coords[indices,0], coords[indices,1], s=1, c=point_colors, lw=0, alpha=0.8)
# patches = []
# for t in tiles:
#     bbox = t.bbox if hasattr(t, "bbox") else t["bbox"]  # (xmin,ymin,xmax,ymax)
#     x0, y0, x1, y1 = bbox
#     patches.append(Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False))
# pc = PatchCollection(patches, match_original=True, linewidths=0.2, edgecolors='b', alpha=0.3)
# NOTE: ax.add_collection(pc) removed — `pc` was only defined in an earlier
# cell (its construction is commented out here), so calling it depended on
# hidden kernel state and would raise NameError on Restart-&-Run-All.
ax = plt.gca()
ax.invert_yaxis()
ax.axis('off')
#ax.set_aspect('equal')
#plt.savefig(f"{result_dir}/spatial_plot_with_cluster.png", dpi=300)
plt.show()
In [ ]:
# Number of nodes in dag_dict
In [ ]:
n_queries = 10
seed = 40
rng = np.random.default_rng(seed)

# Build the index FIRST: the original cell read dag_dict.keys() before
# dag_dict was assigned below, which fails on a fresh kernel.
dag_dict, stat, dist_list = index.index_spds(data, config=config)
gt_paths = test.create_ground_truth_paths(dag_dict)

# Sample query SPDs restricted to clusters present in the DAG index.
all_indices = np.arange(len(data.spd_matrices))
valid_clusters = list(dag_dict.keys())
mask = np.isin(data.labels, valid_clusters)
candidate_indices = all_indices[mask]

query_indices = rng.choice(candidate_indices, size=n_queries, replace=False)
query_matrices = [data.spd_matrices[i] for i in query_indices]
In [12]:
# Hot-reload the metrics module to pick up source edits without a kernel restart.
import spindle_dev.metrics as metrics
importlib.reload(metrics)
Out[12]:
<module 'spindle_dev.metrics' from '/data/sarkar_lab/Projects/spindle_dev/src/spindle_dev/metrics.py'>
In [14]:
# Rebuild the per-cluster ground-truth paths from the current DAG index.
gt_paths = test.create_ground_truth_paths(dag_dict)
[2026-01-20 17:50:11,714] INFO spindle_dev.test: Built ground-truth paths for 760 SPDs in cluster 0 across 22 blocks.
[2026-01-20 17:50:12,020] INFO spindle_dev.test: Built ground-truth paths for 515 SPDs in cluster 1 across 22 blocks.
[2026-01-20 17:50:12,258] INFO spindle_dev.test: Built ground-truth paths for 431 SPDs in cluster 2 across 23 blocks.
[2026-01-20 17:50:12,354] INFO spindle_dev.test: Built ground-truth paths for 260 SPDs in cluster 3 across 24 blocks.
[2026-01-20 17:50:12,386] INFO spindle_dev.test: Built ground-truth paths for 156 SPDs in cluster 4 across 19 blocks.
[2026-01-20 17:50:12,395] INFO spindle_dev.test: Built ground-truth paths for 66 SPDs in cluster 5 across 25 blocks.
In [20]:
# Sample 200 query SPDs (fixed seed for reproducibility), restricted to
# clusters that actually exist in the DAG index.
seed = 40
n_queries_noise = 200
rng = np.random.default_rng(seed)
all_indices = np.arange(len(data.spd_matrices))
valid_clusters = list(dag_dict.keys())
mask = np.isin(data.labels, valid_clusters)
candidate_indices = all_indices[mask]
query_indices = rng.choice(candidate_indices, size=n_queries_noise, replace=False)

# Create noisy query matrices
query_matrices_clean = [data.spd_matrices[i] for i in query_indices]
In [21]:
import tqdm
In [22]:
# Cache eigendecompositions of the clean query matrices so noisy variants
# can be resampled cheaply (reusing V and the log-eigenvalues).
def _as_array(matrix):
    """Coerce an SPD matrix to a float64 ndarray."""
    return np.asarray(matrix, dtype=np.float64)

eig_floor = 1e-8  # clamp eigenvalues away from zero before taking logs

query_cache = []
# Pass total= so tqdm can render a progress fraction (enumerate has no len).
for j, A in tqdm.tqdm(enumerate(query_matrices_clean), total=len(query_matrices_clean)):
    A = _as_array(A)
    A = 0.5 * (A + A.T)  # symmetrize to guard against numerical asymmetry
    w, V = np.linalg.eigh(A)
    w = np.maximum(w, eig_floor)
    query_cache.append({
        "idx": int(query_indices[j]),   # original index in data.spd_matrices (optional)
        "V": V,                         # eigenvectors
        "logw": np.log(w),              # cached log-eigenvalues
        "p": A.shape[0],                # matrix dimension
    })
200it [00:28,  6.96it/s]
In [23]:
def sample_noisy_from_cache_with_le(cache, noise_level=0.1, seed=None):
    """Draw one noisy SPD matrix per cached eigendecomposition.

    Gaussian noise with std ``noise_level`` is added to the cached
    log-eigenvalues; each matrix is rebuilt as V diag(exp(logw + eps)) V^T
    and re-symmetrized. Returns the noisy matrices and the exact
    log-Euclidean distance of each from its original, ||eps|| / sqrt(p).
    """
    gen = np.random.default_rng(seed)
    noisy_mats = []
    le_dists = []
    for entry in cache:
        perturbation = gen.normal(0.0, noise_level, size=entry["logw"].shape)
        perturbed_w = np.exp(entry["logw"] + perturbation)
        rebuilt = entry["V"] @ (perturbed_w[:, None] * entry["V"].T)
        rebuilt = 0.5 * (rebuilt + rebuilt.T)  # enforce exact symmetry

        le_dists.append(float(np.linalg.norm(perturbation) / np.sqrt(entry["p"])))
        noisy_mats.append(rebuilt)

    return noisy_mats, np.array(le_dists, dtype=float)
In [24]:
# Ground-truth cluster label for each sampled query.
true_clusters = [int(data.labels[i]) for i in query_indices]
In [27]:
# Ground-truth DAG path per query (None when no path entry exists for the query).
true_paths = [gt_paths.get(true_cluster, {}).get(qid) for qid, true_cluster in zip(query_indices, true_clusters)]
In [35]:
# Test robustness with noisy query matrices:
# add varying levels of log-eigenvalue noise and check whether cluster
# assignment and index search still recover the ground truth.
# The number of queries is fixed by `query_cache` (built above); the
# previous `n_queries_noise = 10` here was dead and misleading.

noise_levels = [0.0, 0.05, 0.1, 0.2, 0.5, 1.0]
search_cfg = search.SearchConfig(max_results=2, debug=False, max_failed_starts=5, max_failed_paths=10)
results_by_noise = []

for noise_level in noise_levels:
    print(f"\nTesting with noise level: {noise_level}")
    noise_start = time.time()

    # --- Generate noisy matrices (noise_level == 0 reuses the clean set) ---
    noise_gen_start = time.time()
    if noise_level > 0:
        query_matrices_noisy, le_distances = sample_noisy_from_cache_with_le(
            query_cache, noise_level=noise_level, seed=seed
        )
        print(f"  Generated LE distances - min: {le_distances.min():.4f}, max: {le_distances.max():.4f}, mean: {le_distances.mean():.4f}")
    else:
        query_matrices_noisy = query_matrices_clean
        le_distances = np.array([0.0] * len(query_matrices_clean))
    noise_gen_time = time.time() - noise_gen_start
    print(f"  Time to generate noisy matrices: {noise_gen_time:.3f}s")

    # --- Predict clusters using the noisy queries ---
    cluster_pred_start = time.time()
    predicted_clusters = search.assign_clusters_to_new_spds(query_matrices_noisy, data)
    cluster_pred_time = time.time() - cluster_pred_start
    print(f"  Time to predict clusters: {cluster_pred_time:.3f}s")

    # --- Search the per-cluster DAG index for each query ---
    search_start = time.time()
    matched_list = []
    matched_leaf_list = []
    budget_list = []
    matched_budget_list = []

    for j, cluster_id in enumerate(predicted_clusters):
        true_cluster = true_clusters[j]
        gt_path = true_paths[j]  # may be None when no ground-truth path exists

        index_handle = dag_dict[cluster_id]
        num_blocks = len(index_handle.sorted_blocks)
        epsilon = config.epsilon_dict[cluster_id]
        # Search budget: 2 * epsilon per block of the predicted cluster.
        budget = float(epsilon) * float(num_blocks) * 2

        # Permute the query into the predicted cluster's block ordering.
        q_spd = query_matrices_noisy[j]
        perm = data.perm_list[cluster_id]
        q_spd_perm = q_spd[np.ix_(perm, perm)]
        query_block_runs = data.block_dict[cluster_id]
        results = search.search_index(
                index_handle,
                q_spd_perm,
                [],
                query_block_runs,
                budget,
                config=search_cfg,
            )

        if cluster_id == true_cluster and gt_path is not None:
            matched = False
            matched_leaf = False
            matched_budget = None
            for path in results.paths:
                if path.node_path == gt_path:
                    matched = True
                    matched_leaf = True
                    matched_budget = path.total_distance
                    break

            # If no exact path match, see if any result shares the same leaf.
            if not matched:
                gt_leaf_node = gt_path[-1]
                for path in results.paths:
                    if path.node_path and path.node_path[-1] == gt_leaf_node:
                        matched_leaf = True
                        matched_budget = path.total_distance
                        break
        else:
            # Wrong cluster (or missing ground truth): just record the best distance.
            budget_used = results.paths[0].total_distance if results.paths else None
            matched = False
            matched_leaf = False
            matched_budget = budget_used
        matched_list += [matched]
        matched_leaf_list += [matched_leaf]
        budget_list += [budget]
        matched_budget_list += [matched_budget]

    search_time = time.time() - search_start
    print(f"  Time to search index: {search_time:.3f}s ({search_time/len(predicted_clusters):.4f}s per query)")

    # --- Store individual results for each query ---
    for q_idx, le_dist, true_clust, pred_clust, matched, matched_leaf, budget, matched_budget in zip(
        query_indices, le_distances, true_clusters, predicted_clusters,
        matched_list, matched_leaf_list, budget_list, matched_budget_list
    ):
        results_by_noise.append({
            'noise_level': noise_level,
            'query_index': int(q_idx),
            'le_distance': float(le_dist),
            'true_cluster': true_clust,
            'predicted_cluster': pred_clust,
            'correct': true_clust == pred_clust,
            "matched": matched,
            "matched_leaf": matched_leaf,
            "budget": matched_budget,
            "query_budget": budget
        })

    # --- Print summary for this noise level ---
    correct = sum(1 for t, p in zip(true_clusters, predicted_clusters) if t == p)
    accuracy = correct / len(true_clusters)
    avg_le_distance = float(le_distances.mean())

    total_time = time.time() - noise_start
    print(f"  Accuracy: {accuracy:.3f} ({correct}/{len(true_clusters)})")
    print(f"  Avg LE distance: {avg_le_distance:.4f}")
    print(f"  Total time for noise level: {total_time:.3f}s")
Testing with noise level: 0.0
  Time to generate noisy matrices: 0.000s
  Time to predict clusters: 5.870s
  Time to search index: 25.987s (0.1299s per query)
  Accuracy: 0.990 (198/200)
  Avg LE distance: 0.0000
  Total time for noise level: 31.859s

Testing with noise level: 0.05
  Generated LE distances - min: 0.0444, max: 0.0566, mean: 0.0499
  Time to generate noisy matrices: 3.989s
  Time to predict clusters: 6.112s
  Time to search index: 24.988s (0.1249s per query)
  Accuracy: 0.990 (198/200)
  Avg LE distance: 0.0499
  Total time for noise level: 35.091s

Testing with noise level: 0.1
  Generated LE distances - min: 0.0888, max: 0.1131, mean: 0.0998
  Time to generate noisy matrices: 9.051s
  Time to predict clusters: 7.363s
  Time to search index: 24.335s (0.1217s per query)
  Accuracy: 0.990 (198/200)
  Avg LE distance: 0.0998
  Total time for noise level: 40.750s

Testing with noise level: 0.2
  Generated LE distances - min: 0.1776, max: 0.2263, mean: 0.1997
  Time to generate noisy matrices: 1.333s
  Time to predict clusters: 1.867s
  Time to search index: 25.278s (0.1264s per query)
  Accuracy: 0.980 (196/200)
  Avg LE distance: 0.1997
  Total time for noise level: 28.480s

Testing with noise level: 0.5
  Generated LE distances - min: 0.4441, max: 0.5657, mean: 0.4992
  Time to generate noisy matrices: 1.669s
  Time to predict clusters: 1.884s
  Time to search index: 23.828s (0.1191s per query)
  Accuracy: 0.900 (180/200)
  Avg LE distance: 0.4992
  Total time for noise level: 27.382s

Testing with noise level: 1.0
  Generated LE distances - min: 0.8882, max: 1.1315, mean: 0.9985
  Time to generate noisy matrices: 5.120s
  Time to predict clusters: 1.718s
  Time to search index: 17.994s (0.0900s per query)
  Accuracy: 0.665 (133/200)
  Avg LE distance: 0.9985
  Total time for noise level: 24.833s
In [ ]:
 
In [36]:
# Compile results into DataFrame (one row per noise level x query).
results_df = pd.DataFrame(results_by_noise)
In [37]:
results_df
Out[37]:
noise_level query_index le_distance true_cluster predicted_cluster correct matched matched_leaf budget query_budget
0 0.0 501 0.000000 0 0 True True True 67.695243 320.917748
1 0.0 845 0.000000 0 0 True True True 70.041807 320.917748
2 0.0 1281 0.000000 1 1 True True True 72.495370 283.410554
3 0.0 1394 0.000000 1 1 True True True 63.988327 283.410554
4 0.0 2036 0.000000 3 3 True True True 93.941560 350.702153
... ... ... ... ... ... ... ... ... ... ...
1195 1.0 269 1.011084 0 0 True True True 84.290473 320.917748
1196 1.0 670 0.967996 0 2 False False False 78.951327 330.083902
1197 1.0 626 0.953492 4 2 False False False 93.235828 330.083902
1198 1.0 1874 1.026542 3 1 False False False 106.153718 283.410554
1199 1.0 1315 0.974375 2 2 True True True 77.521555 330.083902

1200 rows × 10 columns

In [53]:
# Simple bar plot: accuracy fraction by noise level
import matplotlib.pyplot as plt
import seaborn as sns

# Calculate accuracy for each noise level
accuracy_summary = results_df.groupby('noise_level')['correct'].mean().reset_index()
accuracy_summary.columns = ['noise_level', 'accuracy']

plt.figure(figsize=(4, 5))

# Draw the bars ONCE at categorical positions. (Previously both plt.bar and
# sns.barplot were called, stacking two bar layers on the same axes.)
x_pos = range(len(accuracy_summary))
plt.bar(x_pos, accuracy_summary['accuracy'],
        alpha=0.7, color='steelblue', edgecolor='black', linewidth=1)
plt.xticks(x_pos, accuracy_summary['noise_level'])
plt.xlabel('Noise Level', fontsize=12)
plt.ylabel('Accuracy (Fraction Correct)', fontsize=12)
plt.title('')
plt.ylim(0, 1)
#plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
sns.despine()

plt.savefig('/data/sarkar_lab/Projects/spindle_dev/results/plots/cross_cluster_noise.png', dpi=300, bbox_inches='tight')
plt.show()
No description has been provided for this image
In [44]:
# Print the accuracy-vs-noise summary table as plain text (no index column).
print("\nAccuracy by Noise Level:")
print(accuracy_summary.to_string(index=False))
Accuracy by Noise Level:
 noise_level  accuracy
        0.00     0.990
        0.05     0.990
        0.10     0.990
        0.20     0.980
        0.50     0.900
        1.00     0.665
In [38]:
# Sanity check: queries whose true cluster was found but whose ground-truth
# leaf was NOT matched (expected to be empty, as confirmed by the output).
results_df.loc[results_df['correct'] & ~results_df['matched_leaf']]
Out[38]:
noise_level query_index le_distance true_cluster predicted_cluster correct matched matched_leaf budget query_budget
In [ ]:
# Flatten the per-query noise results into one row per (noise level, query)
# and summarize accuracy / LE-distance statistics per noise level.
noise_df = pd.DataFrame(results_by_noise)
print(f"Total records: {len(noise_df)}")
print(f"\nSummary by noise level:")
# Named aggregation yields the flat column names directly.
summary = (
    noise_df
    .groupby('noise_level')
    .agg(
        correct=('correct', 'sum'),
        total=('correct', 'count'),
        accuracy=('correct', 'mean'),
        avg_le_distance=('le_distance', 'mean'),
        std_le_distance=('le_distance', 'std'),
    )
    .round(4)
)
print(summary)
noise_df.head(15)
In [ ]:
# Two-panel view of robustness: accuracy vs injected noise level, and
# accuracy vs the realized log-Euclidean perturbation distance.
import matplotlib.pyplot as plt

# Aggregate per noise level for plotting.
plot_df = noise_df.groupby('noise_level').agg({
    'correct': 'mean',
    'le_distance': ['mean', 'std']
}).reset_index()
plot_df.columns = ['noise_level', 'accuracy', 'avg_le_distance', 'std_le_distance']


def _decorate(ax, xlabel, title):
    """Apply the shared axis styling used by both panels."""
    ax.set_xlabel(xlabel, fontsize=11)
    ax.set_ylabel('Cluster Assignment Accuracy', fontsize=11)
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.set_ylim([0, 1.05])


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: accuracy as a function of the injected noise level.
ax1.plot(plot_df['noise_level'], plot_df['accuracy'], marker='o', linewidth=2, markersize=8, color='steelblue')
_decorate(ax1, 'Noise Level (std dev in log-eigenvalue space)', 'Accuracy vs Noise Level')

# Right panel: accuracy vs realized LE distance, with std error bars.
ax2.errorbar(plot_df['avg_le_distance'], plot_df['accuracy'],
             xerr=plot_df['std_le_distance'],
             marker='o', linewidth=2, markersize=8, capsize=5, color='darkorange')
_decorate(ax2, 'Average LE Distance from Original', 'Accuracy vs LE Distance')

plt.tight_layout()
plt.show()
In [ ]:
# End-to-end sanity check: build the index, sample queries, and compare the
# predicted cluster with the true label for each query.
seed = 40
n_queries = 10  # was previously inherited from a distant cell (hidden state)
rng = np.random.default_rng(seed)

# Build the index FIRST: the original cell read dag_dict.keys() before
# dag_dict was assigned, which fails on a fresh kernel.
dag_dict, stat, dist_list = index.index_spds(data, config=config)
gt_paths = test.create_ground_truth_paths(dag_dict)

all_indices = np.arange(len(data.spd_matrices))
valid_clusters = list(dag_dict.keys())
mask = np.isin(data.labels, valid_clusters)
candidate_indices = all_indices[mask]

query_indices = rng.choice(candidate_indices, size=n_queries, replace=False)
query_matrices = [data.spd_matrices[i] for i in query_indices]

true_clusters = [int(data.labels[i]) for i in query_indices]
predicted_clusters = search.assign_clusters_to_new_spds(query_matrices, data)
predicted_df = pd.DataFrame({'True Cluster': true_clusters, 'Predicted Cluster': predicted_clusters})
print(f"Done with search")
# `return` is invalid at notebook top level (SyntaxError); end the cell with
# the value so Jupyter displays it instead.
predicted_df