spatialcheckpoint 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. spatialcheckpoint-0.1.0/PKG-INFO +462 -0
  2. spatialcheckpoint-0.1.0/README.md +420 -0
  3. spatialcheckpoint-0.1.0/pyproject.toml +65 -0
  4. spatialcheckpoint-0.1.0/setup.cfg +4 -0
  5. spatialcheckpoint-0.1.0/src/spatialcheckpoint/__init__.py +39 -0
  6. spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/__init__.py +1 -0
  7. spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/colocalization.py +516 -0
  8. spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/domain_annotation.py +472 -0
  9. spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/gradient.py +211 -0
  10. spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/spatial_expression.py +320 -0
  11. spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/spatial_features.py +673 -0
  12. spatialcheckpoint-0.1.0/src/spatialcheckpoint/cli.py +270 -0
  13. spatialcheckpoint-0.1.0/src/spatialcheckpoint/configs/checkpoint_panel.yaml +80 -0
  14. spatialcheckpoint-0.1.0/src/spatialcheckpoint/configs/spatial_datasets.yaml +204 -0
  15. spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/__init__.py +1 -0
  16. spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/download.py +702 -0
  17. spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/loader.py +199 -0
  18. spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/preprocess.py +506 -0
  19. spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/__init__.py +1 -0
  20. spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/archetype_discovery.py +545 -0
  21. spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/classifier.py +726 -0
  22. spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/explainer.py +136 -0
  23. spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/trainer.py +136 -0
  24. spatialcheckpoint-0.1.0/src/spatialcheckpoint/utils/__init__.py +1 -0
  25. spatialcheckpoint-0.1.0/src/spatialcheckpoint/utils/gene_sets.py +171 -0
  26. spatialcheckpoint-0.1.0/src/spatialcheckpoint/utils/metrics.py +233 -0
  27. spatialcheckpoint-0.1.0/src/spatialcheckpoint/validation/__init__.py +1 -0
  28. spatialcheckpoint-0.1.0/src/spatialcheckpoint/validation/bulk_mapping.py +90 -0
  29. spatialcheckpoint-0.1.0/src/spatialcheckpoint/validation/clinical_association.py +825 -0
  30. spatialcheckpoint-0.1.0/src/spatialcheckpoint/visualization/__init__.py +1 -0
  31. spatialcheckpoint-0.1.0/src/spatialcheckpoint/visualization/paper_figures.py +1489 -0
  32. spatialcheckpoint-0.1.0/src/spatialcheckpoint/visualization/spatial_plots.py +227 -0
  33. spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/PKG-INFO +462 -0
  34. spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/SOURCES.txt +39 -0
  35. spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/dependency_links.txt +1 -0
  36. spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/entry_points.txt +2 -0
  37. spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/requires.txt +27 -0
  38. spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/top_level.txt +1 -0
  39. spatialcheckpoint-0.1.0/tests/test_analysis.py +193 -0
  40. spatialcheckpoint-0.1.0/tests/test_data.py +126 -0
  41. spatialcheckpoint-0.1.0/tests/test_model.py +316 -0
@@ -0,0 +1,462 @@
1
+ Metadata-Version: 2.4
2
+ Name: spatialcheckpoint
3
+ Version: 0.1.0
4
+ Summary: Spatial heterogeneity profiling of immune checkpoints in spatial transcriptomics
5
+ License-Expression: MIT
6
+ Keywords: spatial transcriptomics,immune checkpoint,bioinformatics,single-cell,machine learning
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ Requires-Dist: scanpy>=1.10
17
+ Requires-Dist: squidpy>=1.4
18
+ Requires-Dist: anndata>=0.10
19
+ Requires-Dist: pandas>=2.0
20
+ Requires-Dist: numpy>=1.24
21
+ Requires-Dist: scikit-learn>=1.4
22
+ Requires-Dist: lightgbm>=4.0
23
+ Requires-Dist: xgboost>=2.0
24
+ Requires-Dist: shap>=0.45
25
+ Requires-Dist: lifelines>=0.28
26
+ Requires-Dist: matplotlib>=3.8
27
+ Requires-Dist: seaborn>=0.13
28
+ Requires-Dist: pyyaml>=6.0
29
+ Requires-Dist: typer>=0.9
30
+ Requires-Dist: rich>=13.0
31
+ Requires-Dist: tqdm>=4.66
32
+ Requires-Dist: optuna>=3.0
33
+ Requires-Dist: scipy>=1.11
34
+ Requires-Dist: imbalanced-learn>=0.11
35
+ Requires-Dist: requests>=2.28
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest; extra == "dev"
38
+ Requires-Dist: pytest-cov; extra == "dev"
39
+ Requires-Dist: ruff; extra == "dev"
40
+ Requires-Dist: build; extra == "dev"
41
+ Requires-Dist: twine; extra == "dev"
42
+
43
+ # SpatialCheckpoint
44
+
45
+ [![PyPI version](https://img.shields.io/pypi/v/spatialcheckpoint.svg)](https://pypi.org/project/spatialcheckpoint/)
46
+ [![Python](https://img.shields.io/pypi/pyversions/spatialcheckpoint.svg)](https://pypi.org/project/spatialcheckpoint/)
47
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
48
+
49
+ **Spatial heterogeneity profiling of immune checkpoints in spatial transcriptomics data.**
50
+
51
+ SpatialCheckpoint is a bioinformatics pipeline that integrates spatial gene expression profiling, consensus clustering, ensemble ML classification, SHAP interpretability, and clinical survival analysis to characterize immune checkpoint heterogeneity across the tumor microenvironment.
52
+
53
+ ---
54
+
55
+ ## Features
56
+
57
+ - **Spatial profiling** — region-based checkpoint expression across tumor core, invasive margin, stroma, immune-enriched, and necrotic regions
58
+ - **80+ spatial features** — co-localization scores, spatial gradients, Moran's I autocorrelation, region ratios
59
+ - **Archetype discovery** — consensus KMeans + NMF across 6 fixed immune archetypes
60
+ - **Ensemble classification** — LightGBM + XGBoost + MLP + Random Forest with SMOTE and Optuna HPO
61
+ - **SHAP interpretability** — global and per-class feature importance
62
+ - **Clinical associations** — Kaplan-Meier curves, Cox proportional hazards, logistic regression on OS/PFS
63
+ - **Bundled gene panel** — 44 curated immune checkpoint genes across 6 functional categories
64
+
65
+ ---
66
+
67
+ ## Installation
68
+
69
+ ```bash
70
+ pip install spatialcheckpoint
71
+ ```
72
+
73
+ For development:
74
+
75
+ ```bash
76
+ git clone https://github.com/yourorg/SpatialCheckpoint.git
77
+ cd SpatialCheckpoint
78
+ pip install -e ".[dev]"
79
+ ```
80
+
81
+ **Requirements:** Python ≥ 3.10
82
+
83
+ ---
84
+
85
+ ## Quick Start
86
+
87
+ ### 5-Minute Demo (Synthetic Data)
88
+
89
+ The following demo runs entirely on synthetic data — no real Visium files required.
90
+
91
+ ```python
92
+ import numpy as np
93
+ import pandas as pd
94
+ import scanpy as sc
95
+ import spatialcheckpoint as scp
96
+
97
+ print(f"SpatialCheckpoint v{scp.__version__}")
98
+
99
+ # ── 1. Gene panel ────────────────────────────────────────────────────────────
100
+ genes = scp.get_all_checkpoint_genes()
101
+ print(f"Checkpoint panel: {len(genes)} genes")
102
+ print(f" e.g. {genes[:5]}")
103
+
104
+ pd1_pathway = scp.get_category_genes("co_inhibitory_receptors")
105
+ print(f"Co-inhibitory receptor genes: {pd1_pathway}")
106
+
107
+ # ── 2. Synthetic Visium slide ────────────────────────────────────────────────
108
+ rng = np.random.default_rng(42)
109
+ n_spots, n_genes = 200, 100
110
+ checkpoint_genes_subset = genes[:8]
111
+ random_genes = [f"GENE{i:04d}" for i in range(n_genes - len(checkpoint_genes_subset))]
112
+ all_genes = random_genes + checkpoint_genes_subset
113
+
114
+ X = rng.negative_binomial(n=2, p=0.5, size=(n_spots, n_genes)).astype(float)
115
+ adata = sc.AnnData(X=X)
116
+ adata.var_names = pd.Index(all_genes)
117
+
118
+ # Spatial coordinates (20 × 10 grid)
119
+ gx, gy = np.meshgrid(np.arange(20), np.arange(10))
120
+ coords = np.column_stack([gx.ravel(), gy.ravel()]).astype(float)
121
+ coords += rng.uniform(-0.1, 0.1, size=coords.shape)
122
+ adata.obsm["spatial"] = coords
123
+
124
+ # Region annotations
125
+ regions = ["tumor_core", "invasive_margin", "stroma", "immune_enriched", "necrotic"]
126
+ region_list = []
127
+ for x, y in coords:
128
+     if x < 5 and y < 5: region_list.append("tumor_core")
129
+     elif x < 10 and y < 8: region_list.append("invasive_margin")
130
+     elif x >= 15: region_list.append("immune_enriched")
131
+     elif y >= 8: region_list.append("necrotic")
132
+     else: region_list.append("stroma")
133
+ adata.obs["region_type"] = pd.Categorical(region_list, categories=regions)
134
+
135
+ # ── 3. Spatial feature extraction ───────────────────────────────────────────
136
+ engineer = scp.SpatialFeatureEngineer(adata, checkpoint_genes_subset)
137
+ features = engineer.extract_all_features(sample_id="demo_sample")
138
+ print(f"\nFeature matrix: {features.shape[0]} samples × {features.shape[1]} features")
139
+ print(f" Feature columns (first 5): {list(features.columns[:5])}")
140
+
141
+ # ── 4. Archetype discovery ───────────────────────────────────────────────────
142
+ # Build a multi-sample feature matrix (simulate 30 samples)
143
+ n_samples, n_feats = 30, features.shape[1]
144
+ feat_data = rng.standard_normal((n_samples, n_feats))
145
+ sample_ids = [f"sample_{i:03d}" for i in range(n_samples)]
146
+ feature_matrix = pd.DataFrame(feat_data, index=sample_ids, columns=features.columns)
147
+
148
+ cancer_types = rng.choice(["BRCA", "CRC", "NSCLC"], size=n_samples)
149
+ metadata = pd.DataFrame({"cancer_type": cancer_types}, index=sample_ids)
150
+
151
+ discovery = scp.SpatialArchetypeDiscovery(feature_matrix, metadata)
152
+ result = discovery.consensus_clustering(k_range=(2, 5), n_iterations=30)
153
+
154
+ print(f"\nConsensus clustering:")
155
+ print(f" Optimal k = {result['optimal_k']}")
156
+ print(f" Label distribution: {dict(pd.Series(result['labels']).value_counts())}")
157
+
158
+ char_df = discovery.characterize_archetypes(result["labels"])
159
+ print(f"\nArchetype characterization:")
160
+ print(char_df[["archetype_name", "n_samples"]].to_string())
161
+
162
+ # ── 5. NMF soft membership ───────────────────────────────────────────────────
163
+ nmf_result = discovery.run_nmf(k=result["optimal_k"])
164
+ print(f"\nNMF decomposition:")
165
+ print(f" W (membership weights): {nmf_result['W'].shape}")
166
+ print(f" H (archetype profiles): {nmf_result['H'].shape}")
167
+ print(f" Explained variance: {nmf_result['explained_variance']:.3f}")
168
+ ```
169
+
170
+ ---
171
+
172
+ ### Python API
173
+
174
+ #### 1. Data Preprocessing
175
+
176
+ ```python
177
+ import spatialcheckpoint as scp
178
+
179
+ # From Space Ranger output directory
180
+ preprocessor = scp.SpatialDataPreprocessor(spaceranger_out_path="path/to/spaceranger/output")
181
+ adata = preprocessor.load_visium()
182
+ adata = preprocessor.quality_control(adata, min_genes=200, max_mt_pct=25.0)
183
+ adata = preprocessor.normalize(adata)
184
+ adata.write_h5ad("data/processed/sample01_preprocessed.h5ad")
185
+
186
+ # Or from an existing H5AD
187
+ preprocessor = scp.SpatialDataPreprocessor(h5_path="existing_data.h5ad")
188
+ ```
189
+
190
+ #### 2. Load & Cache
191
+
192
+ ```python
193
+ loader = scp.SpatialDataLoader(processed_dir="data/processed/")
194
+ adata = loader.load("sample01") # returns cached .h5ad if present
195
+ ```
196
+
197
+ #### 3. Checkpoint Profiling
198
+
199
+ ```python
200
+ genes = scp.get_all_checkpoint_genes() # 44 genes, 6 functional categories
201
+
202
+ profiler = scp.SpatialCheckpointProfiler(adata, genes)
203
+ region_expr = profiler.expression_by_region() # DataFrame: region × gene
204
+ hotspots = profiler.checkpoint_hotspot_detection() # Moran's I per gene
205
+ ```
206
+
207
+ #### 4. Spatial Feature Engineering
208
+
209
+ ```python
210
+ engineer = scp.SpatialFeatureEngineer(adata, genes)
211
+ features = engineer.extract_all_features(sample_id="sample01")
212
+ # → DataFrame with 80+ columns: co-localization, gradients, Moran's I, region ratios
213
+ ```
214
+
215
+ #### 5. Co-localization Analysis
216
+
217
+ ```python
218
+ lr_pairs = scp.get_ligand_receptor_pairs() # [{ligand, receptor, alias}]
219
+ analyzer = scp.CheckpointColocalizationAnalyzer(adata, genes)
220
+ coloc_df = analyzer.compute_colocalization()
221
+ ```
222
+
223
+ #### 6. Archetype Discovery
224
+
225
+ ```python
226
+ # feature_matrix: DataFrame (n_samples × n_features)
227
+ # sample_metadata: DataFrame with 'cancer_type' column, same index as feature_matrix
228
+ discovery = scp.SpatialArchetypeDiscovery(feature_matrix, sample_metadata)
229
+
230
+ cc = discovery.consensus_clustering(k_range=(2, 8), n_iterations=100)
231
+ labels = cc["labels"] # integer cluster labels
232
+ char = discovery.characterize_archetypes(labels) # archetype names, top features
233
+
234
+ nmf = discovery.run_nmf(k=cc["optimal_k"])
235
+ # nmf["W"] → (n_samples, k) soft membership weights
236
+ # nmf["H"] → (k, n_features) archetype profiles
237
+ ```
238
+
239
+ #### 7. Train the Ensemble Classifier
240
+
241
+ ```python
242
+ trainer = scp.ArchetypeModelTrainer(
243
+ feature_matrix=feature_matrix,
244
+ archetype_labels=labels,
245
+ output_dir="models/",
246
+ )
247
+ results = trainer.run(n_optuna_trials=30)
248
+ # results["model"] → trained ensemble
249
+ # results["test_metrics"] → accuracy, F1, AUC
250
+ ```
251
+
252
+ #### 8. SHAP Explanations
253
+
254
+ ```python
255
+ explainer = scp.ArchetypeExplainer(results["model"], feature_matrix)
256
+ shap_df = explainer.global_feature_importance() # DataFrame: feature × archetype
257
+ ```
258
+
259
+ ---
260
+
261
+ ### CLI
262
+
263
+ ```bash
264
+ # Download a registered dataset
265
+ spatialcheckpoint download BRCA_visium_10x
266
+
267
+ # Download all BRCA datasets
268
+ spatialcheckpoint download all --cancer-type BRCA
269
+
270
+ # Preprocess raw Visium output or H5AD
271
+ spatialcheckpoint preprocess path/to/spaceranger/ data/processed/
272
+ spatialcheckpoint preprocess sample.h5ad data/processed/
273
+
274
+ # Run full spatial analysis on a preprocessed sample
275
+ spatialcheckpoint analyze sample01
276
+
277
+ # Discover archetypes from a feature matrix CSV
278
+ spatialcheckpoint discover results/sample01/features.csv --k-min 2 --k-max 8
279
+
280
+ # Train the archetype classifier
281
+ spatialcheckpoint classify features.csv archetype_labels.csv --model-dir models/
282
+
283
+ # Generate publication figures (requires prior analyze run)
284
+ spatialcheckpoint figures --results-dir results/ --output-dir paper/figures/
285
+ ```
286
+
287
+ ---
288
+
289
+ ## Gene Panel
290
+
291
+ The bundled panel covers **44 genes** across 6 functional categories:
292
+
293
+ | Category | Genes (examples) |
294
+ |----------|-----------------|
295
+ | Co-inhibitory receptors | `PDCD1` (PD-1), `CTLA4`, `LAG3`, `HAVCR2` (TIM-3), `TIGIT` |
296
+ | Co-inhibitory ligands | `CD274` (PD-L1), `PDCD1LG2` (PD-L2), `LGALS9` (Galectin-9) |
297
+ | Novel checkpoints | `VSIR` (VISTA), `CD276` (B7-H3), `VTCN1` (B7-H4) |
298
+ | Innate checkpoints | `CD47`, `SIRPA`, `LILRB1`, `LILRB2` |
299
+ | Immune enzymes | `IDO1`, `ENTPD1` (CD39), `NT5E` (CD73), `ARG1` |
300
+ | Co-stimulatory reference | `CD28`, `ICOS`, `TNFRSF4` (OX40), `TNFRSF9` (4-1BB) |
301
+
302
+ ```python
303
+ import spatialcheckpoint as scp
304
+
305
+ all_genes = scp.get_all_checkpoint_genes() # 44 genes sorted
306
+ pd1_pathway = scp.get_category_genes("co_inhibitory_receptors") # 9 genes
307
+ cell_markers = scp.get_immune_cell_markers() # {cell_type: [genes]}
308
+ lr_pairs = scp.get_ligand_receptor_pairs() # [{ligand, receptor, alias}]
309
+ ```
310
+
311
+ ---
312
+
313
+ ## Archetypes
314
+
315
+ The number of clusters is selected by consensus clustering; each resulting cluster is then assigned one of six predefined spatial immune archetypes:
316
+
317
+ | Archetype | Spatial signature |
318
+ |-----------|------------------|
319
+ | `Checkpoint-Hot` | High checkpoint expression, high immune infiltration, strong spatial co-localization |
320
+ | `Checkpoint-Cold` | Low checkpoint and immune activity throughout the tissue |
321
+ | `Checkpoint-Excluded` | Checkpoint expression concentrated at invasive margin; immune cells at periphery |
322
+ | `Checkpoint-Mismatch` | Checkpoint and immune signals spatially separated (non-overlapping) |
323
+ | `Innate-Dominant` | CD47/SIRPα axis dominant over adaptive checkpoints |
324
+ | `Novel-Enriched` | VISTA / B7-H3 / B7-H4 enriched over canonical PD-1/PD-L1 axis |
325
+
326
+ ---
327
+
328
+ ## Pipeline Architecture
329
+
330
+ ```
331
+ Raw Visium data (Space Ranger dir or H5AD)
332
+ → SpatialDataPreprocessor QC, normalize → 'counts' / 'log1p' layers
333
+ → SpatialDataLoader cache-aware H5AD loader
334
+ → SpatialCheckpointProfiler region-based expression
335
+ (tumor_core, invasive_margin, stroma,
336
+ immune_enriched, necrotic)
337
+ → SpatialFeatureEngineer 80+ features per slide:
338
+ co-localization, gradients, Moran's I,
339
+ region expression ratios
340
+ → SpatialArchetypeDiscovery consensus KMeans + delta-area k-selection
341
+ + NMF soft membership
342
+ → ArchetypeModelTrainer LightGBM + XGBoost + MLP + RF ensemble,
343
+ SMOTE oversampling, RFECV feature selection,
344
+ Optuna hyperparameter optimization
345
+ → ArchetypeExplainer SHAP global / per-class feature importance
346
+ → ClinicalAssociationAnalyzer KM curves, Cox PH, logistic regression (OS/PFS)
347
+ → Visualization spatial plots, publication-ready figures
348
+ ```
349
+
350
+ **Key data contracts:**
351
+ - Spatial coordinates in `adata.obsm['spatial']`
352
+ - Region annotations in `adata.obs['region_type']` (categorical)
353
+ - Preprocessed files: `data/processed/{sample_id}_preprocessed.h5ad`
354
+
355
+ ---
356
+
357
+ ## Output Files
358
+
359
+ | Path | Contents |
360
+ |------|----------|
361
+ | `results/{sample_id}/features.csv` | 80+ spatial features |
362
+ | `results/{sample_id}/region_expression.csv` | Region × gene expression stats |
363
+ | `results/{sample_id}/hotspots.csv` | Moran's I per gene |
364
+ | `results/{sample_id}/colocalization.csv` | Ligand-receptor co-occurrence |
365
+ | `results/archetypes/archetype_labels.csv` | Sample → archetype assignment |
366
+ | `results/archetypes/archetype_characteristics.csv` | Per-archetype feature profiles |
367
+ | `results/archetypes/nmf_W.csv`, `nmf_H.csv` | NMF soft membership weights (W: samples × k) / archetype profiles (H: k × features) |
368
+ | `models/archetype_classifier.joblib` | Serialized ensemble model |
369
+ | `paper/figures/` | Publication-ready PDF/PNG plots |
370
+ | `paper/tables/` | Feature importance and archetype CSV tables |
371
+
372
+ ---
373
+
374
+ ## API Reference
375
+
376
+ ### Gene Set Utilities
377
+
378
+ | Function | Description |
379
+ |----------|-------------|
380
+ | `get_all_checkpoint_genes()` | Sorted list of 44 checkpoint gene symbols |
381
+ | `get_category_genes(category)` | Genes for a specific functional category |
382
+ | `get_immune_cell_markers()` | `{cell_type: [genes]}` reference marker dictionary |
383
+ | `get_ligand_receptor_pairs()` | List of `{ligand, receptor, alias}` pairs |
384
+
385
+ ### Core Classes
386
+
387
+ | Class | Module | Purpose |
388
+ |-------|--------|---------|
389
+ | `SpatialDataPreprocessor` | `data.preprocess` | QC, normalize, dual-input (Space Ranger or H5AD) |
390
+ | `SpatialDataLoader` | `data.loader` | Cache-aware loader for preprocessed H5ADs |
391
+ | `SpatialCheckpointProfiler` | `analysis.spatial_expression` | Region-based expression, hotspot detection |
392
+ | `SpatialFeatureEngineer` | `analysis.spatial_features` | 80+ spatial feature extraction |
393
+ | `CheckpointColocalizationAnalyzer` | `analysis.colocalization` | Ligand-receptor spatial co-occurrence |
394
+ | `SpatialArchetypeDiscovery` | `model.archetype_discovery` | Consensus clustering + NMF |
395
+ | `SpatialArchetypeClassifier` | `model.classifier` | Ensemble classifier (LGBM+XGB+MLP+RF) |
396
+ | `ArchetypeModelTrainer` | `model.trainer` | Full train pipeline with HPO |
397
+ | `ArchetypeExplainer` | `model.explainer` | SHAP global/per-class importance |
398
+
399
+ ---
400
+
401
+ ## Development
402
+
403
+ ```bash
404
+ # Clone and install in dev mode
405
+ git clone https://github.com/yourorg/SpatialCheckpoint.git
406
+ cd SpatialCheckpoint
407
+ pip install -e ".[dev]"
408
+
409
+ # Run tests (uses synthetic fixtures — no real data needed)
410
+ pytest tests/ -v
411
+
412
+ # Lint
413
+ ruff check src/
414
+ ```
415
+
416
+ ### Testing with synthetic data
417
+
418
+ All tests use synthetic fixtures from `tests/conftest.py`. No real Visium files are required:
419
+
420
+ ```python
421
+ # 200-spot × 100-gene AnnData with spatial coords and region labels
422
+ # 50-sample × 80-feature DataFrame
423
+ # Clinical data with OS, PFS, ICI response
424
+ ```
425
+
426
+ ---
427
+
428
+ ## Dependencies
429
+
430
+ Core: `scanpy`, `squidpy`, `anndata`, `pandas`, `numpy`, `scipy`, `scikit-learn`
431
+
432
+ ML: `lightgbm`, `xgboost`, `shap`, `imbalanced-learn`, `optuna`
433
+
434
+ Stats: `lifelines`
435
+
436
+ Viz: `matplotlib`, `seaborn`
437
+
438
+ CLI: `typer`, `rich`
439
+
440
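+ All of the packages above are declared as required dependencies and are installed automatically by `pip install spatialcheckpoint`. The heavier ones (`squidpy`, `lightgbm`, `xgboost`, `lifelines`, `optuna`, `shap`, `imbalanced-learn`) are additionally imported with `try/except` fallbacks, so partial functionality remains available in environments where one of them is missing or fails to import.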
441
+
442
+ ---
443
+
444
+ ## Citation
445
+
446
+ If you use SpatialCheckpoint in your research, please cite:
447
+
448
+ ```bibtex
449
+ @article{spatialcheckpoint2025,
450
+ title = {SpatialCheckpoint: Spatial heterogeneity profiling of immune checkpoints
451
+ in spatial transcriptomics},
452
+ author = {},
453
+ journal = {},
454
+ year = {2025},
455
+ }
456
+ ```
457
+
458
+ ---
459
+
460
+ ## License
461
+
462
+ MIT License — see [LICENSE](LICENSE) for details.