spatialcheckpoint 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spatialcheckpoint-0.1.0/PKG-INFO +462 -0
- spatialcheckpoint-0.1.0/README.md +420 -0
- spatialcheckpoint-0.1.0/pyproject.toml +65 -0
- spatialcheckpoint-0.1.0/setup.cfg +4 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/__init__.py +39 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/__init__.py +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/colocalization.py +516 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/domain_annotation.py +472 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/gradient.py +211 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/spatial_expression.py +320 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/analysis/spatial_features.py +673 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/cli.py +270 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/configs/checkpoint_panel.yaml +80 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/configs/spatial_datasets.yaml +204 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/__init__.py +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/download.py +702 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/loader.py +199 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/data/preprocess.py +506 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/__init__.py +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/archetype_discovery.py +545 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/classifier.py +726 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/explainer.py +136 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/model/trainer.py +136 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/utils/__init__.py +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/utils/gene_sets.py +171 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/utils/metrics.py +233 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/validation/__init__.py +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/validation/bulk_mapping.py +90 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/validation/clinical_association.py +825 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/visualization/__init__.py +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/visualization/paper_figures.py +1489 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint/visualization/spatial_plots.py +227 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/PKG-INFO +462 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/SOURCES.txt +39 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/dependency_links.txt +1 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/entry_points.txt +2 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/requires.txt +27 -0
- spatialcheckpoint-0.1.0/src/spatialcheckpoint.egg-info/top_level.txt +1 -0
- spatialcheckpoint-0.1.0/tests/test_analysis.py +193 -0
- spatialcheckpoint-0.1.0/tests/test_data.py +126 -0
- spatialcheckpoint-0.1.0/tests/test_model.py +316 -0
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spatialcheckpoint
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Spatial heterogeneity profiling of immune checkpoints in spatial transcriptomics
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Keywords: spatial transcriptomics,immune checkpoint,bioinformatics,single-cell,machine learning
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: scanpy>=1.10
|
|
17
|
+
Requires-Dist: squidpy>=1.4
|
|
18
|
+
Requires-Dist: anndata>=0.10
|
|
19
|
+
Requires-Dist: pandas>=2.0
|
|
20
|
+
Requires-Dist: numpy>=1.24
|
|
21
|
+
Requires-Dist: scikit-learn>=1.4
|
|
22
|
+
Requires-Dist: lightgbm>=4.0
|
|
23
|
+
Requires-Dist: xgboost>=2.0
|
|
24
|
+
Requires-Dist: shap>=0.45
|
|
25
|
+
Requires-Dist: lifelines>=0.28
|
|
26
|
+
Requires-Dist: matplotlib>=3.8
|
|
27
|
+
Requires-Dist: seaborn>=0.13
|
|
28
|
+
Requires-Dist: pyyaml>=6.0
|
|
29
|
+
Requires-Dist: typer>=0.9
|
|
30
|
+
Requires-Dist: rich>=13.0
|
|
31
|
+
Requires-Dist: tqdm>=4.66
|
|
32
|
+
Requires-Dist: optuna>=3.0
|
|
33
|
+
Requires-Dist: scipy>=1.11
|
|
34
|
+
Requires-Dist: imbalanced-learn>=0.11
|
|
35
|
+
Requires-Dist: requests>=2.28
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
39
|
+
Requires-Dist: ruff; extra == "dev"
|
|
40
|
+
Requires-Dist: build; extra == "dev"
|
|
41
|
+
Requires-Dist: twine; extra == "dev"
|
|
42
|
+
|
|
43
|
+
# SpatialCheckpoint
|
|
44
|
+
|
|
45
|
+
[![PyPI version](https://img.shields.io/pypi/v/spatialcheckpoint)](https://pypi.org/project/spatialcheckpoint/)
|
|
46
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/spatialcheckpoint)](https://pypi.org/project/spatialcheckpoint/)
|
|
47
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
48
|
+
|
|
49
|
+
**Spatial heterogeneity profiling of immune checkpoints in spatial transcriptomics data.**
|
|
50
|
+
|
|
51
|
+
SpatialCheckpoint is a bioinformatics pipeline that integrates spatial gene expression profiling, consensus clustering, ensemble ML classification, SHAP interpretability, and clinical survival analysis to characterize immune checkpoint heterogeneity across the tumor microenvironment.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Features
|
|
56
|
+
|
|
57
|
+
- **Spatial profiling** — region-based checkpoint expression across tumor core, invasive margin, stroma, immune-enriched, and necrotic regions
|
|
58
|
+
- **80+ spatial features** — co-localization scores, spatial gradients, Moran's I autocorrelation, region ratios
|
|
59
|
+
- **Archetype discovery** — consensus KMeans + NMF across 6 fixed immune archetypes
|
|
60
|
+
- **Ensemble classification** — LightGBM + XGBoost + MLP + Random Forest with SMOTE and Optuna HPO
|
|
61
|
+
- **SHAP interpretability** — global and per-class feature importance
|
|
62
|
+
- **Clinical associations** — Kaplan-Meier curves, Cox proportional hazards, logistic regression on OS/PFS
|
|
63
|
+
- **Bundled gene panel** — 44 curated immune checkpoint genes across 6 functional categories
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install spatialcheckpoint
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
For development:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
git clone https://github.com/yourorg/SpatialCheckpoint.git
|
|
77
|
+
cd SpatialCheckpoint
|
|
78
|
+
pip install -e ".[dev]"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Requirements:** Python ≥ 3.10
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Quick Start
|
|
86
|
+
|
|
87
|
+
### 5-Minute Demo (Synthetic Data)
|
|
88
|
+
|
|
89
|
+
The following demo runs entirely on synthetic data — no real Visium files required.
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
import numpy as np
|
|
93
|
+
import pandas as pd
|
|
94
|
+
import scanpy as sc
|
|
95
|
+
import spatialcheckpoint as scp
|
|
96
|
+
|
|
97
|
+
print(f"SpatialCheckpoint v{scp.__version__}")
|
|
98
|
+
|
|
99
|
+
# ── 1. Gene panel ────────────────────────────────────────────────────────────
|
|
100
|
+
genes = scp.get_all_checkpoint_genes()
|
|
101
|
+
print(f"Checkpoint panel: {len(genes)} genes")
|
|
102
|
+
print(f" e.g. {genes[:5]}")
|
|
103
|
+
|
|
104
|
+
pd1_pathway = scp.get_category_genes("co_inhibitory_receptors")
|
|
105
|
+
print(f"PD-1 pathway genes: {pd1_pathway}")
|
|
106
|
+
|
|
107
|
+
# ── 2. Synthetic Visium slide ────────────────────────────────────────────────
|
|
108
|
+
rng = np.random.default_rng(42)
|
|
109
|
+
n_spots, n_genes = 200, 100
|
|
110
|
+
checkpoint_genes_subset = genes[:8]
|
|
111
|
+
random_genes = [f"GENE{i:04d}" for i in range(n_genes - len(checkpoint_genes_subset))]
|
|
112
|
+
all_genes = random_genes + checkpoint_genes_subset
|
|
113
|
+
|
|
114
|
+
X = rng.negative_binomial(n=2, p=0.5, size=(n_spots, n_genes)).astype(float)
|
|
115
|
+
adata = sc.AnnData(X=X)
|
|
116
|
+
adata.var_names = pd.Index(all_genes)
|
|
117
|
+
|
|
118
|
+
# Spatial coordinates (20 × 10 grid)
|
|
119
|
+
gx, gy = np.meshgrid(np.arange(20), np.arange(10))
|
|
120
|
+
coords = np.column_stack([gx.ravel(), gy.ravel()]).astype(float)
|
|
121
|
+
coords += rng.uniform(-0.1, 0.1, size=coords.shape)
|
|
122
|
+
adata.obsm["spatial"] = coords
|
|
123
|
+
|
|
124
|
+
# Region annotations
|
|
125
|
+
regions = ["tumor_core", "invasive_margin", "stroma", "immune_enriched", "necrotic"]
|
|
126
|
+
region_list = []
|
|
127
|
+
for x, y in coords:
|
|
128
|
+
    if x < 5 and y < 5: region_list.append("tumor_core")
|
|
129
|
+
    elif x < 10 and y < 8: region_list.append("invasive_margin")
|
|
130
|
+
    elif x >= 15: region_list.append("immune_enriched")
|
|
131
|
+
    elif y >= 8: region_list.append("necrotic")
|
|
132
|
+
    else: region_list.append("stroma")
|
|
133
|
+
adata.obs["region_type"] = pd.Categorical(region_list, categories=regions)
|
|
134
|
+
|
|
135
|
+
# ── 3. Spatial feature extraction ───────────────────────────────────────────
|
|
136
|
+
engineer = scp.SpatialFeatureEngineer(adata, checkpoint_genes_subset)
|
|
137
|
+
features = engineer.extract_all_features(sample_id="demo_sample")
|
|
138
|
+
print(f"\nFeature matrix: {features.shape[0]} samples × {features.shape[1]} features")
|
|
139
|
+
print(f" Feature columns (first 5): {list(features.columns[:5])}")
|
|
140
|
+
|
|
141
|
+
# ── 4. Archetype discovery ───────────────────────────────────────────────────
|
|
142
|
+
# Build a multi-sample feature matrix (simulate 30 samples)
|
|
143
|
+
n_samples, n_feats = 30, features.shape[1]
|
|
144
|
+
feat_data = rng.standard_normal((n_samples, n_feats))
|
|
145
|
+
sample_ids = [f"sample_{i:03d}" for i in range(n_samples)]
|
|
146
|
+
feature_matrix = pd.DataFrame(feat_data, index=sample_ids, columns=features.columns)
|
|
147
|
+
|
|
148
|
+
cancer_types = rng.choice(["BRCA", "CRC", "NSCLC"], size=n_samples)
|
|
149
|
+
metadata = pd.DataFrame({"cancer_type": cancer_types}, index=sample_ids)
|
|
150
|
+
|
|
151
|
+
discovery = scp.SpatialArchetypeDiscovery(feature_matrix, metadata)
|
|
152
|
+
result = discovery.consensus_clustering(k_range=(2, 5), n_iterations=30)
|
|
153
|
+
|
|
154
|
+
print(f"\nConsensus clustering:")
|
|
155
|
+
print(f" Optimal k = {result['optimal_k']}")
|
|
156
|
+
print(f" Label distribution: {dict(pd.Series(result['labels']).value_counts())}")
|
|
157
|
+
|
|
158
|
+
char_df = discovery.characterize_archetypes(result["labels"])
|
|
159
|
+
print(f"\nArchetype characterization:")
|
|
160
|
+
print(char_df[["archetype_name", "n_samples"]].to_string())
|
|
161
|
+
|
|
162
|
+
# ── 5. NMF soft membership ───────────────────────────────────────────────────
|
|
163
|
+
nmf_result = discovery.run_nmf(k=result["optimal_k"])
|
|
164
|
+
print(f"\nNMF decomposition:")
|
|
165
|
+
print(f" W (membership weights): {nmf_result['W'].shape}")
|
|
166
|
+
print(f" H (archetype profiles): {nmf_result['H'].shape}")
|
|
167
|
+
print(f" Explained variance: {nmf_result['explained_variance']:.3f}")
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
### Python API
|
|
173
|
+
|
|
174
|
+
#### 1. Data Preprocessing
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
import spatialcheckpoint as scp
|
|
178
|
+
|
|
179
|
+
# From Space Ranger output directory
|
|
180
|
+
preprocessor = scp.SpatialDataPreprocessor(spaceranger_out_path="path/to/spaceranger/output")
|
|
181
|
+
adata = preprocessor.load_visium()
|
|
182
|
+
adata = preprocessor.quality_control(adata, min_genes=200, max_mt_pct=25.0)
|
|
183
|
+
adata = preprocessor.normalize(adata)
|
|
184
|
+
adata.write_h5ad("data/processed/sample01_preprocessed.h5ad")
|
|
185
|
+
|
|
186
|
+
# Or from an existing H5AD
|
|
187
|
+
preprocessor = scp.SpatialDataPreprocessor(h5_path="existing_data.h5ad")
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
#### 2. Load & Cache
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
loader = scp.SpatialDataLoader(processed_dir="data/processed/")
|
|
194
|
+
adata = loader.load("sample01") # returns cached .h5ad if present
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
#### 3. Checkpoint Profiling
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
genes = scp.get_all_checkpoint_genes() # 44 genes, 6 functional categories
|
|
201
|
+
|
|
202
|
+
profiler = scp.SpatialCheckpointProfiler(adata, genes)
|
|
203
|
+
region_expr = profiler.expression_by_region() # DataFrame: region × gene
|
|
204
|
+
hotspots = profiler.checkpoint_hotspot_detection() # Moran's I per gene
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
#### 4. Spatial Feature Engineering
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
engineer = scp.SpatialFeatureEngineer(adata, genes)
|
|
211
|
+
features = engineer.extract_all_features(sample_id="sample01")
|
|
212
|
+
# → DataFrame with 80+ columns: co-localization, gradients, Moran's I, region ratios
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
#### 5. Co-localization Analysis
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
lr_pairs = scp.get_ligand_receptor_pairs() # [{ligand, receptor, alias}]
|
|
219
|
+
analyzer = scp.CheckpointColocalizationAnalyzer(adata, genes)
|
|
220
|
+
coloc_df = analyzer.compute_colocalization()
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
#### 6. Archetype Discovery
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
# feature_matrix: DataFrame (n_samples × n_features)
|
|
227
|
+
# sample_metadata: DataFrame with 'cancer_type' column, same index as feature_matrix
|
|
228
|
+
discovery = scp.SpatialArchetypeDiscovery(feature_matrix, sample_metadata)
|
|
229
|
+
|
|
230
|
+
cc = discovery.consensus_clustering(k_range=(2, 8), n_iterations=100)
|
|
231
|
+
labels = cc["labels"] # integer cluster labels
|
|
232
|
+
char = discovery.characterize_archetypes(labels) # archetype names, top features
|
|
233
|
+
|
|
234
|
+
nmf = discovery.run_nmf(k=cc["optimal_k"])
|
|
235
|
+
# nmf["W"] → (n_samples, k) soft membership weights
|
|
236
|
+
# nmf["H"] → (k, n_features) archetype profiles
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
#### 7. Train the Ensemble Classifier
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
trainer = scp.ArchetypeModelTrainer(
|
|
243
|
+
feature_matrix=feature_matrix,
|
|
244
|
+
archetype_labels=labels,
|
|
245
|
+
output_dir="models/",
|
|
246
|
+
)
|
|
247
|
+
results = trainer.run(n_optuna_trials=30)
|
|
248
|
+
# results["model"] → trained ensemble
|
|
249
|
+
# results["test_metrics"] → accuracy, F1, AUC
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
#### 8. SHAP Explanations
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
explainer = scp.ArchetypeExplainer(results["model"], feature_matrix)
|
|
256
|
+
shap_df = explainer.global_feature_importance() # DataFrame: feature × archetype
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
### CLI
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
# Download a registered dataset
|
|
265
|
+
spatialcheckpoint download BRCA_visium_10x
|
|
266
|
+
|
|
267
|
+
# Download all BRCA datasets
|
|
268
|
+
spatialcheckpoint download all --cancer-type BRCA
|
|
269
|
+
|
|
270
|
+
# Preprocess raw Visium output or H5AD
|
|
271
|
+
spatialcheckpoint preprocess path/to/spaceranger/ data/processed/
|
|
272
|
+
spatialcheckpoint preprocess sample.h5ad data/processed/
|
|
273
|
+
|
|
274
|
+
# Run full spatial analysis on a preprocessed sample
|
|
275
|
+
spatialcheckpoint analyze sample01
|
|
276
|
+
|
|
277
|
+
# Discover archetypes from a feature matrix CSV
|
|
278
|
+
spatialcheckpoint discover results/sample01/features.csv --k-min 2 --k-max 8
|
|
279
|
+
|
|
280
|
+
# Train the archetype classifier
|
|
281
|
+
spatialcheckpoint classify features.csv archetype_labels.csv --model-dir models/
|
|
282
|
+
|
|
283
|
+
# Generate publication figures (requires prior analyze run)
|
|
284
|
+
spatialcheckpoint figures --results-dir results/ --output-dir paper/figures/
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Gene Panel
|
|
290
|
+
|
|
291
|
+
The bundled panel covers **44 genes** across 6 functional categories:
|
|
292
|
+
|
|
293
|
+
| Category | Genes (examples) |
|
|
294
|
+
|----------|-----------------|
|
|
295
|
+
| Co-inhibitory receptors | `PDCD1` (PD-1), `CTLA4`, `LAG3`, `HAVCR2` (TIM-3), `TIGIT` |
|
|
296
|
+
| Co-inhibitory ligands | `CD274` (PD-L1), `PDCD1LG2` (PD-L2), `LGALS9` (Galectin-9) |
|
|
297
|
+
| Novel checkpoints | `VSIR` (VISTA), `CD276` (B7-H3), `VTCN1` (B7-H4) |
|
|
298
|
+
| Innate checkpoints | `CD47`, `SIRPA`, `LILRB1`, `LILRB2` |
|
|
299
|
+
| Immune enzymes | `IDO1`, `ENTPD1` (CD39), `NT5E` (CD73), `ARG1` |
|
|
300
|
+
| Co-stimulatory reference | `CD28`, `ICOS`, `TNFRSF4` (OX40), `TNFRSF9` (4-1BB) |
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
import spatialcheckpoint as scp
|
|
304
|
+
|
|
305
|
+
all_genes = scp.get_all_checkpoint_genes() # 44 genes sorted
|
|
306
|
+
pd1_pathway = scp.get_category_genes("co_inhibitory_receptors") # 9 genes
|
|
307
|
+
cell_markers = scp.get_immune_cell_markers() # {cell_type: [genes]}
|
|
308
|
+
lr_pairs = scp.get_ligand_receptor_pairs() # [{ligand, receptor, alias}]
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## Archetypes
|
|
314
|
+
|
|
315
|
+
Clusters found by consensus clustering are named after one of six predefined spatial immune archetypes:
|
|
316
|
+
|
|
317
|
+
| Archetype | Spatial signature |
|
|
318
|
+
|-----------|------------------|
|
|
319
|
+
| `Checkpoint-Hot` | High checkpoint expression, high immune infiltration, strong spatial co-localization |
|
|
320
|
+
| `Checkpoint-Cold` | Low checkpoint and immune activity throughout the tissue |
|
|
321
|
+
| `Checkpoint-Excluded` | Checkpoint expression concentrated at invasive margin; immune cells at periphery |
|
|
322
|
+
| `Checkpoint-Mismatch` | Checkpoint and immune signals spatially separated (non-overlapping) |
|
|
323
|
+
| `Innate-Dominant` | CD47/SIRPα axis dominant over adaptive checkpoints |
|
|
324
|
+
| `Novel-Enriched` | VISTA / B7-H3 / B7-H4 enriched over canonical PD-1/PD-L1 axis |
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## Pipeline Architecture
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
Raw Visium data (Space Ranger dir or H5AD)
|
|
332
|
+
→ SpatialDataPreprocessor QC, normalize → 'counts' / 'log1p' layers
|
|
333
|
+
→ SpatialDataLoader cache-aware H5AD loader
|
|
334
|
+
→ SpatialCheckpointProfiler region-based expression
|
|
335
|
+
(tumor_core, invasive_margin, stroma,
|
|
336
|
+
immune_enriched, necrotic)
|
|
337
|
+
→ SpatialFeatureEngineer 80+ features per slide:
|
|
338
|
+
co-localization, gradients, Moran's I,
|
|
339
|
+
region expression ratios
|
|
340
|
+
→ SpatialArchetypeDiscovery consensus KMeans + delta-area k-selection
|
|
341
|
+
+ NMF soft membership
|
|
342
|
+
→ ArchetypeModelTrainer LightGBM + XGBoost + MLP + RF ensemble,
|
|
343
|
+
SMOTE oversampling, RFECV feature selection,
|
|
344
|
+
Optuna hyperparameter optimization
|
|
345
|
+
→ ArchetypeExplainer SHAP global / per-class feature importance
|
|
346
|
+
→ ClinicalAssociationAnalyzer KM curves, Cox PH, logistic regression (OS/PFS)
|
|
347
|
+
→ Visualization spatial plots, publication-ready figures
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
**Key data contracts:**
|
|
351
|
+
- Spatial coordinates in `adata.obsm['spatial']`
|
|
352
|
+
- Region annotations in `adata.obs['region_type']` (categorical)
|
|
353
|
+
- Preprocessed files: `data/processed/{sample_id}_preprocessed.h5ad`
|
|
354
|
+
|
|
355
|
+
---
|
|
356
|
+
|
|
357
|
+
## Output Files
|
|
358
|
+
|
|
359
|
+
| Path | Contents |
|
|
360
|
+
|------|----------|
|
|
361
|
+
| `results/{sample_id}/features.csv` | 80+ spatial features |
|
|
362
|
+
| `results/{sample_id}/region_expression.csv` | Region × gene expression stats |
|
|
363
|
+
| `results/{sample_id}/hotspots.csv` | Moran's I per gene |
|
|
364
|
+
| `results/{sample_id}/colocalization.csv` | Ligand-receptor co-occurrence |
|
|
365
|
+
| `results/archetypes/archetype_labels.csv` | Sample → archetype assignment |
|
|
366
|
+
| `results/archetypes/archetype_characteristics.csv` | Per-archetype feature profiles |
|
|
367
|
+
| `results/archetypes/nmf_W.csv`, `nmf_H.csv` | NMF sample membership weights (W) and archetype profiles (H) |
|
|
368
|
+
| `models/archetype_classifier.joblib` | Serialized ensemble model |
|
|
369
|
+
| `paper/figures/` | Publication-ready PDF/PNG plots |
|
|
370
|
+
| `paper/tables/` | Feature importance and archetype CSV tables |
|
|
371
|
+
|
|
372
|
+
---
|
|
373
|
+
|
|
374
|
+
## API Reference
|
|
375
|
+
|
|
376
|
+
### Gene Set Utilities
|
|
377
|
+
|
|
378
|
+
| Function | Description |
|
|
379
|
+
|----------|-------------|
|
|
380
|
+
| `get_all_checkpoint_genes()` | Sorted list of 44 checkpoint gene symbols |
|
|
381
|
+
| `get_category_genes(category)` | Genes for a specific functional category |
|
|
382
|
+
| `get_immune_cell_markers()` | `{cell_type: [genes]}` reference marker dictionary |
|
|
383
|
+
| `get_ligand_receptor_pairs()` | List of `{ligand, receptor, alias}` pairs |
|
|
384
|
+
|
|
385
|
+
### Core Classes
|
|
386
|
+
|
|
387
|
+
| Class | Module | Purpose |
|
|
388
|
+
|-------|--------|---------|
|
|
389
|
+
| `SpatialDataPreprocessor` | `data.preprocess` | QC, normalize, dual-input (Space Ranger or H5AD) |
|
|
390
|
+
| `SpatialDataLoader` | `data.loader` | Cache-aware loader for preprocessed H5ADs |
|
|
391
|
+
| `SpatialCheckpointProfiler` | `analysis.spatial_expression` | Region-based expression, hotspot detection |
|
|
392
|
+
| `SpatialFeatureEngineer` | `analysis.spatial_features` | 80+ spatial feature extraction |
|
|
393
|
+
| `CheckpointColocalizationAnalyzer` | `analysis.colocalization` | Ligand-receptor spatial co-occurrence |
|
|
394
|
+
| `SpatialArchetypeDiscovery` | `model.archetype_discovery` | Consensus clustering + NMF |
|
|
395
|
+
| `SpatialArchetypeClassifier` | `model.classifier` | Ensemble classifier (LGBM+XGB+MLP+RF) |
|
|
396
|
+
| `ArchetypeModelTrainer` | `model.trainer` | Full train pipeline with HPO |
|
|
397
|
+
| `ArchetypeExplainer` | `model.explainer` | SHAP global/per-class importance |
|
|
398
|
+
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
## Development
|
|
402
|
+
|
|
403
|
+
```bash
|
|
404
|
+
# Clone and install in dev mode
|
|
405
|
+
git clone https://github.com/yourorg/SpatialCheckpoint.git
|
|
406
|
+
cd SpatialCheckpoint
|
|
407
|
+
pip install -e ".[dev]"
|
|
408
|
+
|
|
409
|
+
# Run tests (uses synthetic fixtures — no real data needed)
|
|
410
|
+
pytest tests/ -v
|
|
411
|
+
|
|
412
|
+
# Lint
|
|
413
|
+
ruff check src/
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
### Testing with synthetic data
|
|
417
|
+
|
|
418
|
+
All tests use synthetic fixtures from `tests/conftest.py`. No real Visium files are required:
|
|
419
|
+
|
|
420
|
+
```python
|
|
421
|
+
# 200-spot × 100-gene AnnData with spatial coords and region labels
|
|
422
|
+
# 50-sample × 80-feature DataFrame
|
|
423
|
+
# Clinical data with OS, PFS, ICI response
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
---
|
|
427
|
+
|
|
428
|
+
## Dependencies
|
|
429
|
+
|
|
430
|
+
Core: `scanpy`, `squidpy`, `anndata`, `pandas`, `numpy`, `scipy`, `scikit-learn`
|
|
431
|
+
|
|
432
|
+
ML: `lightgbm`, `xgboost`, `shap`, `imbalanced-learn`, `optuna`
|
|
433
|
+
|
|
434
|
+
Stats: `lifelines`
|
|
435
|
+
|
|
436
|
+
Viz: `matplotlib`, `seaborn`
|
|
437
|
+
|
|
438
|
+
CLI: `typer`, `rich`
|
|
439
|
+
|
|
440
|
+
Heavy dependencies (`squidpy`, `lightgbm`, `xgboost`, `lifelines`, `optuna`, `shap`, `imbalanced-learn`) are declared as install requirements, but they are imported with `try/except` fallbacks — partial functionality remains available in environments where one of them is missing or fails to import.
|
|
441
|
+
|
|
442
|
+
---
|
|
443
|
+
|
|
444
|
+
## Citation
|
|
445
|
+
|
|
446
|
+
If you use SpatialCheckpoint in your research, please cite:
|
|
447
|
+
|
|
448
|
+
```bibtex
|
|
449
|
+
@article{spatialcheckpoint2025,
|
|
450
|
+
title = {SpatialCheckpoint: Spatial heterogeneity profiling of immune checkpoints
|
|
451
|
+
in spatial transcriptomics},
|
|
452
|
+
author = {},
|
|
453
|
+
journal = {},
|
|
454
|
+
year = {2025},
|
|
455
|
+
}
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
460
|
+
## License
|
|
461
|
+
|
|
462
|
+
MIT License — see [LICENSE](LICENSE) for details.
|