spatialcore 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spatialcore/__init__.py +122 -0
- spatialcore/annotation/__init__.py +253 -0
- spatialcore/annotation/acquisition.py +529 -0
- spatialcore/annotation/annotate.py +603 -0
- spatialcore/annotation/cellxgene.py +365 -0
- spatialcore/annotation/confidence.py +802 -0
- spatialcore/annotation/discovery.py +529 -0
- spatialcore/annotation/expression.py +363 -0
- spatialcore/annotation/loading.py +529 -0
- spatialcore/annotation/markers.py +297 -0
- spatialcore/annotation/ontology.py +1282 -0
- spatialcore/annotation/patterns.py +247 -0
- spatialcore/annotation/pipeline.py +620 -0
- spatialcore/annotation/synapse.py +380 -0
- spatialcore/annotation/training.py +1457 -0
- spatialcore/annotation/validation.py +422 -0
- spatialcore/core/__init__.py +34 -0
- spatialcore/core/cache.py +118 -0
- spatialcore/core/logging.py +135 -0
- spatialcore/core/metadata.py +149 -0
- spatialcore/core/utils.py +768 -0
- spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +86372 -0
- spatialcore/data/markers/canonical_markers.json +83 -0
- spatialcore/data/ontology_mappings/ontology_index.json +63865 -0
- spatialcore/plotting/__init__.py +109 -0
- spatialcore/plotting/benchmark.py +477 -0
- spatialcore/plotting/celltype.py +329 -0
- spatialcore/plotting/confidence.py +413 -0
- spatialcore/plotting/spatial.py +505 -0
- spatialcore/plotting/utils.py +411 -0
- spatialcore/plotting/validation.py +1342 -0
- spatialcore-0.1.9.dist-info/METADATA +213 -0
- spatialcore-0.1.9.dist-info/RECORD +36 -0
- spatialcore-0.1.9.dist-info/WHEEL +5 -0
- spatialcore-0.1.9.dist-info/licenses/LICENSE +201 -0
- spatialcore-0.1.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Canonical markers for cell type validation.
|
|
3
|
+
|
|
4
|
+
This module provides canonical marker gene definitions for common cell types.
|
|
5
|
+
These markers are used for validation of cell type annotations, typically
|
|
6
|
+
via GMM-3 thresholding (see spatialcore.stats.classify).
|
|
7
|
+
|
|
8
|
+
Marker genes are curated from literature and validated on spatial
|
|
9
|
+
transcriptomics platforms (Xenium, CosMx).
|
|
10
|
+
|
|
11
|
+
References:
|
|
12
|
+
- Domínguez Conde et al., Science (2022) - Immune cell markers
|
|
13
|
+
- Tabula Sapiens Consortium (2022) - Pan-tissue markers
|
|
14
|
+
- Human Cell Atlas marker databases
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Dict, List, Optional
|
|
19
|
+
import json
|
|
20
|
+
|
|
21
|
+
from spatialcore.core.logging import get_logger
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
# Default path for canonical markers (package data directory)
|
|
26
|
+
DEFAULT_MARKERS_PATH = Path(__file__).parent.parent / "data" / "markers" / "canonical_markers.json"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ============================================================================
|
|
30
|
+
# Canonical Marker Definitions
|
|
31
|
+
# ============================================================================
|
|
32
|
+
# NOTE: All canonical markers are now defined in a single source of truth:
|
|
33
|
+
# src/spatialcore/data/markers/canonical_markers.json
|
|
34
|
+
#
|
|
35
|
+
# Use load_canonical_markers() to access them. The JSON file contains 75+
|
|
36
|
+
# cell types with curated marker genes from literature.
|
|
37
|
+
# ============================================================================
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ============================================================================
|
|
41
|
+
# Marker Loading and Lookup
|
|
42
|
+
# ============================================================================
|
|
43
|
+
|
|
44
|
+
def load_canonical_markers(
    config_path: Optional[Path] = None,
) -> Dict[str, List[str]]:
    """
    Load canonical markers from JSON config.

    Parameters
    ----------
    config_path : Path or str, optional
        Path to a JSON file with custom marker definitions. If None,
        loads from the default canonical_markers.json in the package
        data directory. Plain strings (and other path-like objects) are
        accepted and converted to ``Path``.

    Returns
    -------
    Dict[str, List[str]]
        Dictionary mapping cell type names to marker gene lists.

    Raises
    ------
    FileNotFoundError
        If the markers JSON file does not exist.
    ValueError
        If an entry in the file is malformed (for example, a gene list
        that contains non-string values).

    Notes
    -----
    Cell type names should be in lowercase and match Cell Ontology (CL)
    naming conventions where possible (e.g., "cd4-positive, alpha-beta t cell").

    Three JSON layouts are supported:

    Simple format (list of genes):

    .. code-block:: json

        {
            "my custom type": ["GENE1", "GENE2", "GENE3"],
            "another type": ["GENE4", "GENE5"]
        }

    Extended format (with metadata):

    .. code-block:: json

        {
            "my custom type": {
                "index_marker": ["GENE1", "GENE2"],
                "description": "Description text"
            }
        }

    Wrapped format (either of the above nested under a ``"markers"`` key):

    .. code-block:: json

        {
            "metadata": {"version": "1"},
            "markers": {"my custom type": ["GENE1", "GENE2"]}
        }

    Examples
    --------
    >>> from spatialcore.annotation.markers import load_canonical_markers
    >>> markers = load_canonical_markers()
    >>> macrophage_genes = markers["macrophage"]
    >>> # Load custom markers from a different file
    >>> markers = load_canonical_markers("custom_markers.json")
    """
    using_default = config_path is None
    # Coerce so that str / os.PathLike inputs work, not only Path instances.
    markers_path = DEFAULT_MARKERS_PATH if using_default else Path(config_path)

    if not markers_path.exists():
        if using_default:
            # Only the bundled file warrants the "check the package data" hint.
            raise FileNotFoundError(
                f"Canonical markers file not found: {markers_path}. "
                "Ensure the package data directory contains canonical_markers.json"
            )
        raise FileNotFoundError(
            f"Canonical markers file not found: {markers_path}"
        )

    markers = _load_markers_from_json(markers_path)
    logger.debug(f"Loaded {len(markers)} markers from {markers_path}")

    return markers
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _load_markers_from_json(path: Path) -> Dict[str, List[str]]:
    """
    Load markers from a JSON file, supporting multiple formats.

    Supported formats:
    1. Simple: ``{"cell_type": ["GENE1", "GENE2"]}``
    2. Extended: ``{"cell_type": {"index_marker": ["GENE1"], ...}}``
    3. Wrapped: ``{"metadata": {...}, "markers": {"cell_type": [...]}}``

    Keys that start with ``_`` and the key ``"metadata"`` are ignored.
    Extended entries without an ``"index_marker"`` key are skipped with a
    warning rather than treated as an error.

    Parameters
    ----------
    path : Path
        Path to JSON file.

    Returns
    -------
    Dict[str, List[str]]
        Dictionary mapping cell type names to marker gene lists.

    Raises
    ------
    ValueError
        If the top-level JSON value is not an object, if an entry is
        neither a list nor a dict, if ``"index_marker"`` is not a list,
        or if any gene name is not a string.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Any other top-level type (list, string, number) has no cell-type keys
    # to iterate; fail with the same exception type as other malformed input.
    if not isinstance(data, dict):
        raise ValueError(
            f"Marker config must be a JSON object at the top level, "
            f"got {type(data)} in '{path}'"
        )

    # Handle wrapped format with "markers" key
    wrapped = data.get("markers")
    if isinstance(wrapped, dict):
        data = wrapped

    markers: Dict[str, List[str]] = {}
    for key, value in data.items():
        # Skip metadata keys
        if key.startswith("_") or key == "metadata":
            continue

        if isinstance(value, list):
            # Simple format: {"cell_type": ["GENE1", "GENE2"]}
            gene_list = value
        elif isinstance(value, dict):
            # Extended format: {"cell_type": {"index_marker": [...], "description": "..."}}
            if "index_marker" not in value:
                logger.warning(
                    f"Skipping '{key}': dict format requires 'index_marker' key"
                )
                continue
            gene_list = value["index_marker"]
            if not isinstance(gene_list, list):
                raise ValueError(
                    f"'index_marker' must be a list for '{key}'"
                )
        else:
            raise ValueError(
                f"Marker config values must be lists or dicts, got {type(value)} for '{key}'"
            )

        # Single validation point shared by both formats.
        if not all(isinstance(g, str) for g in gene_list):
            raise ValueError(f"All marker genes must be strings for '{key}'")
        markers[key] = gene_list

    return markers
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def match_to_canonical(
    cell_type: str,
    markers: Optional[Dict[str, List[str]]] = None,
) -> Optional[str]:
    """
    Match a cell type name to a canonical cell type in the markers dictionary.

    Uses exact case-insensitive matching only (surrounding whitespace is
    ignored). No fuzzy/substring matching.

    Parameters
    ----------
    cell_type : str
        Cell type name to match. Non-string values (``None``, NaN and
        other missing-value markers) and the placeholder labels
        "unassigned", "unknown", "cell" and "" never match.
    markers : Dict[str, List[str]], optional
        Marker dictionary. If None, loads from canonical_markers.json.

    Returns
    -------
    str or None
        Canonical cell type name (the key exactly as it appears in
        ``markers``) if matched, None otherwise.

    Examples
    --------
    >>> from spatialcore.annotation.markers import match_to_canonical
    >>> markers = {"macrophage": ["CD68"], "b cell": ["CD79A"]}
    >>> match_to_canonical("Macrophage", markers)
    'macrophage'
    >>> match_to_canonical(" B cell ", markers)
    'b cell'
    >>> match_to_canonical("some unknown type", markers) is None
    True
    >>> match_to_canonical("UNKNOWN", markers) is None
    True
    """
    # Annotation columns frequently carry None/NaN for unlabeled cells;
    # anything that is not a string cannot name a cell type.
    if not isinstance(cell_type, str):
        return None

    cell_type_lower = cell_type.lower().strip()

    # Compare placeholders after normalisation so that case and padding
    # variants ("UNKNOWN", " unassigned ") are rejected consistently.
    if cell_type_lower in ("", "unassigned", "unknown", "cell"):
        return None

    if markers is None:
        markers = load_canonical_markers()

    for canonical_name in markers:
        if canonical_name.lower().strip() == cell_type_lower:
            return canonical_name

    return None
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_markers_for_type(
    cell_type: str,
    markers: Optional[Dict[str, List[str]]] = None,
) -> List[str]:
    """
    Look up the marker genes registered for a cell type.

    The name is resolved with ``match_to_canonical`` (exact,
    case-insensitive matching only).

    Parameters
    ----------
    cell_type : str
        Cell type name to look up.
    markers : Dict[str, List[str]], optional
        Marker dictionary. If None, loads from canonical_markers.json.

    Returns
    -------
    List[str]
        The gene list stored under the matched key of the marker
        dictionary (the stored list itself, not a copy). Empty list if
        no match found.

    Examples
    --------
    >>> from spatialcore.annotation.markers import get_markers_for_type
    >>> table = {"macrophage": ["CD68", "CD163"]}
    >>> get_markers_for_type("Macrophage", table)
    ['CD68', 'CD163']
    >>> get_markers_for_type("neuron", table)
    []
    """
    marker_table = load_canonical_markers() if markers is None else markers

    canonical = match_to_canonical(cell_type, marker_table)
    if canonical is None:
        return []

    return marker_table[canonical]
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def list_available_cell_types(
    markers: Optional[Dict[str, List[str]]] = None,
) -> List[str]:
    """
    Return the names of every cell type that has markers defined.

    Parameters
    ----------
    markers : Dict[str, List[str]], optional
        Marker dictionary. If None, loads from canonical_markers.json.

    Returns
    -------
    List[str]
        Cell type names (the dictionary keys) in sorted order.

    Examples
    --------
    >>> from spatialcore.annotation.markers import list_available_cell_types
    >>> list_available_cell_types({"t cell": ["CD3E"], "b cell": ["CD79A"]})
    ['b cell', 't cell']
    """
    marker_table = load_canonical_markers() if markers is None else markers
    # Iterating a dict yields its keys, so sorting it sorts the names.
    return sorted(marker_table)
|