spatialcore 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. spatialcore/__init__.py +122 -0
  2. spatialcore/annotation/__init__.py +253 -0
  3. spatialcore/annotation/acquisition.py +529 -0
  4. spatialcore/annotation/annotate.py +603 -0
  5. spatialcore/annotation/cellxgene.py +365 -0
  6. spatialcore/annotation/confidence.py +802 -0
  7. spatialcore/annotation/discovery.py +529 -0
  8. spatialcore/annotation/expression.py +363 -0
  9. spatialcore/annotation/loading.py +529 -0
  10. spatialcore/annotation/markers.py +297 -0
  11. spatialcore/annotation/ontology.py +1282 -0
  12. spatialcore/annotation/patterns.py +247 -0
  13. spatialcore/annotation/pipeline.py +620 -0
  14. spatialcore/annotation/synapse.py +380 -0
  15. spatialcore/annotation/training.py +1457 -0
  16. spatialcore/annotation/validation.py +422 -0
  17. spatialcore/core/__init__.py +34 -0
  18. spatialcore/core/cache.py +118 -0
  19. spatialcore/core/logging.py +135 -0
  20. spatialcore/core/metadata.py +149 -0
  21. spatialcore/core/utils.py +768 -0
  22. spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +86372 -0
  23. spatialcore/data/markers/canonical_markers.json +83 -0
  24. spatialcore/data/ontology_mappings/ontology_index.json +63865 -0
  25. spatialcore/plotting/__init__.py +109 -0
  26. spatialcore/plotting/benchmark.py +477 -0
  27. spatialcore/plotting/celltype.py +329 -0
  28. spatialcore/plotting/confidence.py +413 -0
  29. spatialcore/plotting/spatial.py +505 -0
  30. spatialcore/plotting/utils.py +411 -0
  31. spatialcore/plotting/validation.py +1342 -0
  32. spatialcore-0.1.9.dist-info/METADATA +213 -0
  33. spatialcore-0.1.9.dist-info/RECORD +36 -0
  34. spatialcore-0.1.9.dist-info/WHEEL +5 -0
  35. spatialcore-0.1.9.dist-info/licenses/LICENSE +201 -0
  36. spatialcore-0.1.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ Canonical markers for cell type validation.
3
+
4
+ This module provides canonical marker gene definitions for common cell types.
5
+ These markers are used for validation of cell type annotations, typically
6
+ via GMM-3 thresholding (see spatialcore.stats.classify, where available; no spatialcore/stats package is listed among the files shipped in this wheel).
7
+
8
+ Marker genes are curated from literature and validated on spatial
9
+ transcriptomics platforms (Xenium, CosMx).
10
+
11
+ References:
12
+ - Domínguez Conde et al., Science (2022) - Immune cell markers
13
+ - Tabula Sapiens Consortium (2022) - Pan-tissue markers
14
+ - Human Cell Atlas marker databases
15
+ """
16
+
17
+ from pathlib import Path
18
+ from typing import Dict, List, Optional
19
+ import json
20
+
21
+ from spatialcore.core.logging import get_logger
22
+
23
# Module-level logger, named after this module.
logger = get_logger(__name__)

# Bundled marker definitions, located relative to this module's file:
# <package root>/data/markers/canonical_markers.json
DEFAULT_MARKERS_PATH = Path(__file__).parent.parent.joinpath(
    "data", "markers", "canonical_markers.json"
)
27
+
28
+
29
+ # ============================================================================
30
+ # Canonical Marker Definitions
31
+ # ============================================================================
32
+ # NOTE: All canonical markers are now defined in a single source of truth:
33
+ # spatialcore/data/markers/canonical_markers.json (under src/ in the source repository)
34
+ #
35
+ # Use load_canonical_markers() to access them. The JSON file contains 75+
36
+ # cell types with curated marker genes from literature.
37
+ # ============================================================================
38
+
39
+
40
+ # ============================================================================
41
+ # Marker Loading and Lookup
42
+ # ============================================================================
43
+
44
def load_canonical_markers(
    config_path: Optional[Path] = None,
) -> Dict[str, List[str]]:
    """
    Load canonical markers from a JSON config file.

    Parameters
    ----------
    config_path : Path or str, optional
        Path to a JSON file with custom marker definitions. If None,
        the bundled file at ``DEFAULT_MARKERS_PATH`` is loaded. Strings
        and other path-like values are coerced to ``Path``.

    Returns
    -------
    Dict[str, List[str]]
        Dictionary mapping cell type names to marker gene lists.

    Raises
    ------
    FileNotFoundError
        If the markers JSON file does not exist or is not a regular file.
    ValueError
        Propagated from ``_load_markers_from_json`` when the file content
        does not follow one of the supported formats.

    Notes
    -----
    Cell type names should be in lowercase and match Cell Ontology (CL)
    naming conventions where possible
    (e.g., "cd4-positive, alpha-beta t cell").

    Two JSON layouts are accepted for each cell type.

    Simple format (list of genes):

    .. code-block:: json

        {
            "my custom type": ["GENE1", "GENE2", "GENE3"],
            "another type": ["GENE4", "GENE5"]
        }

    Extended format (with metadata):

    .. code-block:: json

        {
            "my custom type": {
                "index_marker": ["GENE1", "GENE2"],
                "description": "Description text"
            }
        }

    Examples
    --------
    >>> from spatialcore.annotation.markers import load_canonical_markers
    >>> markers = load_canonical_markers()  # doctest: +SKIP
    >>> # Load custom markers from a different file
    >>> markers = load_canonical_markers("custom_markers.json")  # doctest: +SKIP
    """
    using_default = config_path is None

    # Coerce caller-supplied values so that plain strings work too; the
    # original code called ``.exists()`` directly and failed on ``str``.
    markers_path = DEFAULT_MARKERS_PATH if using_default else Path(config_path)

    # ``is_file`` also rejects directories, which would otherwise pass an
    # existence check and then fail later inside ``open()``.
    if not markers_path.is_file():
        if using_default:
            hint = "Ensure the package data directory contains canonical_markers.json"
        else:
            hint = "Check the path passed as config_path"
        raise FileNotFoundError(
            f"Canonical markers file not found: {markers_path}. {hint}"
        )

    markers = _load_markers_from_json(markers_path)
    logger.debug(f"Loaded {len(markers)} markers from {markers_path}")

    return markers
123
+
124
+
125
def _load_markers_from_json(path: Path) -> Dict[str, List[str]]:
    """
    Load markers from a JSON file, supporting multiple formats.

    Supported formats:
    1. Simple: ``{"cell_type": ["GENE1", "GENE2"]}``
    2. Extended: ``{"cell_type": {"index_marker": ["GENE1"], ...}}``
    3. Wrapped: ``{"metadata": {...}, "markers": {"cell_type": [...]}}``

    Keys that start with ``_`` and the key ``"metadata"`` are ignored.
    Extended entries without an ``"index_marker"`` key are skipped with a
    warning.

    Parameters
    ----------
    path : Path
        Path to JSON file.

    Returns
    -------
    Dict[str, List[str]]
        Dictionary mapping cell type names to marker gene lists.

    Raises
    ------
    ValueError
        If the top-level JSON value is not an object, if an entry is
        neither a list nor a dict, if ``"index_marker"`` is not a list,
        or if any marker gene is not a string.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Anything other than a JSON object would previously fail with an
    # opaque AttributeError on ``.items()``; report it as a config error.
    if not isinstance(data, dict):
        raise ValueError(
            f"Marker config must be a JSON object at the top level, "
            f"got {type(data)} in '{path}'"
        )

    # Wrapped format: the real entries live under the "markers" key.
    # NOTE: a cell type literally named "markers" that uses the extended
    # format is therefore indistinguishable from the wrapper.
    wrapped = data.get("markers")
    if isinstance(wrapped, dict):
        data = wrapped

    markers: Dict[str, List[str]] = {}
    for key, value in data.items():
        # Skip metadata keys
        if key.startswith("_") or key == "metadata":
            continue

        if isinstance(value, list):
            # Simple format: {"cell_type": ["GENE1", "GENE2"]}
            gene_list = value
        elif isinstance(value, dict):
            # Extended format: {"cell_type": {"index_marker": [...], ...}}
            if "index_marker" not in value:
                logger.warning(
                    f"Skipping '{key}': dict format requires 'index_marker' key"
                )
                continue
            gene_list = value["index_marker"]
            if not isinstance(gene_list, list):
                raise ValueError(
                    f"'index_marker' must be a list for '{key}'"
                )
        else:
            raise ValueError(
                f"Marker config values must be lists or dicts, got {type(value)} for '{key}'"
            )

        # Shared validation for both formats.
        if not all(isinstance(g, str) for g in gene_list):
            raise ValueError(f"All marker genes must be strings for '{key}'")
        markers[key] = gene_list

    return markers
185
+
186
+
187
def match_to_canonical(
    cell_type: str,
    markers: Optional[Dict[str, List[str]]] = None,
) -> Optional[str]:
    """
    Match a cell type name to a canonical cell type in the markers dictionary.

    Uses exact matching after stripping surrounding whitespace and
    lowercasing both sides. No fuzzy/substring matching.

    Parameters
    ----------
    cell_type : str
        Cell type name to match. Non-string values (e.g. ``None`` or a
        NaN coming from a table column) never match.
    markers : Dict[str, List[str]], optional
        Marker dictionary. If None, loads from canonical_markers.json.

    Returns
    -------
    str or None
        The key of ``markers`` that matched, spelled exactly as it appears
        in the dictionary; None when nothing matches or when ``cell_type``
        is a placeholder label ("unassigned", "unknown", "cell", or empty).

    Examples
    --------
    >>> from spatialcore.annotation.markers import match_to_canonical
    >>> table = {"macrophage": ["CD68"], "b cell": ["CD19"]}
    >>> match_to_canonical("Macrophage", table)
    'macrophage'
    >>> match_to_canonical(" B cell ", table)
    'b cell'
    >>> match_to_canonical("some unknown type", table) is None
    True
    """
    # Guard first: the original called ``.lower()`` on whatever it was given.
    if not isinstance(cell_type, str):
        return None

    cell_type_lower = cell_type.lower().strip()

    # Placeholder labels are compared after normalisation so that variants
    # such as "UNKNOWN" or " Unassigned " are rejected as well.
    if cell_type_lower in {"unassigned", "unknown", "cell", ""}:
        return None

    if markers is None:
        markers = load_canonical_markers()

    for canonical_name in markers:
        if canonical_name.lower().strip() == cell_type_lower:
            return canonical_name

    return None
231
+
232
+
233
def get_markers_for_type(
    cell_type: str,
    markers: Optional[Dict[str, List[str]]] = None,
) -> List[str]:
    """
    Return the marker genes registered for a cell type.

    The lookup is delegated to ``match_to_canonical``, so only exact,
    case-insensitive name matches are found.

    Parameters
    ----------
    cell_type : str
        Cell type name to look up.
    markers : Dict[str, List[str]], optional
        Marker dictionary. If None, loads from canonical_markers.json.

    Returns
    -------
    List[str]
        The gene list stored under the matched key (the same list object
        held by ``markers``), or an empty list if no match is found.

    Examples
    --------
    >>> from spatialcore.annotation.markers import get_markers_for_type
    >>> get_markers_for_type("Macrophage", {"macrophage": ["CD68", "CD163"]})
    ['CD68', 'CD163']
    >>> get_markers_for_type("no such type", {"macrophage": ["CD68"]})
    []
    """
    marker_table = load_canonical_markers() if markers is None else markers
    canonical_name = match_to_canonical(cell_type, marker_table)
    return [] if canonical_name is None else marker_table[canonical_name]
269
+
270
+
271
def list_available_cell_types(
    markers: Optional[Dict[str, List[str]]] = None,
) -> List[str]:
    """
    Return the names of all cell types that have markers defined.

    Parameters
    ----------
    markers : Dict[str, List[str]], optional
        Marker dictionary. If None, loads from canonical_markers.json.

    Returns
    -------
    List[str]
        Cell type names (the dictionary keys) in sorted order.

    Examples
    --------
    >>> from spatialcore.annotation.markers import list_available_cell_types
    >>> list_available_cell_types({"t cell": ["CD3E"], "b cell": ["CD19"]})
    ['b cell', 't cell']
    """
    marker_table = markers if markers is not None else load_canonical_markers()
    names = list(marker_table)
    names.sort()
    return names