spatialcore 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. spatialcore/__init__.py +122 -0
  2. spatialcore/annotation/__init__.py +253 -0
  3. spatialcore/annotation/acquisition.py +529 -0
  4. spatialcore/annotation/annotate.py +603 -0
  5. spatialcore/annotation/cellxgene.py +365 -0
  6. spatialcore/annotation/confidence.py +802 -0
  7. spatialcore/annotation/discovery.py +529 -0
  8. spatialcore/annotation/expression.py +363 -0
  9. spatialcore/annotation/loading.py +529 -0
  10. spatialcore/annotation/markers.py +297 -0
  11. spatialcore/annotation/ontology.py +1282 -0
  12. spatialcore/annotation/patterns.py +247 -0
  13. spatialcore/annotation/pipeline.py +620 -0
  14. spatialcore/annotation/synapse.py +380 -0
  15. spatialcore/annotation/training.py +1457 -0
  16. spatialcore/annotation/validation.py +422 -0
  17. spatialcore/core/__init__.py +34 -0
  18. spatialcore/core/cache.py +118 -0
  19. spatialcore/core/logging.py +135 -0
  20. spatialcore/core/metadata.py +149 -0
  21. spatialcore/core/utils.py +768 -0
  22. spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +86372 -0
  23. spatialcore/data/markers/canonical_markers.json +83 -0
  24. spatialcore/data/ontology_mappings/ontology_index.json +63865 -0
  25. spatialcore/plotting/__init__.py +109 -0
  26. spatialcore/plotting/benchmark.py +477 -0
  27. spatialcore/plotting/celltype.py +329 -0
  28. spatialcore/plotting/confidence.py +413 -0
  29. spatialcore/plotting/spatial.py +505 -0
  30. spatialcore/plotting/utils.py +411 -0
  31. spatialcore/plotting/validation.py +1342 -0
  32. spatialcore-0.1.9.dist-info/METADATA +213 -0
  33. spatialcore-0.1.9.dist-info/RECORD +36 -0
  34. spatialcore-0.1.9.dist-info/WHEEL +5 -0
  35. spatialcore-0.1.9.dist-info/licenses/LICENSE +201 -0
  36. spatialcore-0.1.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,247 @@
1
+ """
2
+ Cell type pattern definitions for ontology mapping.
3
+
4
+ These patterns canonicalize common cell type label variations to their
5
+ Cell Ontology (CL) standard names before fuzzy matching.
6
+
7
+ The patterns are applied in Tier 0 (pattern canonicalization) before
8
+ exact or fuzzy matching is attempted.
9
+
10
+ Pattern format: regex pattern -> canonical CL term name
11
+ """
12
+
13
# Cell type patterns: regex -> canonical Cell Ontology term.
#
# Patterns are checked in insertion order and the FIRST match wins, so a
# specific pattern must always be listed BEFORE any general pattern that can
# match the same label (e.g. "cd56 bright nk" before the generic "nk cell").
# Patterns are written for lower-cased labels; get_canonical_term() lower-cases
# and strips the label before matching.
CELL_TYPE_PATTERNS = {
    # =========================================================================
    # LYMPHOID LINEAGE
    # =========================================================================

    # T cell subtypes (most specific first)
    r"t.*helper.*17|th17": "t-helper 17 cell",  # MUST be before t-helper 1 ("1\b" vs "17")
    r"t.*helper.*1\b|th1\b": "t-helper 1 cell",
    r"t.*helper.*2\b|th2\b": "t-helper 2 cell",
    r"regulatory.*t|t.*regulatory|treg": "regulatory t cell",
    # NOTE: the "gdT" alternative can only fire for callers that match labels
    # without lower-casing them first; it is kept for those callers.
    r"gamma.*delta.*t|gammadelta.*t|gdt|gdT": "gamma-delta t cell",
    r"mait|mucosal.*invariant": "mucosal invariant t cell",
    r"nkt|natural.*killer.*t|inkt": "invariant natural killer t-cell",
    r"cytotoxic.*t|ctl": "cytotoxic t cell",
    r"memory.*t.*cd4|cd4.*memory": "cd4-positive, alpha-beta memory t cell",
    r"memory.*t.*cd8|cd8.*memory": "cd8-positive, alpha-beta memory t cell",
    r"naive.*t.*cd4|cd4.*naive": "naive thymus-derived cd4-positive, alpha-beta t cell",
    r"naive.*t.*cd8|cd8.*naive": "naive thymus-derived cd8-positive, alpha-beta t cell",
    r"effector.*memory|\btem\b": "effector memory t cell",
    r"central.*memory|\btcm\b": "central memory t cell",
    r"exhausted.*t": "exhausted t cell",

    # Generic T cell CD markers. These MUST come after the subtypes above:
    # "cd4+ t" / "t cells cd4" also occur inside labels such as
    # "T cells CD4 naive" or "CD4+ Tregs", which would otherwise be collapsed
    # to the generic CD4/CD8 term.
    r"t\s*cells?,?\s*cd4\+?|cd4\+?\s*t|cd4\s*positive": "cd4-positive, alpha-beta t cell",
    r"t\s*cells?,?\s*cd8\+?|cd8\+?\s*t|cd8\s*positive": "cd8-positive, alpha-beta t cell",

    # Generic T cell fallback (anchored at the start of the label)
    r"^t\s*cell|^t\s+cells": "t cell",

    # B cells
    r"cd19.*cd20.*b\b|cd20.*cd19.*b\b": "b cell",
    r"cd19.*b\b": "b cell",
    r"cd20.*b\b": "b cell",
    r"germinal.*center.*b|gc.*b\s*cell": "germinal center b cell",
    r"memory.*b": "memory b cell",
    r"naive.*b": "naive b cell",
    r"plasma.*blast|plasmablast": "plasmablast",
    r"^b\s*cell|^b\s+cells?$": "b cell",

    # Plasma cells with immunoglobulin types
    r"iga\+?\s*plasma": "iga plasmacyte",
    r"igg\+?\s*plasma": "igg plasmacyte",
    r"igm\+?\s*plasma": "igm plasmacyte",
    r"^plasma\s*cell|^plasma\s*$": "plasma cell",

    # NK cells (CD56 subsets MUST be before the generic NK pattern, which
    # also matches "cd56 bright nk cells")
    r"cd56.*bright|bright.*nk": "cd56-bright natural killer cell",
    r"cd56.*dim|dim.*nk": "cd56-dim natural killer cell",
    r"\bnk\s*cell|\bnatural\s*killer": "natural killer cell",

    # Innate lymphoid cells
    r"ilc1|innate.*lymphoid.*1|group.*1.*ilc": "group 1 innate lymphoid cell",
    r"ilc2|innate.*lymphoid.*2|group.*2.*ilc": "group 2 innate lymphoid cell",
    r"ilc3|innate.*lymphoid.*3|group.*3.*ilc": "group 3 innate lymphoid cell",
    r"innate.*lymphoid|^ilc\b": "innate lymphoid cell",

    # =========================================================================
    # MYELOID LINEAGE
    # =========================================================================

    # Monocytes (specific patterns BEFORE general ones)
    r"non.*classical.*mono": "non-classical monocyte",  # MUST be before classical
    r"classical.*mono": "classical monocyte",
    r"intermediate.*mono": "intermediate monocyte",
    # Listed here rather than with the other progenitors because the generic
    # "monocyte" pattern below also matches "granulocyte monocyte progenitor".
    r"^gmp\b|granulocyte.*monocyte.*prog": "granulocyte monocyte progenitor",
    r"monocyte": "monocyte",

    # Macrophages
    r"m1.*macrophage|macrophage.*m1": "inflammatory macrophage",
    r"m2.*macrophage|macrophage.*m2": "alternatively activated macrophage",
    r"alveolar.*macrophage|alveolar.*\bmph\b": "alveolar macrophage",
    r"kupffer": "kupffer cell",
    r"tissue.*resident.*macro": "tissue-resident macrophage",
    r"macrophages?|\bmph\b": "macrophage",

    # Dendritic cells
    r"^pdc\b|plasmacytoid\s*dc|plasmacytoid\s*dendritic": "plasmacytoid dendritic cell",
    r"cdc1|conventional.*dc.*1|myeloid.*dc.*1": "conventional dendritic cell type 1",
    r"cdc2|conventional.*dc.*2|myeloid.*dc.*2": "conventional dendritic cell type 2",
    r"migratory.*dc|migratory.*dendritic": "migratory dendritic cell",
    r"langerhans": "langerhans cell",
    r"dendritic\s*cells?|\bdc[s\d]?\b": "dendritic cell",

    # Granulocytes
    r"neutrophils?": "neutrophil",
    r"basophils?": "basophil",
    r"eosinophils?": "eosinophil",
    r"mast\s*cell": "mast cell",

    # General myeloid fallback. Kept LAST in this section: with first-match
    # semantics, listing it first would swallow labels such as "myeloid dc 1".
    r"^myeloid\b|myeloid\s*cell": "myeloid cell",

    # =========================================================================
    # STROMAL CELLS
    # =========================================================================

    r"myofibroblast": "myofibroblast cell",
    r"cancer.*associated.*fibro|caf": "cancer associated fibroblast",
    r"fibroblasts?|reticular\s+fibroblast": "fibroblast",
    r"smooth\s*muscle": "smooth muscle cell",
    r"pericyte": "pericyte",
    r"mesenchymal.*stem|^msc\b": "mesenchymal stem cell",
    r"stromal": "stromal cell",

    # =========================================================================
    # ENDOTHELIAL CELLS
    # =========================================================================

    r"lymphatic.*ec|lymphatic.*endothel|lec\b": "lymphatic endothelial cell",
    r"arterial.*ec|arterial.*endothel": "arterial endothelial cell",
    r"venous.*ec|venous.*endothel": "venous endothelial cell",
    r"capillary.*ec|capillary.*endothel": "capillary endothelial cell",
    r"tip.*ec|tip.*endothel": "tip cell",
    r"stalk.*ec|stalk.*endothel": "stalk cell",
    r"endotheli|^ve\b|^ec\b|ecs\b": "endothelial cell",

    # =========================================================================
    # EPITHELIAL CELLS
    # =========================================================================

    # Intestinal/Colon (specific patterns BEFORE general ones)
    r"enteroendocrine|ee\s*cell": "enteroendocrine cell",  # MUST be before enterocyte
    r"enterocytes?": "enterocyte",  # full word only; a bare "entero" prefix does not match
    r"colonocytes?": "colon glandular cell",  # full word only; a bare "colono" prefix does not match
    r"goblet": "goblet cell",
    r"paneth": "paneth cell",
    r"tuft|brush": "tuft cell",
    r"transit.*amplifying|ta\s+cell": "transit amplifying cell of colon",
    r"stem.*cell.*intestin|intestin.*stem|lgr5": "intestinal crypt stem cell",

    # Lung/Airway
    r"ciliated": "ciliated epithelial cell",
    r"\bclub\b|clara": "club cell",
    r"alveolar.*type.*1|at1|pneumocyte.*type.*1": "type i pneumocyte",
    r"alveolar.*type.*2|at2|pneumocyte.*type.*2": "type ii pneumocyte",
    r"basal.*epithelial|basal\s*cell": "basal cell",
    r"secretory.*epitheli": "secretory cell",

    # Liver
    r"hepatocyte": "hepatocyte",
    r"cholangiocyte|bile.*duct.*epitheli": "cholangiocyte",
    r"hepatic.*stellate|stellate.*cell": "hepatic stellate cell",

    # Skin
    r"keratinocyte": "keratinocyte",
    r"melanocyte": "melanocyte",

    # General epithelial (fallbacks for the section)
    r"squamous": "squamous epithelial cell",
    r"columnar": "columnar cell",
    r"epitheli": "epithelial cell",

    # =========================================================================
    # NEURAL / GLIAL
    # =========================================================================

    r"astrocytes?": "astrocyte",
    r"oligodendrocyte": "oligodendrocyte",
    r"microglia": "microglial cell",  # MUST be before the generic "glia"
    r"schwann": "schwann cell",
    r"glia": "glial cell",
    r"neuron|neural\s*cell": "neuron",

    # =========================================================================
    # STEM / PROGENITOR
    # =========================================================================

    r"^hsc\b|hematopoietic.*stem": "hematopoietic stem cell",
    # Redundant with the MSC entry in the stromal section (same alternatives,
    # same target); retained so the set of keys is unchanged.
    r"^msc\b|mesenchymal.*stem": "mesenchymal stem cell",
    r"^cmp\b|common.*myeloid.*prog": "common myeloid progenitor",
    r"^mep\b|megakaryocyte.*erythrocyte.*prog": "megakaryocyte-erythroid progenitor cell",
    r"progenitor|precursor": "progenitor cell",
    r"stem": "stem cell",

    # =========================================================================
    # OTHER
    # =========================================================================

    r"adipocyte|adipose": "adipocyte",
    r"platelets?|thrombocyte": "platelet",
    r"^rbc\b|red\s*blood\s*cell|erythrocyte": "erythrocyte",
    r"megakaryocyte": "megakaryocyte",

    # =========================================================================
    # NEOPLASTIC / TUMOR CELLS
    # =========================================================================
    # Map to "malignant cell" (CL:0001064) - the CL term for tumor/cancer cells
    # This matches CellxGene's convention for tumor cell annotations

    r"tumor\s*cell|tumour\s*cell": "malignant cell",
    r"cancer\s*cell": "malignant cell",
    r"malignant\s*cell|malignant": "malignant cell",
    r"neoplastic\s*cell|neoplastic": "malignant cell",
    r"carcinoma\s*cell|carcinoma": "malignant cell",
    # Common tumor type abbreviations (lung, breast, etc.)
    r"\bluad\b|\blusc\b|\bnsclc\b": "malignant cell",  # Lung cancer types
    r"\bbrca\b": "malignant cell",  # Breast cancer
    r"\bhcc\b": "malignant cell",  # Hepatocellular carcinoma
    r"\bcrc\b": "malignant cell",  # Colorectal cancer
    r"tumor|tumour|cancer": "malignant cell",  # Catch-all for remaining
}
214
+
215
+
216
+ def get_canonical_term(label: str) -> str | None:
217
+ """
218
+ Get canonical Cell Ontology term for a cell type label.
219
+
220
+ Parameters
221
+ ----------
222
+ label : str
223
+ Cell type label to canonicalize.
224
+
225
+ Returns
226
+ -------
227
+ str or None
228
+ Canonical CL term name, or None if no pattern matches.
229
+
230
+ Examples
231
+ --------
232
+ >>> get_canonical_term("CD4+ T cells")
233
+ 'cd4-positive, alpha-beta t cell'
234
+ >>> get_canonical_term("NK cells")
235
+ 'natural killer cell'
236
+ >>> get_canonical_term("Unknown")
237
+ None
238
+ """
239
+ import re
240
+
241
+ label_lower = label.lower().strip()
242
+
243
+ for pattern, canonical_term in CELL_TYPE_PATTERNS.items():
244
+ if re.search(pattern, label_lower):
245
+ return canonical_term
246
+
247
+ return None