spatialcore 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spatialcore/__init__.py +122 -0
- spatialcore/annotation/__init__.py +253 -0
- spatialcore/annotation/acquisition.py +529 -0
- spatialcore/annotation/annotate.py +603 -0
- spatialcore/annotation/cellxgene.py +365 -0
- spatialcore/annotation/confidence.py +802 -0
- spatialcore/annotation/discovery.py +529 -0
- spatialcore/annotation/expression.py +363 -0
- spatialcore/annotation/loading.py +529 -0
- spatialcore/annotation/markers.py +297 -0
- spatialcore/annotation/ontology.py +1282 -0
- spatialcore/annotation/patterns.py +247 -0
- spatialcore/annotation/pipeline.py +620 -0
- spatialcore/annotation/synapse.py +380 -0
- spatialcore/annotation/training.py +1457 -0
- spatialcore/annotation/validation.py +422 -0
- spatialcore/core/__init__.py +34 -0
- spatialcore/core/cache.py +118 -0
- spatialcore/core/logging.py +135 -0
- spatialcore/core/metadata.py +149 -0
- spatialcore/core/utils.py +768 -0
- spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +86372 -0
- spatialcore/data/markers/canonical_markers.json +83 -0
- spatialcore/data/ontology_mappings/ontology_index.json +63865 -0
- spatialcore/plotting/__init__.py +109 -0
- spatialcore/plotting/benchmark.py +477 -0
- spatialcore/plotting/celltype.py +329 -0
- spatialcore/plotting/confidence.py +413 -0
- spatialcore/plotting/spatial.py +505 -0
- spatialcore/plotting/utils.py +411 -0
- spatialcore/plotting/validation.py +1342 -0
- spatialcore-0.1.9.dist-info/METADATA +213 -0
- spatialcore-0.1.9.dist-info/RECORD +36 -0
- spatialcore-0.1.9.dist-info/WHEEL +5 -0
- spatialcore-0.1.9.dist-info/licenses/LICENSE +201 -0
- spatialcore-0.1.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cell type pattern definitions for ontology mapping.
|
|
3
|
+
|
|
4
|
+
These patterns canonicalize common cell type label variations to their
|
|
5
|
+
Cell Ontology (CL) standard names before fuzzy matching.
|
|
6
|
+
|
|
7
|
+
The patterns are applied in Tier 0 (pattern canonicalization) before
|
|
8
|
+
exact or fuzzy matching is attempted.
|
|
9
|
+
|
|
10
|
+
Pattern format: regex pattern -> canonical CL term name
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# Cell type patterns: regex -> canonical Cell Ontology term
|
|
14
|
+
# Patterns are checked in order; first match wins
|
|
15
|
+
CELL_TYPE_PATTERNS = {
    # NOTE: matching is first-match-wins in insertion order, so within every
    # group the specific patterns must be listed BEFORE the general ones.

    # =========================================================================
    # LYMPHOID LINEAGE
    # =========================================================================

    # CD4/CD8 memory and naive subsets.
    # Listed BEFORE the general CD4/CD8 marker patterns: otherwise labels such
    # as "T cells CD4 naive" or "CD4+ T cells, memory" are claimed by the
    # general pattern and never reach these more specific terms.
    r"memory.*t.*cd4|cd4.*memory": "cd4-positive, alpha-beta memory t cell",
    r"memory.*t.*cd8|cd8.*memory": "cd8-positive, alpha-beta memory t cell",
    r"naive.*t.*cd4|cd4.*naive": "naive thymus-derived cd4-positive, alpha-beta t cell",
    r"naive.*t.*cd8|cd8.*naive": "naive thymus-derived cd8-positive, alpha-beta t cell",

    # T cell CD markers
    r"t\s*cells?,?\s*cd4\+?|cd4\+?\s*t|cd4\s*positive": "cd4-positive, alpha-beta t cell",
    r"t\s*cells?,?\s*cd8\+?|cd8\+?\s*t|cd8\s*positive": "cd8-positive, alpha-beta t cell",

    # T cell subtypes
    r"t.*helper.*17|th17": "t-helper 17 cell",
    r"t.*helper.*1\b|th1\b": "t-helper 1 cell",
    r"t.*helper.*2\b|th2\b": "t-helper 2 cell",
    r"regulatory.*t|t.*regulatory|treg": "regulatory t cell",
    # NOTE(review): the `gdT` alternative can only match for callers that do
    # not lower-case the label first (get_canonical_term does lower-case it).
    r"gamma.*delta.*t|gammadelta.*t|gdt|gdT": "gamma-delta t cell",
    r"mait|mucosal.*invariant": "mucosal invariant t cell",
    r"nkt|natural.*killer.*t|inkt": "invariant natural killer t-cell",
    r"cytotoxic.*t|ctl": "cytotoxic t cell",
    r"effector.*memory|\btem\b": "effector memory t cell",
    r"central.*memory|\btcm\b": "central memory t cell",
    r"exhausted.*t": "exhausted t cell",
    r"^t\s*cell|^t\s+cells": "t cell",

    # B cells
    r"cd19.*cd20.*b\b|cd20.*cd19.*b\b": "b cell",
    r"cd19.*b\b": "b cell",
    r"cd20.*b\b": "b cell",
    r"germinal.*center.*b|gc.*b\s*cell": "germinal center b cell",
    r"memory.*b": "memory b cell",
    r"naive.*b": "naive b cell",
    r"plasma.*blast|plasmablast": "plasmablast",
    r"^b\s*cell|^b\s+cells?$": "b cell",

    # Plasma cells with immunoglobulin types
    r"iga\+?\s*plasma": "iga plasmacyte",
    r"igg\+?\s*plasma": "igg plasmacyte",
    r"igm\+?\s*plasma": "igm plasmacyte",
    r"^plasma\s*cell|^plasma\s*$": "plasma cell",

    # NK cells (CD56 subsets BEFORE the general NK pattern, so that
    # "CD56 bright NK cells" is not swallowed by the general one)
    r"cd56.*bright|bright.*nk": "cd56-bright natural killer cell",
    r"cd56.*dim|dim.*nk": "cd56-dim natural killer cell",
    r"\bnk\s*cell|\bnatural\s*killer": "natural killer cell",

    # Innate lymphoid cells
    r"ilc1|innate.*lymphoid.*1|group.*1.*ilc": "group 1 innate lymphoid cell",
    r"ilc2|innate.*lymphoid.*2|group.*2.*ilc": "group 2 innate lymphoid cell",
    r"ilc3|innate.*lymphoid.*3|group.*3.*ilc": "group 3 innate lymphoid cell",
    r"innate.*lymphoid|^ilc\b": "innate lymphoid cell",

    # =========================================================================
    # MYELOID LINEAGE
    # =========================================================================

    # Monocytes (specific patterns BEFORE general ones)
    r"non.*classical.*mono": "non-classical monocyte",  # MUST be before classical
    r"classical.*mono": "classical monocyte",
    r"intermediate.*mono": "intermediate monocyte",
    r"monocyte": "monocyte",

    # Macrophages
    r"m1.*macrophage|macrophage.*m1": "inflammatory macrophage",
    r"m2.*macrophage|macrophage.*m2": "alternatively activated macrophage",
    r"alveolar.*macrophage|alveolar.*\bmph\b": "alveolar macrophage",
    r"kupffer": "kupffer cell",
    r"tissue.*resident.*macro": "tissue-resident macrophage",
    r"macrophages?|\bmph\b": "macrophage",

    # Dendritic cells
    r"^pdc\b|plasmacytoid\s*dc|plasmacytoid\s*dendritic": "plasmacytoid dendritic cell",
    r"cdc1|conventional.*dc.*1|myeloid.*dc.*1": "conventional dendritic cell type 1",
    r"cdc2|conventional.*dc.*2|myeloid.*dc.*2": "conventional dendritic cell type 2",
    r"migratory.*dc|migratory.*dendritic": "migratory dendritic cell",
    r"langerhans": "langerhans cell",
    r"dendritic\s*cells?|\bdc[s\d]?\b": "dendritic cell",

    # Granulocytes
    r"neutrophils?": "neutrophil",
    r"basophils?": "basophil",
    r"eosinophils?": "eosinophil",
    r"mast\s*cell": "mast cell",

    # General myeloid fallback.
    # Kept AFTER every specific myeloid type: because the first match wins,
    # placing it earlier would turn e.g. "myeloid dc 1" into "myeloid cell"
    # and make the `myeloid.*dc.*N` alternatives above unreachable.
    r"^myeloid\b|myeloid\s*cell": "myeloid cell",

    # =========================================================================
    # STROMAL CELLS
    # =========================================================================

    r"myofibroblast": "myofibroblast cell",
    r"cancer.*associated.*fibro|caf": "cancer associated fibroblast",
    r"fibroblasts?|reticular\s+fibroblast": "fibroblast",
    r"smooth\s*muscle": "smooth muscle cell",
    r"pericyte": "pericyte",
    r"mesenchymal.*stem|^msc\b": "mesenchymal stem cell",
    r"stromal": "stromal cell",

    # =========================================================================
    # ENDOTHELIAL CELLS
    # =========================================================================

    r"lymphatic.*ec|lymphatic.*endothel|lec\b": "lymphatic endothelial cell",
    r"arterial.*ec|arterial.*endothel": "arterial endothelial cell",
    r"venous.*ec|venous.*endothel": "venous endothelial cell",
    r"capillary.*ec|capillary.*endothel": "capillary endothelial cell",
    r"tip.*ec|tip.*endothel": "tip cell",
    r"stalk.*ec|stalk.*endothel": "stalk cell",
    r"endotheli|^ve\b|^ec\b|ecs\b": "endothelial cell",

    # =========================================================================
    # EPITHELIAL CELLS
    # =========================================================================

    # Intestinal/Colon (specific patterns BEFORE general ones)
    r"enteroendocrine|ee\s*cell": "enteroendocrine cell",  # MUST be before enterocyte
    r"enterocytes?": "enterocyte",  # No longer matches "entero" prefix
    r"colonocytes?": "colon glandular cell",  # No longer matches "colono" prefix
    r"goblet": "goblet cell",
    r"paneth": "paneth cell",
    r"tuft|brush": "tuft cell",
    r"transit.*amplifying|ta\s+cell": "transit amplifying cell of colon",
    r"stem.*cell.*intestin|intestin.*stem|lgr5": "intestinal crypt stem cell",

    # Lung/Airway
    r"ciliated": "ciliated epithelial cell",
    r"\bclub\b|clara": "club cell",
    r"alveolar.*type.*1|at1|pneumocyte.*type.*1": "type i pneumocyte",
    r"alveolar.*type.*2|at2|pneumocyte.*type.*2": "type ii pneumocyte",
    r"basal.*epithelial|basal\s*cell": "basal cell",
    r"secretory.*epitheli": "secretory cell",

    # Liver
    r"hepatocyte": "hepatocyte",
    r"cholangiocyte|bile.*duct.*epitheli": "cholangiocyte",
    r"hepatic.*stellate|stellate.*cell": "hepatic stellate cell",

    # Skin
    r"keratinocyte": "keratinocyte",
    r"melanocyte": "melanocyte",

    # General epithelial
    r"squamous": "squamous epithelial cell",
    r"columnar": "columnar cell",
    r"epitheli": "epithelial cell",

    # =========================================================================
    # NEURAL / GLIAL
    # =========================================================================

    r"astrocytes?": "astrocyte",
    r"oligodendrocyte": "oligodendrocyte",
    r"microglia": "microglial cell",
    r"schwann": "schwann cell",
    r"glia": "glial cell",
    r"neuron|neural\s*cell": "neuron",

    # =========================================================================
    # STEM / PROGENITOR
    # =========================================================================

    r"^hsc\b|hematopoietic.*stem": "hematopoietic stem cell",
    # NOTE(review): every label this matches is already claimed by the
    # equivalent MSC pattern in STROMAL CELLS, which is checked first.
    r"^msc\b|mesenchymal.*stem": "mesenchymal stem cell",
    r"^cmp\b|common.*myeloid.*prog": "common myeloid progenitor",
    r"^gmp\b|granulocyte.*monocyte.*prog": "granulocyte monocyte progenitor",
    r"^mep\b|megakaryocyte.*erythrocyte.*prog": "megakaryocyte-erythroid progenitor cell",
    r"progenitor|precursor": "progenitor cell",
    r"stem": "stem cell",

    # =========================================================================
    # OTHER
    # =========================================================================

    r"adipocyte|adipose": "adipocyte",
    r"platelets?|thrombocyte": "platelet",
    r"^rbc\b|red\s*blood\s*cell|erythrocyte": "erythrocyte",
    r"megakaryocyte": "megakaryocyte",

    # =========================================================================
    # NEOPLASTIC / TUMOR CELLS
    # =========================================================================
    # Map to "malignant cell" (CL:0001064) - the CL term for tumor/cancer cells
    # This matches CellxGene's convention for tumor cell annotations

    r"tumor\s*cell|tumour\s*cell": "malignant cell",
    r"cancer\s*cell": "malignant cell",
    r"malignant\s*cell|malignant": "malignant cell",
    r"neoplastic\s*cell|neoplastic": "malignant cell",
    r"carcinoma\s*cell|carcinoma": "malignant cell",
    # Common tumor type abbreviations (lung, breast, etc.)
    r"\bluad\b|\blusc\b|\bnsclc\b": "malignant cell",  # Lung cancer types
    r"\bbrca\b": "malignant cell",  # Breast cancer
    r"\bhcc\b": "malignant cell",  # Hepatocellular carcinoma
    r"\bcrc\b": "malignant cell",  # Colorectal cancer
    r"tumor|tumour|cancer": "malignant cell",  # Catch-all for remaining
}
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def get_canonical_term(label: str) -> str | None:
|
|
217
|
+
"""
|
|
218
|
+
Get canonical Cell Ontology term for a cell type label.
|
|
219
|
+
|
|
220
|
+
Parameters
|
|
221
|
+
----------
|
|
222
|
+
label : str
|
|
223
|
+
Cell type label to canonicalize.
|
|
224
|
+
|
|
225
|
+
Returns
|
|
226
|
+
-------
|
|
227
|
+
str or None
|
|
228
|
+
Canonical CL term name, or None if no pattern matches.
|
|
229
|
+
|
|
230
|
+
Examples
|
|
231
|
+
--------
|
|
232
|
+
>>> get_canonical_term("CD4+ T cells")
|
|
233
|
+
'cd4-positive, alpha-beta t cell'
|
|
234
|
+
>>> get_canonical_term("NK cells")
|
|
235
|
+
'natural killer cell'
|
|
236
|
+
>>> get_canonical_term("Unknown")
|
|
237
|
+
None
|
|
238
|
+
"""
|
|
239
|
+
import re
|
|
240
|
+
|
|
241
|
+
label_lower = label.lower().strip()
|
|
242
|
+
|
|
243
|
+
for pattern, canonical_term in CELL_TYPE_PATTERNS.items():
|
|
244
|
+
if re.search(pattern, label_lower):
|
|
245
|
+
return canonical_term
|
|
246
|
+
|
|
247
|
+
return None
|