spatialcore 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spatialcore/__init__.py +122 -0
- spatialcore/annotation/__init__.py +253 -0
- spatialcore/annotation/acquisition.py +529 -0
- spatialcore/annotation/annotate.py +603 -0
- spatialcore/annotation/cellxgene.py +365 -0
- spatialcore/annotation/confidence.py +802 -0
- spatialcore/annotation/discovery.py +529 -0
- spatialcore/annotation/expression.py +363 -0
- spatialcore/annotation/loading.py +529 -0
- spatialcore/annotation/markers.py +297 -0
- spatialcore/annotation/ontology.py +1282 -0
- spatialcore/annotation/patterns.py +247 -0
- spatialcore/annotation/pipeline.py +620 -0
- spatialcore/annotation/synapse.py +380 -0
- spatialcore/annotation/training.py +1457 -0
- spatialcore/annotation/validation.py +422 -0
- spatialcore/core/__init__.py +34 -0
- spatialcore/core/cache.py +118 -0
- spatialcore/core/logging.py +135 -0
- spatialcore/core/metadata.py +149 -0
- spatialcore/core/utils.py +768 -0
- spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +86372 -0
- spatialcore/data/markers/canonical_markers.json +83 -0
- spatialcore/data/ontology_mappings/ontology_index.json +63865 -0
- spatialcore/plotting/__init__.py +109 -0
- spatialcore/plotting/benchmark.py +477 -0
- spatialcore/plotting/celltype.py +329 -0
- spatialcore/plotting/confidence.py +413 -0
- spatialcore/plotting/spatial.py +505 -0
- spatialcore/plotting/utils.py +411 -0
- spatialcore/plotting/validation.py +1342 -0
- spatialcore-0.1.9.dist-info/METADATA +213 -0
- spatialcore-0.1.9.dist-info/RECORD +36 -0
- spatialcore-0.1.9.dist-info/WHEEL +5 -0
- spatialcore-0.1.9.dist-info/licenses/LICENSE +201 -0
- spatialcore-0.1.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Confidence score visualization.
|
|
3
|
+
|
|
4
|
+
This module provides functions for visualizing prediction confidence
|
|
5
|
+
distributions and comparing confidence across cell types.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, List, Optional, Union
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
from matplotlib.figure import Figure
|
|
15
|
+
import anndata as ad
|
|
16
|
+
|
|
17
|
+
from spatialcore.core.logging import get_logger
|
|
18
|
+
from spatialcore.plotting.utils import (
|
|
19
|
+
generate_celltype_palette,
|
|
20
|
+
setup_figure,
|
|
21
|
+
setup_multi_figure,
|
|
22
|
+
save_figure,
|
|
23
|
+
despine,
|
|
24
|
+
format_axis_labels,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
logger = get_logger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def plot_confidence_histogram(
    adata: ad.AnnData,
    confidence_column: str,
    bins: int = 50,
    threshold: Optional[float] = None,
    threshold_color: str = "#FF0000",
    figsize: tuple = (8, 5),
    title: Optional[str] = None,
    save: Optional[Union[str, Path]] = None,
) -> Figure:
    """
    Plot histogram of confidence scores.

    Non-finite entries (NaN, +/-inf) are excluded before plotting, and the
    summary statistics and the "below threshold" percentage are computed
    over the remaining finite values only.

    Parameters
    ----------
    adata : AnnData
        Annotated data with confidence values.
    confidence_column : str
        Column in adata.obs containing confidence values.
    bins : int, default 50
        Number of histogram bins.
    threshold : float, optional
        Confidence threshold to highlight with vertical line.
    threshold_color : str, default "#FF0000"
        Color for threshold line.
    figsize : tuple, default (8, 5)
        Figure size.
    title : str, optional
        Plot title. Defaults to a title that names ``confidence_column``.
    save : str or Path, optional
        Path to save figure.

    Returns
    -------
    Figure
        Matplotlib figure.

    Raises
    ------
    ValueError
        If ``confidence_column`` is not a column of ``adata.obs``.

    Examples
    --------
    >>> from spatialcore.plotting.confidence import plot_confidence_histogram
    >>> fig = plot_confidence_histogram(
    ...     adata,
    ...     confidence_column="celltypist_confidence",
    ...     threshold=0.5,
    ... )
    """
    if confidence_column not in adata.obs.columns:
        raise ValueError(
            f"Confidence column '{confidence_column}' not found. "
            f"Available: {list(adata.obs.columns)}"
        )

    raw = adata.obs[confidence_column].to_numpy(dtype=float, na_value=np.nan)

    # NumPy's histogram refuses a non-finite data range, and a single NaN
    # turns mean/median/std into NaN, so only finite scores are kept.
    values = raw[np.isfinite(raw)]
    n_dropped = raw.size - values.size
    if n_dropped:
        logger.warning(
            "Ignoring %d non-finite value(s) in '%s' for the histogram.",
            n_dropped,
            confidence_column,
        )

    fig, ax = setup_figure(figsize=figsize)

    ax.hist(values, bins=bins, color="#3784FE", edgecolor="white", alpha=0.8)

    if threshold is not None:
        ax.axvline(threshold, color=threshold_color, linestyle="--", linewidth=2)
        # Guard the empty case so the percentage is never a 0/0 division.
        if values.size:
            pct = 100.0 * np.count_nonzero(values < threshold) / values.size
        else:
            pct = float("nan")
        ax.text(
            threshold + 0.02,
            ax.get_ylim()[1] * 0.9,
            f"{pct:.1f}% below\nthreshold",
            color=threshold_color,
            fontsize=10,
        )

    format_axis_labels(
        ax,
        xlabel="Confidence Score",
        ylabel="Number of Cells",
    )
    despine(ax)

    if title is None:
        title = f"Confidence Distribution ({confidence_column})"
    ax.set_title(title)

    # Summary statistics box (axes coordinates, top-left corner).
    if values.size:
        mean, median, std = np.mean(values), np.median(values), np.std(values)
    else:
        # Nothing to summarise; show "nan" without NumPy's empty-array warnings.
        mean = median = std = float("nan")
    stats_text = (
        f"Mean: {mean:.3f}\n"
        f"Median: {median:.3f}\n"
        f"Std: {std:.3f}"
    )
    ax.text(
        0.02,
        0.98,
        stats_text,
        transform=ax.transAxes,
        fontsize=10,
        verticalalignment="top",
        bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
    )

    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()

    if save:
        save_figure(fig, save)

    return fig
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def plot_confidence_by_celltype(
    adata: ad.AnnData,
    label_column: str,
    confidence_column: str,
    colors: Optional[Dict[str, str]] = None,
    top_n: Optional[int] = 20,
    figsize: Optional[tuple] = None,
    title: Optional[str] = None,
    save: Optional[Union[str, Path]] = None,
) -> Figure:
    """
    Plot confidence distribution per cell type as box plots.

    Cell types are ordered by how many cells carry each label (most
    frequent first). Labels with no cells (e.g. unused categories of a
    categorical column) are skipped, and missing confidence values are
    excluded from the box statistics.

    Parameters
    ----------
    adata : AnnData
        Annotated data with cell type labels and confidence.
    label_column : str
        Column in adata.obs containing cell type labels.
    confidence_column : str
        Column in adata.obs containing confidence values.
    colors : Dict[str, str], optional
        Color mapping for cell types. Types missing from the mapping are
        drawn in grey ("#888888"). Generated automatically if None.
    top_n : int, optional, default 20
        Only show top N most frequent cell types. None shows all.
    figsize : tuple, optional
        Figure size. Auto-calculated if None.
    title : str, optional
        Plot title.
    save : str or Path, optional
        Path to save figure.

    Returns
    -------
    Figure
        Matplotlib figure.

    Raises
    ------
    ValueError
        If either column is missing from ``adata.obs``, or if no cell
        type remains to plot.

    Examples
    --------
    >>> from spatialcore.plotting.confidence import plot_confidence_by_celltype
    >>> fig = plot_confidence_by_celltype(
    ...     adata,
    ...     label_column="cell_type",
    ...     confidence_column="confidence",
    ... )
    """
    if label_column not in adata.obs.columns:
        raise ValueError(f"Label column '{label_column}' not found.")
    if confidence_column not in adata.obs.columns:
        raise ValueError(f"Confidence column '{confidence_column}' not found.")

    # Get data
    df = pd.DataFrame({
        "cell_type": adata.obs[label_column].values,
        "confidence": adata.obs[confidence_column].values,
    })

    # Rank types by cell count; value_counts() on a categorical column also
    # returns unused categories with count 0, which must not become boxes.
    counts = df["cell_type"].value_counts()
    type_order = counts[counts > 0].index.tolist()
    if top_n is not None:
        type_order = type_order[:top_n]

    if not type_order:
        raise ValueError(
            f"No cell types to plot from label column '{label_column}'."
        )

    n_types = len(type_order)

    # Generate colors
    if colors is None:
        colors = generate_celltype_palette(type_order)

    # Calculate figure size
    if figsize is None:
        figsize = (max(8, 0.5 * n_types), 6)

    fig, ax = setup_figure(figsize=figsize)

    # Split the confidences per type in one pass instead of one boolean
    # mask per type. Missing confidences are dropped first because a NaN
    # makes the box statistics NaN and the box is then not drawn.
    valid = df.dropna(subset=["confidence"])
    by_type = {
        ct: grp.to_numpy()
        for ct, grp in valid.groupby(
            "cell_type", sort=False, observed=True
        )["confidence"]
    }
    no_data = np.empty(0, dtype=float)
    box_data = [by_type.get(ct, no_data) for ct in type_order]

    # Create box plot
    positions = list(range(n_types))
    bp = ax.boxplot(
        box_data,
        positions=positions,
        patch_artist=True,
        widths=0.6,
    )

    # Color boxes
    for patch, ct in zip(bp["boxes"], type_order):
        patch.set_facecolor(colors.get(ct, "#888888"))
        patch.set_alpha(0.7)

    ax.set_xticks(positions)
    ax.set_xticklabels(type_order, rotation=45, ha="right")

    format_axis_labels(ax, ylabel="Confidence Score")
    despine(ax)

    if title is None:
        title = "Confidence by Cell Type"
    ax.set_title(title)

    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()

    if save:
        save_figure(fig, save)

    return fig
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def plot_confidence_violin(
    adata: ad.AnnData,
    label_column: str,
    confidence_column: str,
    colors: Optional[Dict[str, str]] = None,
    top_n: Optional[int] = 15,
    figsize: Optional[tuple] = None,
    title: Optional[str] = None,
    save: Optional[Union[str, Path]] = None,
) -> Figure:
    """
    Plot confidence distribution per cell type as violin plots.

    Cell types are ordered by how many cells carry each label (most
    frequent first). Labels with no cells (e.g. unused categories of a
    categorical column) are skipped.

    Parameters
    ----------
    adata : AnnData
        Annotated data with cell type labels and confidence.
    label_column : str
        Column in adata.obs containing cell type labels.
    confidence_column : str
        Column in adata.obs containing confidence values.
    colors : Dict[str, str], optional
        Color mapping for cell types. Types missing from the mapping are
        drawn in grey ("#888888"). Generated automatically if None.
    top_n : int, optional, default 15
        Only show top N most frequent cell types. None shows all.
    figsize : tuple, optional
        Figure size. Auto-calculated if None.
    title : str, optional
        Plot title.
    save : str or Path, optional
        Path to save figure.

    Returns
    -------
    Figure
        Matplotlib figure.

    Raises
    ------
    ImportError
        If seaborn is not installed.
    ValueError
        If either column is missing from ``adata.obs``.

    Examples
    --------
    >>> from spatialcore.plotting.confidence import plot_confidence_violin
    >>> fig = plot_confidence_violin(
    ...     adata,
    ...     label_column="cell_type",
    ...     confidence_column="confidence",
    ... )
    """
    # seaborn is imported lazily so the rest of the module works without it.
    try:
        import seaborn as sns
    except ImportError as err:
        raise ImportError("seaborn is required for violin plots") from err

    if label_column not in adata.obs.columns:
        raise ValueError(f"Label column '{label_column}' not found.")
    if confidence_column not in adata.obs.columns:
        raise ValueError(f"Confidence column '{confidence_column}' not found.")

    df = pd.DataFrame({
        "cell_type": adata.obs[label_column].values,
        "confidence": adata.obs[confidence_column].values,
    })

    # Rank types by cell count; value_counts() on a categorical column also
    # returns unused categories with count 0, which would only add empty
    # slots on the x-axis and inflate the auto-computed figure width.
    counts = df["cell_type"].value_counts()
    type_order = counts[counts > 0].index.tolist()
    if top_n is not None:
        type_order = type_order[:top_n]
        df = df[df["cell_type"].isin(type_order)]

    n_types = len(type_order)

    if colors is None:
        colors = generate_celltype_palette(type_order)

    if figsize is None:
        figsize = (max(8, 0.6 * n_types), 6)

    fig, ax = setup_figure(figsize=figsize)

    # Restrict the palette to the plotted types, with a grey fallback.
    palette = {ct: colors.get(ct, "#888888") for ct in type_order}

    sns.violinplot(
        data=df,
        x="cell_type",
        y="confidence",
        order=type_order,
        palette=palette,
        ax=ax,
    )

    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    format_axis_labels(ax, xlabel="", ylabel="Confidence Score")
    despine(ax)

    if title is None:
        title = "Confidence by Cell Type"
    ax.set_title(title)

    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()

    if save:
        save_figure(fig, save)

    return fig
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def plot_model_contribution(
    adata: ad.AnnData,
    model_column: str = "celltypist_model",
    figsize: tuple = (8, 6),
    title: Optional[str] = None,
    save: Optional[Union[str, Path]] = None,
) -> Figure:
    """
    Plot which model contributed each cell's prediction.

    Draws one bar per distinct value of ``model_column`` (number of cells)
    and annotates each bar with its share of all counted cells. Useful for
    hierarchical or multi-model annotation pipelines to see model coverage.

    Parameters
    ----------
    adata : AnnData
        Annotated data with model source column.
    model_column : str, default "celltypist_model"
        Column in adata.obs indicating which model made the prediction.
    figsize : tuple, default (8, 6)
        Figure size.
    title : str, optional
        Plot title. Defaults to "Model Contribution".
    save : str or Path, optional
        Path to save figure.

    Returns
    -------
    Figure
        Matplotlib figure.

    Raises
    ------
    ValueError
        If ``model_column`` is not a column of ``adata.obs``.

    Examples
    --------
    >>> from spatialcore.plotting.confidence import plot_model_contribution
    >>> fig = plot_model_contribution(adata, model_column="celltypist_model")
    """
    if model_column not in adata.obs.columns:
        raise ValueError(
            f"Model column '{model_column}' not found. "
            f"Available: {list(adata.obs.columns)}"
        )

    counts = adata.obs[model_column].value_counts()
    # value_counts() on a categorical column also reports unused categories
    # with count 0; they would show up as empty bars labelled "0.0%".
    counts = counts[counts > 0]
    models = counts.index.tolist()

    fig, ax = setup_figure(figsize=figsize)

    colors = generate_celltype_palette(models)
    bar_colors = [colors.get(m, "#888888") for m in models]

    x_positions = range(len(counts))
    ax.bar(x_positions, counts.values, color=bar_colors)
    ax.set_xticks(x_positions)
    ax.set_xticklabels(models, rotation=45, ha="right")

    format_axis_labels(ax, ylabel="Number of Cells")
    despine(ax)

    if title is None:
        title = "Model Contribution"
    ax.set_title(title)

    # Add percentages on bars; total is positive whenever the loop runs
    # because only non-zero counts are kept above.
    total = counts.sum()
    for i, count in enumerate(counts.values):
        pct = 100 * count / total
        ax.text(
            i,
            count + total * 0.01,  # small offset so the label clears the bar
            f"{pct:.1f}%",
            ha="center",
            fontsize=9,
        )

    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()

    if save:
        save_figure(fig, save)

    return fig
|