spatialcore 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spatialcore-0.2.4 → spatialcore-0.2.5}/PKG-INFO +1 -1
- {spatialcore-0.2.4 → spatialcore-0.2.5}/pyproject.toml +1 -1
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/__init__.py +1 -1
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/acquisition.py +3 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/cellxgene.py +94 -1
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/PKG-INFO +1 -1
- {spatialcore-0.2.4 → spatialcore-0.2.5}/LICENSE +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/README.md +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/setup.cfg +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/__init__.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/annotate.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/confidence.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/discovery.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/expression.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/loading.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/markers.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/ontology.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/patterns.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/pipeline.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/synapse.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/training.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/validation.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/__init__.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/cache.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/logging.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/metadata.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/utils.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/markers/canonical_markers.json +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/ontology_mappings/ontology_index.json +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/__init__.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/benchmark.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/celltype.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/confidence.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/spatial.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/utils.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/validation.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/r_bridge/__init__.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/r_bridge/subprocess_runner.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/__init__.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/autocorrelation.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/distance.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/domains.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/neighborhoods.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/stats/__init__.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/stats/_thresholding.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/stats/classify.py +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/SOURCES.txt +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/dependency_links.txt +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/requires.txt +0 -0
- {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ A thin, robust wrapper around standard libraries to ensure Python and R users
|
|
|
5
5
|
get the exact same result for the same biological question.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.2.4"
|
|
8
|
+
__version__ = "0.2.5"
|
|
9
9
|
|
|
10
10
|
# Track which modules are available in this installation
|
|
11
11
|
_available_modules: list[str] = []
|
|
@@ -343,6 +343,7 @@ def acquire_reference(
|
|
|
343
343
|
Source-specific options:
|
|
344
344
|
|
|
345
345
|
- ``max_cells`` (int): Maximum cells to download (for CellxGene query)
|
|
346
|
+
- ``resolve_hierarchy`` (str): "remove_parents" to drop parent labels
|
|
346
347
|
- ``auth_token`` (str): Synapse authentication token
|
|
347
348
|
- ``tissue``, ``disease``, ``cell_type`` (str): CellxGene query filters
|
|
348
349
|
|
|
@@ -476,6 +477,8 @@ def _acquire_from_cellxgene(source: str, **kwargs) -> ad.AnnData:
|
|
|
476
477
|
assay=assay,
|
|
477
478
|
max_cells=kwargs.get("max_cells"),
|
|
478
479
|
random_state=kwargs.get("random_state", 42),
|
|
480
|
+
resolve_hierarchy=kwargs.get("resolve_hierarchy", "none"),
|
|
481
|
+
validate_labels=kwargs.get("validate_labels", True),
|
|
479
482
|
)
|
|
480
483
|
|
|
481
484
|
else:
|
|
@@ -15,7 +15,8 @@ References:
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
from pathlib import Path
|
|
18
|
-
from typing import Dict, List, Optional, Any, Union
|
|
18
|
+
from typing import Dict, List, Optional, Any, Union, Set
|
|
19
|
+
import re
|
|
19
20
|
|
|
20
21
|
import numpy as np
|
|
21
22
|
import pandas as pd
|
|
@@ -34,6 +35,64 @@ from spatialcore.core.utils import (
|
|
|
34
35
|
|
|
35
36
|
logger = get_logger(__name__)
|
|
36
37
|
|
|
38
|
+
_LABEL_TOKEN_PATTERN = re.compile(r"[^a-z0-9]+")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _label_tokens(label: str) -> Set[str]:
|
|
42
|
+
"""Normalize a label into lowercase alphanumeric tokens."""
|
|
43
|
+
if label is None:
|
|
44
|
+
return set()
|
|
45
|
+
normalized = _LABEL_TOKEN_PATTERN.sub(" ", str(label).lower()).strip()
|
|
46
|
+
if not normalized:
|
|
47
|
+
return set()
|
|
48
|
+
return set(token for token in normalized.split() if token)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _detect_parent_child_conflicts(
|
|
52
|
+
adata: ad.AnnData,
|
|
53
|
+
label_column: str,
|
|
54
|
+
ontology_column: str,
|
|
55
|
+
min_parent_tokens: int = 2,
|
|
56
|
+
) -> Dict[str, List[str]]:
|
|
57
|
+
"""
|
|
58
|
+
Detect parent/child conflicts based on label token containment.
|
|
59
|
+
|
|
60
|
+
Returns dict mapping parent CL IDs to list of child CL IDs present in data.
|
|
61
|
+
"""
|
|
62
|
+
pairs = adata.obs[[label_column, ontology_column]].dropna()
|
|
63
|
+
if pairs.empty:
|
|
64
|
+
return {}
|
|
65
|
+
|
|
66
|
+
pairs = pairs.astype(str)
|
|
67
|
+
valid_mask = pairs[ontology_column].str.startswith("CL:")
|
|
68
|
+
pairs = pairs.loc[valid_mask, [label_column, ontology_column]]
|
|
69
|
+
if pairs.empty:
|
|
70
|
+
return {}
|
|
71
|
+
|
|
72
|
+
# Map CL ID -> most common label for that ID
|
|
73
|
+
id_to_label = (
|
|
74
|
+
pairs.groupby(ontology_column)[label_column]
|
|
75
|
+
.agg(lambda values: values.value_counts().idxmax())
|
|
76
|
+
.to_dict()
|
|
77
|
+
)
|
|
78
|
+
id_to_tokens = {cl_id: _label_tokens(label) for cl_id, label in id_to_label.items()}
|
|
79
|
+
|
|
80
|
+
conflicts: Dict[str, List[str]] = {}
|
|
81
|
+
for parent_id, parent_tokens in id_to_tokens.items():
|
|
82
|
+
if len(parent_tokens) < min_parent_tokens:
|
|
83
|
+
continue
|
|
84
|
+
for child_id, child_tokens in id_to_tokens.items():
|
|
85
|
+
if parent_id == child_id:
|
|
86
|
+
continue
|
|
87
|
+
if parent_tokens == child_tokens:
|
|
88
|
+
continue
|
|
89
|
+
if len(child_tokens) <= len(parent_tokens):
|
|
90
|
+
continue
|
|
91
|
+
if parent_tokens.issubset(child_tokens):
|
|
92
|
+
conflicts.setdefault(parent_id, []).append(child_id)
|
|
93
|
+
|
|
94
|
+
return conflicts
|
|
95
|
+
|
|
37
96
|
# ============================================================================
|
|
38
97
|
# CellxGene Dataset Registry
|
|
39
98
|
# ============================================================================
|
|
@@ -202,6 +261,7 @@ def query_cellxgene_census(
|
|
|
202
261
|
output_path: Optional[Union[str, Path]] = None,
|
|
203
262
|
random_state: int = 42,
|
|
204
263
|
validate_labels: bool = True,
|
|
264
|
+
resolve_hierarchy: str = "none",
|
|
205
265
|
) -> ad.AnnData:
|
|
206
266
|
"""
|
|
207
267
|
Query cells from CellxGene Census with flexible filters.
|
|
@@ -236,6 +296,10 @@ def query_cellxgene_census(
|
|
|
236
296
|
validate_labels : bool, default True
|
|
237
297
|
If True, check for label-to-ontology inconsistencies in CellxGene
|
|
238
298
|
columns (cell_type vs cell_type_ontology_term_id) and log warnings.
|
|
299
|
+
resolve_hierarchy : str, default "none"
|
|
300
|
+
If "remove_parents", drop cells labeled with parent terms when any
|
|
301
|
+
child terms are present (based on label token containment). Use "none"
|
|
302
|
+
to keep current behavior.
|
|
239
303
|
|
|
240
304
|
Returns
|
|
241
305
|
-------
|
|
@@ -359,6 +423,35 @@ def query_cellxgene_census(
|
|
|
359
423
|
|
|
360
424
|
logger.info(f" Downloaded: {adata.n_obs:,} cells × {adata.n_vars:,} genes")
|
|
361
425
|
|
|
426
|
+
if resolve_hierarchy not in {"none", "remove_parents"}:
|
|
427
|
+
raise ValueError("resolve_hierarchy must be 'none' or 'remove_parents'")
|
|
428
|
+
|
|
429
|
+
if resolve_hierarchy == "remove_parents":
|
|
430
|
+
if (
|
|
431
|
+
"cell_type" not in adata.obs.columns
|
|
432
|
+
or "cell_type_ontology_term_id" not in adata.obs.columns
|
|
433
|
+
):
|
|
434
|
+
raise ValueError(
|
|
435
|
+
"resolve_hierarchy='remove_parents' requires "
|
|
436
|
+
"cell_type and cell_type_ontology_term_id in adata.obs"
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
conflicts = _detect_parent_child_conflicts(
|
|
440
|
+
adata,
|
|
441
|
+
label_column="cell_type",
|
|
442
|
+
ontology_column="cell_type_ontology_term_id",
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
if conflicts:
|
|
446
|
+
parent_ids = set(conflicts.keys())
|
|
447
|
+
parent_mask = adata.obs["cell_type_ontology_term_id"].isin(parent_ids)
|
|
448
|
+
removed = int(parent_mask.sum())
|
|
449
|
+
adata = adata[~parent_mask].copy()
|
|
450
|
+
logger.info(
|
|
451
|
+
"Removed %d parent-labeled cells due to hierarchy conflicts",
|
|
452
|
+
removed,
|
|
453
|
+
)
|
|
454
|
+
|
|
362
455
|
if validate_labels:
|
|
363
456
|
if (
|
|
364
457
|
"cell_type" in adata.obs.columns
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv
RENAMED
|
File without changes
|
|
File without changes
|
{spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/ontology_mappings/ontology_index.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|