spatialcore 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {spatialcore-0.2.4 → spatialcore-0.2.5}/PKG-INFO +1 -1
  2. {spatialcore-0.2.4 → spatialcore-0.2.5}/pyproject.toml +1 -1
  3. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/__init__.py +1 -1
  4. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/acquisition.py +3 -0
  5. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/cellxgene.py +94 -1
  6. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/PKG-INFO +1 -1
  7. {spatialcore-0.2.4 → spatialcore-0.2.5}/LICENSE +0 -0
  8. {spatialcore-0.2.4 → spatialcore-0.2.5}/README.md +0 -0
  9. {spatialcore-0.2.4 → spatialcore-0.2.5}/setup.cfg +0 -0
  10. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/__init__.py +0 -0
  11. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/annotate.py +0 -0
  12. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/confidence.py +0 -0
  13. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/discovery.py +0 -0
  14. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/expression.py +0 -0
  15. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/loading.py +0 -0
  16. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/markers.py +0 -0
  17. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/ontology.py +0 -0
  18. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/patterns.py +0 -0
  19. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/pipeline.py +0 -0
  20. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/synapse.py +0 -0
  21. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/training.py +0 -0
  22. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/annotation/validation.py +0 -0
  23. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/__init__.py +0 -0
  24. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/cache.py +0 -0
  25. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/logging.py +0 -0
  26. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/metadata.py +0 -0
  27. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/core/utils.py +0 -0
  28. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +0 -0
  29. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/markers/canonical_markers.json +0 -0
  30. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/data/ontology_mappings/ontology_index.json +0 -0
  31. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/__init__.py +0 -0
  32. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/benchmark.py +0 -0
  33. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/celltype.py +0 -0
  34. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/confidence.py +0 -0
  35. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/spatial.py +0 -0
  36. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/utils.py +0 -0
  37. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/plotting/validation.py +0 -0
  38. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/r_bridge/__init__.py +0 -0
  39. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/r_bridge/subprocess_runner.py +0 -0
  40. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/__init__.py +0 -0
  41. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/autocorrelation.py +0 -0
  42. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/distance.py +0 -0
  43. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/domains.py +0 -0
  44. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/spatial/neighborhoods.py +0 -0
  45. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/stats/__init__.py +0 -0
  46. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/stats/_thresholding.py +0 -0
  47. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore/stats/classify.py +0 -0
  48. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/SOURCES.txt +0 -0
  49. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/dependency_links.txt +0 -0
  50. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/requires.txt +0 -0
  51. {spatialcore-0.2.4 → spatialcore-0.2.5}/src/spatialcore.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spatialcore
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Standardized spatial statistics tools for computational biology
5
5
  Author: SpatialCore Contributors
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spatialcore"
7
- version = "0.2.4"
7
+ version = "0.2.5"
8
8
  description = "Standardized spatial statistics tools for computational biology"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,7 +5,7 @@ A thin, robust wrapper around standard libraries to ensure Python and R users
5
5
  get the exact same result for the same biological question.
6
6
  """
7
7
 
8
- __version__ = "0.2.4"
8
+ __version__ = "0.2.5"
9
9
 
10
10
  # Track which modules are available in this installation
11
11
  _available_modules: list[str] = []
@@ -343,6 +343,7 @@ def acquire_reference(
343
343
  Source-specific options:
344
344
 
345
345
  - ``max_cells`` (int): Maximum cells to download (for CellxGene query)
346
+ - ``resolve_hierarchy`` (str): "remove_parents" to drop parent labels
346
347
  - ``auth_token`` (str): Synapse authentication token
347
348
  - ``tissue``, ``disease``, ``cell_type`` (str): CellxGene query filters
348
349
 
@@ -476,6 +477,8 @@ def _acquire_from_cellxgene(source: str, **kwargs) -> ad.AnnData:
476
477
  assay=assay,
477
478
  max_cells=kwargs.get("max_cells"),
478
479
  random_state=kwargs.get("random_state", 42),
480
+ resolve_hierarchy=kwargs.get("resolve_hierarchy", "none"),
481
+ validate_labels=kwargs.get("validate_labels", True),
479
482
  )
480
483
 
481
484
  else:
@@ -15,7 +15,8 @@ References:
15
15
  """
16
16
 
17
17
  from pathlib import Path
18
- from typing import Dict, List, Optional, Any, Union
18
+ from typing import Dict, List, Optional, Any, Union, Set
19
+ import re
19
20
 
20
21
  import numpy as np
21
22
  import pandas as pd
@@ -34,6 +35,64 @@ from spatialcore.core.utils import (
34
35
 
35
36
  logger = get_logger(__name__)
36
37
 
38
+ _LABEL_TOKEN_PATTERN = re.compile(r"[^a-z0-9]+")
39
+
40
+
41
+ def _label_tokens(label: str) -> Set[str]:
42
+ """Normalize a label into lowercase alphanumeric tokens."""
43
+ if label is None:
44
+ return set()
45
+ normalized = _LABEL_TOKEN_PATTERN.sub(" ", str(label).lower()).strip()
46
+ if not normalized:
47
+ return set()
48
+ return set(token for token in normalized.split() if token)
49
+
50
+
51
+ def _detect_parent_child_conflicts(
52
+ adata: ad.AnnData,
53
+ label_column: str,
54
+ ontology_column: str,
55
+ min_parent_tokens: int = 2,
56
+ ) -> Dict[str, List[str]]:
57
+ """
58
+ Detect parent/child conflicts based on label token containment.
59
+
60
+ Returns dict mapping parent CL IDs to list of child CL IDs present in data.
61
+ """
62
+ pairs = adata.obs[[label_column, ontology_column]].dropna()
63
+ if pairs.empty:
64
+ return {}
65
+
66
+ pairs = pairs.astype(str)
67
+ valid_mask = pairs[ontology_column].str.startswith("CL:")
68
+ pairs = pairs.loc[valid_mask, [label_column, ontology_column]]
69
+ if pairs.empty:
70
+ return {}
71
+
72
+ # Map CL ID -> most common label for that ID
73
+ id_to_label = (
74
+ pairs.groupby(ontology_column)[label_column]
75
+ .agg(lambda values: values.value_counts().idxmax())
76
+ .to_dict()
77
+ )
78
+ id_to_tokens = {cl_id: _label_tokens(label) for cl_id, label in id_to_label.items()}
79
+
80
+ conflicts: Dict[str, List[str]] = {}
81
+ for parent_id, parent_tokens in id_to_tokens.items():
82
+ if len(parent_tokens) < min_parent_tokens:
83
+ continue
84
+ for child_id, child_tokens in id_to_tokens.items():
85
+ if parent_id == child_id:
86
+ continue
87
+ if parent_tokens == child_tokens:
88
+ continue
89
+ if len(child_tokens) <= len(parent_tokens):
90
+ continue
91
+ if parent_tokens.issubset(child_tokens):
92
+ conflicts.setdefault(parent_id, []).append(child_id)
93
+
94
+ return conflicts
95
+
37
96
  # ============================================================================
38
97
  # CellxGene Dataset Registry
39
98
  # ============================================================================
@@ -202,6 +261,7 @@ def query_cellxgene_census(
202
261
  output_path: Optional[Union[str, Path]] = None,
203
262
  random_state: int = 42,
204
263
  validate_labels: bool = True,
264
+ resolve_hierarchy: str = "none",
205
265
  ) -> ad.AnnData:
206
266
  """
207
267
  Query cells from CellxGene Census with flexible filters.
@@ -236,6 +296,10 @@ def query_cellxgene_census(
236
296
  validate_labels : bool, default True
237
297
  If True, check for label-to-ontology inconsistencies in CellxGene
238
298
  columns (cell_type vs cell_type_ontology_term_id) and log warnings.
299
+ resolve_hierarchy : str, default "none"
300
+ If "remove_parents", drop cells labeled with parent terms when any
301
+ child terms are present (based on label token containment). Use "none"
302
+ to keep current behavior.
239
303
 
240
304
  Returns
241
305
  -------
@@ -359,6 +423,35 @@ def query_cellxgene_census(
359
423
 
360
424
  logger.info(f" Downloaded: {adata.n_obs:,} cells × {adata.n_vars:,} genes")
361
425
 
426
+ if resolve_hierarchy not in {"none", "remove_parents"}:
427
+ raise ValueError("resolve_hierarchy must be 'none' or 'remove_parents'")
428
+
429
+ if resolve_hierarchy == "remove_parents":
430
+ if (
431
+ "cell_type" not in adata.obs.columns
432
+ or "cell_type_ontology_term_id" not in adata.obs.columns
433
+ ):
434
+ raise ValueError(
435
+ "resolve_hierarchy='remove_parents' requires "
436
+ "cell_type and cell_type_ontology_term_id in adata.obs"
437
+ )
438
+
439
+ conflicts = _detect_parent_child_conflicts(
440
+ adata,
441
+ label_column="cell_type",
442
+ ontology_column="cell_type_ontology_term_id",
443
+ )
444
+
445
+ if conflicts:
446
+ parent_ids = set(conflicts.keys())
447
+ parent_mask = adata.obs["cell_type_ontology_term_id"].isin(parent_ids)
448
+ removed = int(parent_mask.sum())
449
+ adata = adata[~parent_mask].copy()
450
+ logger.info(
451
+ "Removed %d parent-labeled cells due to hierarchy conflicts",
452
+ removed,
453
+ )
454
+
362
455
  if validate_labels:
363
456
  if (
364
457
  "cell_type" in adata.obs.columns
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spatialcore
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Standardized spatial statistics tools for computational biology
5
5
  Author: SpatialCore Contributors
6
6
  License-Expression: Apache-2.0
File without changes
File without changes
File without changes