spatialcore 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spatialcore-0.2.0 → spatialcore-0.2.2}/PKG-INFO +1 -1
- {spatialcore-0.2.0 → spatialcore-0.2.2}/pyproject.toml +1 -1
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/__init__.py +1 -1
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/cellxgene.py +37 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/training.py +36 -3
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/validation.py +73 -1
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/core/utils.py +70 -38
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore.egg-info/PKG-INFO +1 -1
- {spatialcore-0.2.0 → spatialcore-0.2.2}/LICENSE +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/README.md +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/setup.cfg +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/__init__.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/acquisition.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/annotate.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/confidence.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/discovery.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/expression.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/loading.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/markers.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/ontology.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/patterns.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/pipeline.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/annotation/synapse.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/core/__init__.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/core/cache.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/core/logging.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/core/metadata.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/data/markers/canonical_markers.json +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/data/ontology_mappings/ontology_index.json +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/__init__.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/benchmark.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/celltype.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/confidence.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/spatial.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/utils.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/plotting/validation.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/r_bridge/__init__.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/r_bridge/subprocess_runner.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/spatial/__init__.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/spatial/autocorrelation.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/spatial/distance.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/spatial/domains.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/spatial/neighborhoods.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/stats/__init__.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/stats/_thresholding.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/stats/classify.py +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore.egg-info/SOURCES.txt +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore.egg-info/dependency_links.txt +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore.egg-info/requires.txt +0 -0
- {spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore.egg-info/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ A thin, robust wrapper around standard libraries to ensure Python and R users
|
|
|
5
5
|
get the exact same result for the same biological question.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.2.0"
|
|
8
|
+
__version__ = "0.2.2"
|
|
9
9
|
|
|
10
10
|
# Track which modules are available in this installation
|
|
11
11
|
_available_modules: list[str] = []
|
|
@@ -201,6 +201,7 @@ def query_cellxgene_census(
|
|
|
201
201
|
max_cells: Optional[int] = None,
|
|
202
202
|
output_path: Optional[Union[str, Path]] = None,
|
|
203
203
|
random_state: int = 42,
|
|
204
|
+
validate_labels: bool = True,
|
|
204
205
|
) -> ad.AnnData:
|
|
205
206
|
"""
|
|
206
207
|
Query cells from CellxGene Census with flexible filters.
|
|
@@ -232,6 +233,9 @@ def query_cellxgene_census(
|
|
|
232
233
|
If provided, save result to this h5ad file.
|
|
233
234
|
random_state : int, default 42
|
|
234
235
|
Random seed for subsampling (only used when max_cells is specified).
|
|
236
|
+
validate_labels : bool, default True
|
|
237
|
+
If True, check for label-to-ontology inconsistencies in CellxGene
|
|
238
|
+
columns (cell_type vs cell_type_ontology_term_id) and log warnings.
|
|
235
239
|
|
|
236
240
|
Returns
|
|
237
241
|
-------
|
|
@@ -355,6 +359,39 @@ def query_cellxgene_census(
|
|
|
355
359
|
|
|
356
360
|
logger.info(f" Downloaded: {adata.n_obs:,} cells × {adata.n_vars:,} genes")
|
|
357
361
|
|
|
362
|
+
if validate_labels:
|
|
363
|
+
if (
|
|
364
|
+
"cell_type" in adata.obs.columns
|
|
365
|
+
and "cell_type_ontology_term_id" in adata.obs.columns
|
|
366
|
+
):
|
|
367
|
+
from spatialcore.annotation.validation import (
|
|
368
|
+
check_label_ontology_consistency,
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
consistency = check_label_ontology_consistency(
|
|
372
|
+
adata,
|
|
373
|
+
label_column="cell_type",
|
|
374
|
+
ontology_column="cell_type_ontology_term_id",
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
if consistency.n_labels_with_multiple_ids > 0:
|
|
378
|
+
examples = []
|
|
379
|
+
for label in sorted(consistency.labels_with_multiple_ids.keys())[:5]:
|
|
380
|
+
ids = ", ".join(consistency.labels_with_multiple_ids[label])
|
|
381
|
+
examples.append(f"{label} -> {ids}")
|
|
382
|
+
logger.warning(
|
|
383
|
+
"CellxGene label/ontology mismatch: %d labels map to multiple CL IDs. "
|
|
384
|
+
"Examples: %s",
|
|
385
|
+
consistency.n_labels_with_multiple_ids,
|
|
386
|
+
"; ".join(examples),
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
if consistency.n_hierarchical_labels > 0:
|
|
390
|
+
logger.warning(
|
|
391
|
+
"CellxGene labels look hierarchical (parent/child in one label): %s",
|
|
392
|
+
", ".join(sorted(consistency.hierarchical_labels)[:5]),
|
|
393
|
+
)
|
|
394
|
+
|
|
358
395
|
# Save if output path provided
|
|
359
396
|
if output_path:
|
|
360
397
|
output_path = Path(output_path)
|
|
@@ -1243,8 +1243,8 @@ def subsample_balanced(
|
|
|
1243
1243
|
max_cells_per_type : int, default 5000
|
|
1244
1244
|
Maximum cells per cell type in output.
|
|
1245
1245
|
min_cells_per_type : int, default 50
|
|
1246
|
-
|
|
1247
|
-
|
|
1246
|
+
Minimum cells required to keep a cell type. Types with fewer
|
|
1247
|
+
cells are removed before balancing.
|
|
1248
1248
|
source_column : str, optional, default "reference_source"
|
|
1249
1249
|
Column identifying which reference each cell came from.
|
|
1250
1250
|
Set to None to disable source-aware balancing (simple capping).
|
|
@@ -1387,8 +1387,41 @@ def subsample_balanced(
|
|
|
1387
1387
|
else:
|
|
1388
1388
|
cell_types = adata.obs[label_column].astype(str)
|
|
1389
1389
|
|
|
1390
|
-
unique_types = cell_types.unique()
|
|
1391
1390
|
type_counts = cell_types.value_counts()
|
|
1391
|
+
if min_cells_per_type > 0:
|
|
1392
|
+
low_count_types = type_counts[type_counts < min_cells_per_type].index.tolist()
|
|
1393
|
+
if low_count_types:
|
|
1394
|
+
n_removed = int(type_counts[type_counts < min_cells_per_type].sum())
|
|
1395
|
+
logger.info(
|
|
1396
|
+
f"\nFiltering low-count cell types (<{min_cells_per_type} cells) before balancing:"
|
|
1397
|
+
)
|
|
1398
|
+
logger.info(f" Removing {len(low_count_types)} types, {n_removed:,} cells")
|
|
1399
|
+
for ct in low_count_types[:10]:
|
|
1400
|
+
logger.info(f" {ct}: {type_counts[ct]} cells")
|
|
1401
|
+
if len(low_count_types) > 10:
|
|
1402
|
+
logger.info(f" ... and {len(low_count_types) - 10} more types")
|
|
1403
|
+
|
|
1404
|
+
keep_mask = ~cell_types.isin(low_count_types)
|
|
1405
|
+
adata = adata[keep_mask].copy()
|
|
1406
|
+
|
|
1407
|
+
if group_by_column is not None:
|
|
1408
|
+
cell_types = adata.obs[group_by_column].astype(str)
|
|
1409
|
+
else:
|
|
1410
|
+
cell_types = adata.obs[label_column].astype(str)
|
|
1411
|
+
|
|
1412
|
+
if props:
|
|
1413
|
+
dropped = sorted(set(props) & set(low_count_types))
|
|
1414
|
+
if dropped:
|
|
1415
|
+
for ct in dropped:
|
|
1416
|
+
props.pop(ct, None)
|
|
1417
|
+
logger.warning(
|
|
1418
|
+
"Dropping target_proportions for low-count types: %s",
|
|
1419
|
+
", ".join(dropped),
|
|
1420
|
+
)
|
|
1421
|
+
|
|
1422
|
+
type_counts = cell_types.value_counts()
|
|
1423
|
+
|
|
1424
|
+
unique_types = cell_types.unique()
|
|
1392
1425
|
target_totals = _resolve_target_totals(
|
|
1393
1426
|
type_counts=type_counts,
|
|
1394
1427
|
min_cells_per_type=min_cells_per_type,
|
|
@@ -347,7 +347,79 @@ def validate_cell_type_column(
|
|
|
347
347
|
log_fn = logger.error if issue.severity == "error" else logger.warning
|
|
348
348
|
log_fn(f" {issue.code}: {issue.message}")
|
|
349
349
|
|
|
350
|
-
|
|
350
|
+
return result
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
@dataclass
|
|
354
|
+
class LabelOntologyConsistencyResult:
|
|
355
|
+
"""Result of checking label to ontology ID consistency."""
|
|
356
|
+
|
|
357
|
+
label_column: str
|
|
358
|
+
ontology_column: str
|
|
359
|
+
n_labels: int
|
|
360
|
+
n_labels_with_multiple_ids: int
|
|
361
|
+
labels_with_multiple_ids: Dict[str, List[str]]
|
|
362
|
+
n_hierarchical_labels: int
|
|
363
|
+
hierarchical_labels: List[str]
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
_HIERARCHY_PATTERN = re.compile(r"(?:\s>\s|\s->\s|;|\|)")
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def check_label_ontology_consistency(
|
|
370
|
+
adata: ad.AnnData,
|
|
371
|
+
label_column: str,
|
|
372
|
+
ontology_column: str,
|
|
373
|
+
detect_hierarchy: bool = True,
|
|
374
|
+
) -> LabelOntologyConsistencyResult:
|
|
375
|
+
"""
|
|
376
|
+
Check whether each label maps to a single ontology ID.
|
|
377
|
+
|
|
378
|
+
Flags labels that map to multiple valid CL IDs, which can cause label
|
|
379
|
+
collapsing when IDs are inferred from labels.
|
|
380
|
+
"""
|
|
381
|
+
if label_column not in adata.obs.columns:
|
|
382
|
+
raise ValueError(
|
|
383
|
+
f"Label column '{label_column}' not found in adata.obs. "
|
|
384
|
+
f"Available columns: {list(adata.obs.columns)}"
|
|
385
|
+
)
|
|
386
|
+
if ontology_column not in adata.obs.columns:
|
|
387
|
+
raise ValueError(
|
|
388
|
+
f"Ontology column '{ontology_column}' not found in adata.obs. "
|
|
389
|
+
f"Available columns: {list(adata.obs.columns)}"
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
labels = adata.obs[label_column].dropna().astype(str)
|
|
393
|
+
n_labels = int(labels.nunique())
|
|
394
|
+
|
|
395
|
+
pairs = adata.obs[[label_column, ontology_column]].dropna()
|
|
396
|
+
pairs = pairs.drop_duplicates().astype(str)
|
|
397
|
+
valid_mask = pairs[ontology_column].str.startswith("CL:")
|
|
398
|
+
unique_pairs = pairs.loc[valid_mask, [label_column, ontology_column]]
|
|
399
|
+
|
|
400
|
+
labels_with_multiple_ids: Dict[str, List[str]] = {}
|
|
401
|
+
if not unique_pairs.empty:
|
|
402
|
+
grouped = unique_pairs.groupby(label_column)[ontology_column].unique()
|
|
403
|
+
for label, ids in grouped.items():
|
|
404
|
+
unique_ids = sorted(set(ids))
|
|
405
|
+
if len(unique_ids) > 1:
|
|
406
|
+
labels_with_multiple_ids[str(label)] = unique_ids
|
|
407
|
+
|
|
408
|
+
hierarchical_labels: List[str] = []
|
|
409
|
+
if detect_hierarchy:
|
|
410
|
+
for label in labels.unique():
|
|
411
|
+
if _HIERARCHY_PATTERN.search(str(label)):
|
|
412
|
+
hierarchical_labels.append(str(label))
|
|
413
|
+
|
|
414
|
+
return LabelOntologyConsistencyResult(
|
|
415
|
+
label_column=label_column,
|
|
416
|
+
ontology_column=ontology_column,
|
|
417
|
+
n_labels=n_labels,
|
|
418
|
+
n_labels_with_multiple_ids=len(labels_with_multiple_ids),
|
|
419
|
+
labels_with_multiple_ids=labels_with_multiple_ids,
|
|
420
|
+
n_hierarchical_labels=len(hierarchical_labels),
|
|
421
|
+
hierarchical_labels=hierarchical_labels,
|
|
422
|
+
)
|
|
351
423
|
|
|
352
424
|
|
|
353
425
|
def validate_multiple_columns(
|
|
@@ -251,6 +251,36 @@ def _convert_ensembl_to_hugo(
|
|
|
251
251
|
return np.array(converted), stats
|
|
252
252
|
|
|
253
253
|
|
|
254
|
+
def _normalize_var_names(
|
|
255
|
+
var_names: pd.Index,
|
|
256
|
+
var_df: pd.DataFrame,
|
|
257
|
+
ensembl_to_hugo: Dict[str, str],
|
|
258
|
+
) -> Tuple[np.ndarray, Dict[str, int], bool, bool]:
|
|
259
|
+
"""
|
|
260
|
+
Normalize var_names using feature_name and Ensembl -> HUGO mapping.
|
|
261
|
+
|
|
262
|
+
Returns converted names, conversion stats, and flags indicating
|
|
263
|
+
whether non-symbol IDs were detected and feature_name was used.
|
|
264
|
+
"""
|
|
265
|
+
first_gene = str(var_names[0])
|
|
266
|
+
uses_non_symbol_ids = (
|
|
267
|
+
first_gene.isdigit() or
|
|
268
|
+
first_gene.startswith("ENSG") or
|
|
269
|
+
first_gene.startswith("ENST")
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
base_names = var_names.values
|
|
273
|
+
used_feature_name = False
|
|
274
|
+
if uses_non_symbol_ids and "feature_name" in var_df.columns:
|
|
275
|
+
base_names = var_df["feature_name"].values.astype(str)
|
|
276
|
+
used_feature_name = True
|
|
277
|
+
|
|
278
|
+
converted_names, stats = _convert_ensembl_to_hugo(
|
|
279
|
+
np.asarray(base_names), ensembl_to_hugo
|
|
280
|
+
)
|
|
281
|
+
return converted_names, stats, uses_non_symbol_ids, used_feature_name
|
|
282
|
+
|
|
283
|
+
|
|
254
284
|
def normalize_gene_names(
|
|
255
285
|
adata: ad.AnnData,
|
|
256
286
|
ensembl_to_hugo: Optional[Dict[str, str]] = None,
|
|
@@ -278,6 +308,7 @@ def normalize_gene_names(
|
|
|
278
308
|
-------
|
|
279
309
|
AnnData
|
|
280
310
|
AnnData with normalized gene names in var_names.
|
|
311
|
+
If adata.raw is present, its var_names are updated to stay aligned.
|
|
281
312
|
|
|
282
313
|
Notes
|
|
283
314
|
-----
|
|
@@ -297,22 +328,15 @@ def normalize_gene_names(
|
|
|
297
328
|
if copy:
|
|
298
329
|
adata = adata.copy()
|
|
299
330
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
331
|
+
if ensembl_to_hugo is None:
|
|
332
|
+
ensembl_to_hugo = load_ensembl_to_hugo_mapping()
|
|
333
|
+
|
|
334
|
+
converted_names, stats, uses_non_symbol_ids, used_feature_name = _normalize_var_names(
|
|
335
|
+
adata.var_names, adata.var, ensembl_to_hugo
|
|
305
336
|
)
|
|
306
337
|
|
|
307
338
|
if not uses_non_symbol_ids:
|
|
308
339
|
logger.info("Gene names already appear to be HUGO symbols")
|
|
309
|
-
# Still check for any remaining Ensembl IDs and convert them
|
|
310
|
-
if ensembl_to_hugo is None:
|
|
311
|
-
ensembl_to_hugo = load_ensembl_to_hugo_mapping()
|
|
312
|
-
|
|
313
|
-
converted_names, stats = _convert_ensembl_to_hugo(
|
|
314
|
-
adata.var_names.values, ensembl_to_hugo
|
|
315
|
-
)
|
|
316
340
|
if stats["converted_ensembl"] > 0:
|
|
317
341
|
adata.var_names = pd.Index(converted_names)
|
|
318
342
|
adata.var_names_make_unique()
|
|
@@ -324,38 +348,46 @@ def normalize_gene_names(
|
|
|
324
348
|
f"{stats['unmapped_ensembl']:,} Ensembl IDs not found in mapping; "
|
|
325
349
|
"leaving them unchanged"
|
|
326
350
|
)
|
|
327
|
-
|
|
351
|
+
else:
|
|
352
|
+
if used_feature_name:
|
|
353
|
+
logger.info("Using 'feature_name' column as gene names")
|
|
328
354
|
|
|
329
|
-
|
|
330
|
-
if "feature_name" in adata.var.columns:
|
|
331
|
-
feature_names = adata.var["feature_name"].values.astype(str)
|
|
332
|
-
adata.var_names = pd.Index(feature_names)
|
|
333
|
-
logger.info("Using 'feature_name' column as gene names")
|
|
355
|
+
adata.var_names = pd.Index(converted_names)
|
|
334
356
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
357
|
+
if stats["converted_ensembl"] > 0 or stats["unmapped_ensembl"] > 0:
|
|
358
|
+
logger.info(
|
|
359
|
+
f"Gene mapping: {stats['converted_ensembl']:,} converted, "
|
|
360
|
+
f"{stats['already_hugo']:,} already HUGO, "
|
|
361
|
+
f"{stats['unmapped_ensembl']:,} unmapped"
|
|
362
|
+
)
|
|
363
|
+
if stats["unmapped_ensembl"] > 0:
|
|
364
|
+
logger.warning(
|
|
365
|
+
f"{stats['unmapped_ensembl']:,} Ensembl IDs not found in mapping; "
|
|
366
|
+
"leaving them unchanged"
|
|
367
|
+
)
|
|
368
|
+
else:
|
|
369
|
+
logger.info(f"All {stats['already_hugo']:,} genes already HUGO symbols")
|
|
338
370
|
|
|
339
|
-
|
|
340
|
-
adata.var_names.values, ensembl_to_hugo
|
|
341
|
-
)
|
|
342
|
-
adata.var_names = pd.Index(converted_names)
|
|
371
|
+
adata.var_names_make_unique()
|
|
343
372
|
|
|
344
|
-
if
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
f"{stats['already_hugo']:,} already HUGO, "
|
|
348
|
-
f"{stats['unmapped_ensembl']:,} unmapped"
|
|
373
|
+
if adata.raw is not None:
|
|
374
|
+
raw_converted, raw_stats, _, raw_used_feature = _normalize_var_names(
|
|
375
|
+
adata.raw.var_names, adata.raw.var, ensembl_to_hugo
|
|
349
376
|
)
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
377
|
+
raw_converted_index = pd.Index(raw_converted)
|
|
378
|
+
|
|
379
|
+
if raw_used_feature or not raw_converted_index.equals(adata.raw.var_names):
|
|
380
|
+
raw_adata = adata.raw.to_adata()
|
|
381
|
+
raw_adata.var_names = raw_converted_index
|
|
382
|
+
raw_adata.var_names_make_unique()
|
|
383
|
+
adata.raw = raw_adata
|
|
384
|
+
logger.info("Updated adata.raw.var_names to normalized HUGO symbols")
|
|
385
|
+
if raw_stats["unmapped_ensembl"] > 0:
|
|
386
|
+
logger.warning(
|
|
387
|
+
f"{raw_stats['unmapped_ensembl']:,} raw Ensembl IDs not found in mapping; "
|
|
388
|
+
"leaving them unchanged"
|
|
389
|
+
)
|
|
357
390
|
|
|
358
|
-
adata.var_names_make_unique()
|
|
359
391
|
return adata
|
|
360
392
|
|
|
361
393
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/data/gene_mappings/ensembl_to_hugo_human.tsv
RENAMED
|
File without changes
|
|
File without changes
|
{spatialcore-0.2.0 → spatialcore-0.2.2}/src/spatialcore/data/ontology_mappings/ontology_index.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|