topomics 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
topomics/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ from . import models, pl, pp, tl
4
+ from .models import BaseTopicModel, MultimodalAmortizedLDA, ShareTopic_LDA_Multi, SVEM_LDA_Multi
5
+
6
+ __all__ = [
7
+ "pl",
8
+ "pp",
9
+ "tl",
10
+ "models",
11
+ "SVEM_LDA_Multi",
12
+ "BaseTopicModel",
13
+ "MultimodalAmortizedLDA",
14
+ "ShareTopic_LDA_Multi",
15
+ ]
16
+
17
# Package was renamed from "omics-topic" -> "topomics"; tolerate stale editable installs.
# Probe the current distribution name first, then the legacy one; the first
# name known to importlib.metadata wins.
for _dist_name in ("topomics", "omics-topic"):
    try:
        __version__ = version(_dist_name)
    except PackageNotFoundError:
        continue
    break
else:
    # Neither distribution is installed (e.g. running from a source checkout).
    __version__ = "0.0.0+unknown"
del _dist_name
@@ -0,0 +1,18 @@
1
+ """Data preprocessing and extraction utilities."""
2
+
3
+ from .data_extraction import (
4
+ extract_from_adata_dict,
5
+ extract_from_anndata,
6
+ extract_from_mudata,
7
+ extract_from_spatialdata,
8
+ )
9
+ from .data_type_detection import detect_data_type, validate_data_type
10
+
11
+ __all__ = [
12
+ "detect_data_type",
13
+ "validate_data_type",
14
+ "extract_from_mudata",
15
+ "extract_from_adata_dict",
16
+ "extract_from_anndata",
17
+ "extract_from_spatialdata",
18
+ ]
@@ -0,0 +1,354 @@
1
+ """Data extraction and preprocessing utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import scipy.sparse as sp
7
+ from anndata import AnnData
8
+ from mudata import MuData
9
+
10
+
11
def extract_from_mudata(
    mdata: MuData,
    modalities: list[str] | None = None,
    layers: dict[str, str | None] | str | None = None,
    spatial_keys: dict[str, str] | str | None = None,
) -> tuple[AnnData, dict]:
    """
    Extract and preprocess data from MuData.

    This function extracts modality-specific data from MuData, with support for:
    - Layer selection (per-modality or global)
    - Spatial graph extraction
    - Modality subsetting

    Parameters
    ----------
    mdata : MuData
        Input MuData object containing multiple modalities.
    modalities : list[str] | None
        Subset of modalities to use. If None, uses all modalities in mdata.mod.
    layers : dict[str, str | None] | str | None
        Layer specification:
        - dict: Per-modality layer specification, e.g. {"rna": "counts", "protein": None}
        - str: Same layer name for all modalities, e.g. "counts"
        - None: Use .X for all modalities (default)
    spatial_keys : dict[str, str] | str | None
        Spatial graph keys in .obsp:
        - dict: Per-modality spatial graph keys, e.g. {"rna": "spatial_connectivities"}
        - str: Same spatial key for all modalities
        - None: No spatial graphs (default)

    Returns
    -------
    adata_concat : AnnData
        Concatenated AnnData with features from all modalities.
        Features are concatenated horizontally in the order of `modalities`.
        ``.obs`` is a copy of ``mdata.obs``.
    metadata : dict
        Dictionary containing:
        - modality_names: list[str] - names of modalities in concatenation order
        - feature_counts: list[int] - feature counts per modality
        - spatial_info: dict | None - spatial graph information
        - layer_dict: dict[str, str | None] - layer used for each modality

        ``modality_names`` and ``layer_dict`` are fresh objects; mutating them
        does not affect the arguments passed in.

    Raises
    ------
    ValueError
        If there are no modalities to extract, a requested modality is missing,
        a modality's cell count differs from ``mdata.n_obs``, or a modality
        has no ``.X`` and no layer was requested for it.
    KeyError
        If a requested layer does not exist in its modality.

    Notes
    -----
    NOTE(review): alignment is validated by comparing cell *counts* only; the
    order of observations in each modality is not compared with ``mdata.obs``.
    Confirm that callers always pass row-aligned modalities.

    Examples
    --------
    >>> # Extract all modalities with default .X
    >>> adata, meta = extract_from_mudata(mdata)

    >>> # Extract specific modalities with layer selection
    >>> adata, meta = extract_from_mudata(
    ...     mdata, modalities=["rna", "protein"], layers={"rna": "counts", "protein": None}
    ... )

    >>> # Same layer for all modalities
    >>> adata, meta = extract_from_mudata(mdata, layers="counts")

    >>> # With spatial graphs
    >>> adata, meta = extract_from_mudata(mdata, layers="counts", spatial_keys="spatial_connectivities")
    """
    # Normalize modalities into a fresh list so the returned metadata never
    # aliases (and cannot be changed through) the caller's own list.
    modalities = list(mdata.mod.keys()) if modalities is None else list(modalities)
    if not modalities:
        # Without this guard the failure surfaces later as an opaque
        # "need at least one array to concatenate" error from hstack.
        raise ValueError(
            "No modalities to extract: `modalities` is empty or the MuData contains no modalities."
        )

    # Normalize layers to dict (copied when the caller supplied a dict)
    if isinstance(layers, str):
        layer_dict = dict.fromkeys(modalities, layers)
    elif layers is None:
        layer_dict = dict.fromkeys(modalities)
    else:
        layer_dict = dict(layers)

    # Normalize spatial_keys to dict (copied when the caller supplied a dict)
    if isinstance(spatial_keys, str):
        spatial_dict = dict.fromkeys(modalities, spatial_keys)
    elif spatial_keys is None:
        spatial_dict = {}
    else:
        spatial_dict = dict(spatial_keys)

    # Extract data
    matrices = []
    feat_counts = []
    var_names = []
    n_cells_ref = mdata.n_obs

    for mod in modalities:
        if mod not in mdata.mod:
            raise ValueError(f"Modality '{mod}' not found in MuData. Available: {list(mdata.mod.keys())}")

        adata_mod = mdata.mod[mod]

        # Validate cell counts
        if adata_mod.n_obs != n_cells_ref:
            raise ValueError(
                f"Modality '{mod}' has {adata_mod.n_obs} cells, "
                f"but MuData has {n_cells_ref} cells. All modalities must be aligned."
            )

        # Extract from layer if specified, otherwise use .X
        layer_key = layer_dict.get(mod)
        if layer_key is not None:
            if layer_key not in adata_mod.layers:
                raise KeyError(
                    f"Layer '{layer_key}' not found in modality '{mod}'. "
                    f"Available layers: {list(adata_mod.layers.keys())}"
                )
            X = adata_mod.layers[layer_key]
        else:
            X = adata_mod.X
            if X is None:
                # np.asarray(None) would yield a 0-d array and fail below
                # with an unrelated IndexError on X.shape[1].
                raise ValueError(f"Modality '{mod}' has no data in .X; specify a layer for it via `layers`.")

        # Convert to appropriate format
        if sp.issparse(X):
            X = X.tocsr()
        else:
            X = np.asarray(X)

        # Ensure 2D
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        matrices.append(X)
        feat_counts.append(X.shape[1])
        var_names.extend(adata_mod.var_names)

    # Concatenate matrices; a single sparse block makes the whole result sparse
    if any(sp.issparse(M) for M in matrices):
        X_concat = sp.hstack(matrices, format="csr")
    else:
        X_concat = np.hstack(matrices)

    # Create concatenated AnnData
    adata_concat = AnnData(X_concat, obs=mdata.obs.copy())
    adata_concat.var_names = var_names

    # Extract spatial graphs if specified
    spatial_info = _extract_spatial_graphs(mdata, modalities, spatial_dict)

    # Build metadata
    metadata = {
        "modality_names": modalities,
        "feature_counts": feat_counts,
        "spatial_info": spatial_info,
        "layer_dict": layer_dict,
    }

    return adata_concat, metadata
157
+
158
+
159
def extract_from_adata_dict(
    adata_dict: dict[str, AnnData],
    layers: dict[str, str | None] | str | None = None,
    spatial_keys: dict[str, str] | str | None = None,
) -> tuple[AnnData, dict]:
    """
    Extract and preprocess data from a dict of AnnData objects.

    The dict is wrapped in a MuData and handed to extract_from_mudata(),
    with the dict's keys (in iteration order) used as the modality list.

    Parameters
    ----------
    adata_dict : dict[str, AnnData]
        Dictionary mapping modality names to AnnData objects.
    layers : dict[str, str | None] | str | None
        Layer specification (same as extract_from_mudata).
    spatial_keys : dict[str, str] | str | None
        Spatial graph keys (same as extract_from_mudata).

    Returns
    -------
    adata_concat : AnnData
        Concatenated AnnData.
    metadata : dict
        Metadata dictionary.

    Examples
    --------
    >>> adata_dict = {"rna": adata_rna, "protein": adata_protein}
    >>> adata, meta = extract_from_adata_dict(adata_dict, layers={"rna": "counts"})
    """
    # Build the MuData first, then take the modality order from the dict keys.
    wrapped = MuData(adata_dict)
    modality_order = [*adata_dict.keys()]
    return extract_from_mudata(wrapped, modality_order, layers, spatial_keys)
195
+
196
+
197
def extract_from_anndata(
    adata: AnnData,
    modality_name: str = "rna",
    layer: str | None = None,
    spatial_key: str | None = None,
) -> tuple[AnnData, dict]:
    """
    Extract and preprocess data from single AnnData (single modality).

    The input object itself is returned (no copy is made); only the metadata
    dictionary is built so that it matches the multi-modality extractors.

    Parameters
    ----------
    adata : AnnData
        Input AnnData object. When `layer` is given, its ``.X`` is overwritten
        in place with that layer.
    modality_name : str
        Name to assign to this modality (default: "rna").
    layer : str | None
        Layer to extract. If None, uses .X.
    spatial_key : str | None
        Spatial graph key in .obsp. If None, no spatial graph.

    Returns
    -------
    adata_processed : AnnData
        The same AnnData object that was passed in (with the layer assigned
        to .X if specified).
    metadata : dict
        Metadata dictionary with keys ``modality_names``, ``feature_counts``,
        ``spatial_info`` and ``layer_dict``. ``layer_dict`` is empty when no
        layer was requested.

    Raises
    ------
    KeyError
        If `layer` is given but not present in ``adata.layers``.

    Examples
    --------
    >>> adata, meta = extract_from_anndata(
    ...     adata, modality_name="rna", layer="counts", spatial_key="spatial_connectivities"
    ... )
    """
    layer_requested = layer is not None

    # Validate before touching the object so a bad layer name leaves it unchanged.
    if layer_requested and layer not in adata.layers:
        raise KeyError(f"Layer '{layer}' not found. Available layers: {list(adata.layers.keys())}")

    if layer_requested:
        # Deliberately reuse the caller's object: the selected layer becomes .X.
        adata.X = adata.layers[layer]

    # Imported inside the function, after the layer check, as in the rest of this module.
    from topomics.utils.amortized_utils import _resolve_spatial_graph_from_adata

    graph_info = _resolve_spatial_graph_from_adata(adata, spatial_key)

    used_layers = {}
    if layer:
        used_layers[modality_name] = layer

    metadata = {
        "modality_names": [modality_name],
        "feature_counts": [adata.n_vars],
        "spatial_info": graph_info,
        "layer_dict": used_layers,
    }
    return adata, metadata
254
+
255
+
256
def extract_from_spatialdata(
    sdata,  # SpatialData type
    table_key: str = "table",
    modalities: list[str] | None = None,
    layers: dict[str, str | None] | str | None = None,
    spatial_key: str | None = None,
) -> tuple[AnnData, dict]:
    """
    Extract and preprocess data from SpatialData.

    The table stored under `table_key` is dispatched on its type: a MuData
    table goes through extract_from_mudata(), an AnnData table through
    extract_from_anndata().

    Parameters
    ----------
    sdata : SpatialData
        Input SpatialData object. Only its ``.tables`` mapping is accessed here.
    table_key : str
        Which table to extract from sdata.tables (default: "table").
    modalities : list[str] | None
        Modalities to extract (if table is MuData-like). For an AnnData table
        only the first entry is used, as the modality name (default "rna").
    layers : dict[str, str | None] | str | None
        Layer specification. For an AnnData table a string is used as-is and
        a dict is looked up by the modality name; a missing entry means .X.
    spatial_key : str | None
        Spatial graph key in the table.

    Returns
    -------
    adata_concat : AnnData
        Concatenated AnnData.
    metadata : dict
        Metadata dictionary.

    Raises
    ------
    KeyError
        If `table_key` is not present in ``sdata.tables``.
    TypeError
        If the selected table is neither AnnData nor MuData.

    Examples
    --------
    >>> adata, meta = extract_from_spatialdata(sdata, table_key="table", layers="counts", spatial_key="spatial")
    """
    # Extract table from SpatialData
    if table_key not in sdata.tables:
        raise KeyError(f"Table '{table_key}' not found in SpatialData. Available tables: {list(sdata.tables.keys())}")

    table = sdata.tables[table_key]

    # MuData is imported unconditionally at module level, so a plain
    # isinstance check suffices (no NameError fallback is needed).
    if isinstance(table, MuData):
        return extract_from_mudata(table, modalities, layers, spatial_key)

    if isinstance(table, AnnData):
        modality_name = modalities[0] if modalities else "rna"
        if isinstance(layers, dict):
            # Honor a per-modality specification instead of silently dropping it.
            layer = layers.get(modality_name)
        elif isinstance(layers, str):
            layer = layers
        else:
            layer = None
        return extract_from_anndata(table, modality_name, layer, spatial_key)

    raise TypeError(f"Table '{table_key}' is type {type(table)}, expected AnnData or MuData")
311
+
312
+
313
def _extract_spatial_graphs(
    mdata: MuData,
    modalities: list[str],
    spatial_dict: dict[str, str],
) -> dict | None:
    """
    Collect spatial graph information for the requested MuData modalities.

    Parameters
    ----------
    mdata : MuData
        Input MuData object.
    modalities : list[str]
        List of modality names.
    spatial_dict : dict[str, str]
        Mapping of modality names to spatial graph keys in .obsp.
        Modalities without a (truthy) entry are skipped.

    Returns
    -------
    dict | None
        Mapping of modality name to whatever
        ``_resolve_spatial_graph_from_adata`` returned for it, keeping only
        truthy results; None when `spatial_dict` is empty or nothing was
        resolved. Each entry is expected to hold ``adjacency`` (sparse matrix)
        and ``key`` (the obsp key used) -- confirm against the helper.
    """
    # Import here to avoid circular dependency
    from topomics.utils.amortized_utils import _resolve_spatial_graph_from_adata

    if not spatial_dict:
        return None

    resolved_by_modality = {}
    for name in modalities:
        obsp_key = spatial_dict.get(name)
        if not obsp_key:
            # No spatial graph requested for this modality.
            continue
        resolved = _resolve_spatial_graph_from_adata(mdata.mod[name], obsp_key)
        if resolved:
            resolved_by_modality[name] = resolved

    # Collapse "nothing found" to None so callers can test a single value.
    return resolved_by_modality or None
@@ -0,0 +1,91 @@
1
+ """Type detection utilities for flexible data input."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from anndata import AnnData
6
+
7
+
8
def detect_data_type(data) -> str:
    """
    Classify the input container by its Python type.

    Parameters
    ----------
    data
        Input data to detect type for.

    Returns
    -------
    str
        One of: "anndata", "mudata", "spatialdata", "dict", "unknown".
        A dict is reported as "dict" only if every value is an AnnData
        (an empty dict therefore also yields "dict"); any other dict is
        "unknown".

    Examples
    --------
    >>> from anndata import AnnData
    >>> import numpy as np
    >>> adata = AnnData(np.random.rand(10, 20))
    >>> detect_data_type(adata)
    'anndata'
    """
    # Dicts are handled first and never fall through to the other checks.
    if isinstance(data, dict):
        only_anndata_values = all(isinstance(value, AnnData) for value in data.values())
        return "dict" if only_anndata_values else "unknown"

    if isinstance(data, AnnData):
        return "anndata"

    # MuData is probed lazily so this module works without mudata installed.
    try:
        from mudata import MuData
    except ImportError:
        pass
    else:
        if isinstance(data, MuData):
            return "mudata"

    # Same lazy probing for the optional spatialdata package.
    try:
        from spatialdata import SpatialData
    except ImportError:
        pass
    else:
        if isinstance(data, SpatialData):
            return "spatialdata"

    return "unknown"
60
+
61
+
62
def validate_data_type(data) -> None:
    """
    Ensure `data` is one of the supported input containers.

    Delegates the classification to detect_data_type() and rejects anything
    it reports as "unknown".

    Parameters
    ----------
    data
        Input data to validate.

    Raises
    ------
    TypeError
        If data type is not supported.

    Examples
    --------
    >>> from anndata import AnnData
    >>> import numpy as np
    >>> adata = AnnData(np.random.rand(10, 20))
    >>> validate_data_type(adata)  # No error
    >>> validate_data_type("invalid")  # Raises TypeError
    Traceback (most recent call last):
    ...
    TypeError: Unsupported data type: <class 'str'>. Supported types: AnnData, MuData, SpatialData, or dict[str, AnnData]
    """
    if detect_data_type(data) != "unknown":
        return

    raise TypeError(
        f"Unsupported data type: {type(data)}. Supported types: AnnData, MuData, SpatialData, or dict[str, AnnData]"
    )