vdata 0.3.4__tar.gz → 0.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vdata-0.3.4 → vdata-0.3.6}/PKG-INFO +2 -2
- {vdata-0.3.4 → vdata-0.3.6}/pyproject.toml +3 -3
- vdata-0.3.6/vdata/IO/__init__.py +12 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/__init__.py +6 -9
- {vdata-0.3.4 → vdata-0.3.6}/vdata/_typing.py +6 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/anndata.py +3 -1
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/data.py +23 -20
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/layers.py +4 -4
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/objects.py +2 -1
- vdata-0.3.6/vdata/data/_parse/time.py +119 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/utils.py +1 -2
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/concatenate.py +5 -5
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/convert.py +15 -10
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/read.py +11 -7
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/vdata.py +31 -38
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/view.py +10 -8
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/write.py +14 -3
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/_parse.py +17 -15
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/base.py +24 -18
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/dataframe.py +28 -22
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/view.py +3 -1
- {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/__init__.py +7 -4
- vdata-0.3.6/vdata/timepoint/_typing.py +9 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/array.py +9 -1
- {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/index.py +1 -1
- {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/timepoint.py +8 -5
- vdata-0.3.4/vdata/IO/__init__.py +0 -11
- vdata-0.3.4/vdata/data/_parse/time.py +0 -92
- vdata-0.3.4/vdata/timepoint/_typing.py +0 -3
- {vdata-0.3.4 → vdata-0.3.6}/LICENSE +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/README.md +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/IO/errors.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/IO/logger.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/_meta.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/anndata.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/containers.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/dataframe.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/array_view.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/cli.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_indexing.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/obs.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/uns.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/var.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/base.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/layers.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/lazy.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/obs.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/var.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/view.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/hash.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/data/name.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/names.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/py.typed +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/index.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/indexers.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/indexing.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/_functions.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/range.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/__init__.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/array.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/dict.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/tdf.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/update.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/utils.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/update/vdf.py +0 -0
- {vdata-0.3.4 → vdata-0.3.6}/vdata/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vdata
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.6
|
|
4
4
|
Summary: Annotated multivariate observation of timestamped data
|
|
5
5
|
Author: Matteo Bouvier
|
|
6
6
|
Author-email: Matteo Bouvier <matteo.bouvier@hotmail.fr>
|
|
@@ -11,7 +11,7 @@ Requires-Dist: anndata>=0.10.4
|
|
|
11
11
|
Requires-Dist: scipy>=1.12.0
|
|
12
12
|
Requires-Dist: numpy-indexed>=0.3.7
|
|
13
13
|
Requires-Dist: ch5mpy>=0.5.1
|
|
14
|
-
Requires-Dist: ezarr>=1.1.
|
|
14
|
+
Requires-Dist: ezarr>=1.1.4
|
|
15
15
|
Requires-Dist: h5dataframe>=0.2.3 ; extra == 'update'
|
|
16
16
|
Requires-Python: >=3.12
|
|
17
17
|
Provides-Extra: update
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "vdata"
|
|
3
|
-
version = "0.3.4"
|
|
3
|
+
version = "0.3.6"
|
|
4
4
|
description = "Annotated multivariate observation of timestamped data"
|
|
5
5
|
authors = [{ name = "Matteo Bouvier", email="matteo.bouvier@hotmail.fr"}]
|
|
6
6
|
license = "CECILL-B"
|
|
@@ -14,7 +14,7 @@ dependencies = [
|
|
|
14
14
|
"scipy>=1.12.0",
|
|
15
15
|
"numpy-indexed>=0.3.7",
|
|
16
16
|
"ch5mpy>=0.5.1",
|
|
17
|
-
"ezarr>=1.1.
|
|
17
|
+
"ezarr>=1.1.4",
|
|
18
18
|
]
|
|
19
19
|
|
|
20
20
|
[dependency-groups]
|
|
@@ -25,7 +25,7 @@ dev = [
|
|
|
25
25
|
|
|
26
26
|
docs= [
|
|
27
27
|
"mkdocs-material>=9.5.10",
|
|
28
|
-
|
|
28
|
+
"mkdocstrings[python]>=0.24.0",
|
|
29
29
|
"mkdocs>=1.5.3"
|
|
30
30
|
]
|
|
31
31
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from vdata.IO.errors import IncoherenceError, InvalidVDataFileError, ShapeError, VBaseError, VLockError, VReadOnlyError
|
|
2
|
+
from vdata.IO.logger import generalLogger
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"generalLogger",
|
|
6
|
+
"IncoherenceError",
|
|
7
|
+
"InvalidVDataFileError",
|
|
8
|
+
"ShapeError",
|
|
9
|
+
"VBaseError",
|
|
10
|
+
"VLockError",
|
|
11
|
+
"VReadOnlyError",
|
|
12
|
+
]
|
|
@@ -3,17 +3,9 @@
|
|
|
3
3
|
from importlib.metadata import metadata
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
import ch5mpy as ch
|
|
7
|
-
import ezarr
|
|
8
|
-
|
|
9
6
|
from vdata.data import VData, VDataView, concatenate, convert_anndata_to_vdata
|
|
10
7
|
from vdata.data.name import WRITE_PROTOCOL_VERSION
|
|
11
|
-
from vdata.IO import (
|
|
12
|
-
IncoherenceError,
|
|
13
|
-
ShapeError,
|
|
14
|
-
VBaseError,
|
|
15
|
-
VLockError,
|
|
16
|
-
)
|
|
8
|
+
from vdata.IO import IncoherenceError, InvalidVDataFileError, ShapeError, VBaseError, VLockError, VReadOnlyError
|
|
17
9
|
from vdata.tdf import RepeatingIndex, TemporalDataFrame, TemporalDataFrameView
|
|
18
10
|
from vdata.timepoint import TimePoint
|
|
19
11
|
from vdata.utils import copy_vdata
|
|
@@ -27,6 +19,9 @@ __version__ = metadata("vdata").get("version")
|
|
|
27
19
|
|
|
28
20
|
|
|
29
21
|
def get_version(path: str | Path) -> int:
|
|
22
|
+
import ch5mpy as ch
|
|
23
|
+
import ezarr
|
|
24
|
+
|
|
30
25
|
try:
|
|
31
26
|
return ch.H5Dict.read(path).attributes.get("__vdata_write_version__", 0)
|
|
32
27
|
|
|
@@ -39,6 +34,7 @@ __all__ = [
|
|
|
39
34
|
"convert_anndata_to_vdata",
|
|
40
35
|
"copy_vdata",
|
|
41
36
|
"IncoherenceError",
|
|
37
|
+
"InvalidVDataFileError",
|
|
42
38
|
"RepeatingIndex",
|
|
43
39
|
"ShapeError",
|
|
44
40
|
"TemporalDataFrame",
|
|
@@ -48,5 +44,6 @@ __all__ = [
|
|
|
48
44
|
"VData",
|
|
49
45
|
"VDataView",
|
|
50
46
|
"VLockError",
|
|
47
|
+
"VReadOnlyError",
|
|
51
48
|
"WRITE_PROTOCOL_VERSION",
|
|
52
49
|
]
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
from collections.abc import Collection
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
from types import EllipsisType
|
|
3
4
|
from typing import SupportsIndex, TypedDict
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
7
|
import numpy.typing as npt
|
|
7
8
|
import zarr
|
|
9
|
+
from zarr.abc.store import Store
|
|
10
|
+
from zarr.core.buffer import Buffer
|
|
11
|
+
from zarr.storage import StorePath
|
|
8
12
|
|
|
9
13
|
import vdata.timepoint as tp
|
|
10
14
|
from vdata.array_view import NDArrayView
|
|
@@ -28,6 +32,8 @@ type MultiSlicer = Collection[IFS | tp.TimePoint] | range | slice | EllipsisType
|
|
|
28
32
|
type PreSlicer = IFS | tp.TimePoint | Collection[IFS | bool | tp.TimePoint] | range | slice | EllipsisType
|
|
29
33
|
type Indexer = SupportsIndex | slice | npt.NDArray[np.int_] | npt.NDArray[np.bool_] | None
|
|
30
34
|
|
|
35
|
+
type StoreLike = Store | StorePath | Path | str | dict[str, Buffer]
|
|
36
|
+
|
|
31
37
|
|
|
32
38
|
class AttrDict(TypedDict):
|
|
33
39
|
name: str
|
|
@@ -44,10 +44,12 @@ def parse_AnnData(adata: AnnData, data: ParsingDataIn) -> ParsingDataOut:
|
|
|
44
44
|
generalLogger.debug(" VData creation from an AnnData.")
|
|
45
45
|
|
|
46
46
|
# import and cast obs to a TemporalDataFrame
|
|
47
|
+
if data.timepoints_column_name is not None:
|
|
48
|
+
del data.obs[data.timepoints_column_name]
|
|
49
|
+
|
|
47
50
|
obs = TemporalDataFrame(
|
|
48
51
|
adata.obs,
|
|
49
52
|
timepoints=data.timepoints_list,
|
|
50
|
-
time_col_name=data.time_col_name,
|
|
51
53
|
name="obs",
|
|
52
54
|
lock=(True, False),
|
|
53
55
|
)
|
|
@@ -33,7 +33,7 @@ def at_least_empty_dict(d: Mapping[Any, Any] | None) -> Mapping[Any, Any]:
|
|
|
33
33
|
def _get_time_list(
|
|
34
34
|
time_list: tp.TimePointNArray | NDArrayView[tp.TimePoint] | None,
|
|
35
35
|
data: Any,
|
|
36
|
-
|
|
36
|
+
timepoints_column_name: str | None,
|
|
37
37
|
) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
|
|
38
38
|
if time_list is not None:
|
|
39
39
|
return time_list
|
|
@@ -47,8 +47,8 @@ def _get_time_list(
|
|
|
47
47
|
if isinstance(df, TemporalDataFrameBase):
|
|
48
48
|
return df.timepoints_column
|
|
49
49
|
|
|
50
|
-
elif isinstance(df, (pd.DataFrame, EZDataFrame)) and
|
|
51
|
-
return tp.as_timepointarray(df[
|
|
50
|
+
elif isinstance(df, (pd.DataFrame, EZDataFrame)) and timepoints_column_name is not None:
|
|
51
|
+
return tp.as_timepointarray(df[timepoints_column_name])
|
|
52
52
|
|
|
53
53
|
return None
|
|
54
54
|
|
|
@@ -61,13 +61,13 @@ def _valid_obs(
|
|
|
61
61
|
| None,
|
|
62
62
|
obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase | None,
|
|
63
63
|
time_list: tp.TimePointNArray | NDArrayView[tp.TimePoint] | None,
|
|
64
|
-
|
|
64
|
+
timepoints_column_name: str | None,
|
|
65
65
|
) -> TemporalDataFrameBase:
|
|
66
66
|
if obs is None:
|
|
67
67
|
generalLogger.debug("Default empty TemporalDataFrame for obs.")
|
|
68
68
|
|
|
69
69
|
_obs_index = get_obs_index(data, obs)
|
|
70
|
-
_time_list = _get_time_list(time_list, data,
|
|
70
|
+
_time_list = _get_time_list(time_list, data, timepoints_column_name)
|
|
71
71
|
|
|
72
72
|
_obs = TemporalDataFrame(
|
|
73
73
|
timepoints=_time_list,
|
|
@@ -81,7 +81,9 @@ def _valid_obs(
|
|
|
81
81
|
generalLogger.debug(f" 2. \u2713 'obs' is a {type(obs).__name__}.")
|
|
82
82
|
|
|
83
83
|
if isinstance(obs, (pd.DataFrame, EZDataFrame)):
|
|
84
|
-
_obs = TemporalDataFrame(
|
|
84
|
+
_obs = TemporalDataFrame(
|
|
85
|
+
obs, timepoints=time_list, timepoints_column_name=timepoints_column_name, name="obs", index=obs.index
|
|
86
|
+
)
|
|
85
87
|
_obs.lock_indices()
|
|
86
88
|
return _obs
|
|
87
89
|
|
|
@@ -104,14 +106,14 @@ def _valid_var(
|
|
|
104
106
|
| Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
|
|
105
107
|
| None,
|
|
106
108
|
var: pd.DataFrame | EZDataFrame | None,
|
|
107
|
-
|
|
109
|
+
timepoints_column_name: str | None,
|
|
108
110
|
) -> EZDataFrame:
|
|
109
111
|
if var is None:
|
|
110
112
|
generalLogger.debug("Default empty DataFrame for vars.")
|
|
111
113
|
_index = get_var_index(data, var)
|
|
112
114
|
|
|
113
|
-
if _index is not None and
|
|
114
|
-
ix = np.where(_index ==
|
|
115
|
+
if _index is not None and timepoints_column_name is not None:
|
|
116
|
+
ix = np.where(_index == timepoints_column_name)[0][0]
|
|
115
117
|
_index = np.delete(_index, ix)
|
|
116
118
|
|
|
117
119
|
return EZDataFrame(pd.DataFrame(index=_index))
|
|
@@ -139,7 +141,7 @@ class ParsingDataIn:
|
|
|
139
141
|
varm: Mapping[str, pd.DataFrame | EZDataFrame]
|
|
140
142
|
varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
|
|
141
143
|
timepoints: pd.DataFrame | EZDataFrame
|
|
142
|
-
|
|
144
|
+
timepoints_column_name: str | None
|
|
143
145
|
timepoints_list: tp.TimePointNArray | NDArrayView[tp.TimePoint] | None
|
|
144
146
|
uns: Mapping[str, Any]
|
|
145
147
|
layers: dict[str, TemporalDataFrame | TemporalDataFrameView] = field(init=False)
|
|
@@ -166,23 +168,24 @@ class ParsingDataIn:
|
|
|
166
168
|
var: pd.DataFrame | EZDataFrame | None,
|
|
167
169
|
varm: Mapping[str, pd.DataFrame | EZDataFrame] | None,
|
|
168
170
|
varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]] | None,
|
|
169
|
-
timepoints: pd.DataFrame | EZDataFrame | None,
|
|
170
|
-
|
|
171
|
+
timepoints: pd.DataFrame | EZDataFrame | tp.TimePointLike | None,
|
|
172
|
+
timepoints_column_name: str | None,
|
|
171
173
|
timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
|
|
172
174
|
uns: MutableMapping[str, Any] | ez.EZDict[Any] | None,
|
|
173
175
|
) -> ParsingDataIn:
|
|
174
|
-
_timepoints_list = parse_timepoints_list(timepoints_list,
|
|
176
|
+
_timepoints_list = parse_timepoints_list(timepoints_list, timepoints_column_name, obs)
|
|
177
|
+
_obs = _valid_obs(data, obs, _timepoints_list, timepoints_column_name)
|
|
175
178
|
|
|
176
179
|
return ParsingDataIn(
|
|
177
180
|
data,
|
|
178
|
-
|
|
181
|
+
_obs,
|
|
179
182
|
at_least_empty_dict(obsm),
|
|
180
183
|
at_least_empty_dict(obsp),
|
|
181
|
-
_valid_var(data, var,
|
|
184
|
+
_valid_var(data, var, timepoints_column_name),
|
|
182
185
|
at_least_empty_dict(varm),
|
|
183
186
|
at_least_empty_dict(varp),
|
|
184
187
|
parse_timepoints(timepoints),
|
|
185
|
-
|
|
188
|
+
timepoints_column_name,
|
|
186
189
|
_timepoints_list,
|
|
187
190
|
at_least_empty_dict(uns),
|
|
188
191
|
)
|
|
@@ -198,7 +201,7 @@ class ParsingDataIn:
|
|
|
198
201
|
varm: Any,
|
|
199
202
|
varp: Any,
|
|
200
203
|
timepoints: Any,
|
|
201
|
-
|
|
204
|
+
timepoints_column_name: Any,
|
|
202
205
|
timepoints_list: Any,
|
|
203
206
|
uns: Any,
|
|
204
207
|
) -> ParsingDataIn:
|
|
@@ -236,8 +239,8 @@ class ParsingDataIn:
|
|
|
236
239
|
varm=adata.varm,
|
|
237
240
|
varp=adata.varp,
|
|
238
241
|
timepoints=parse_timepoints(timepoints),
|
|
239
|
-
|
|
240
|
-
timepoints_list=parse_timepoints_list(timepoints_list,
|
|
242
|
+
timepoints_column_name=timepoints_column_name,
|
|
243
|
+
timepoints_list=parse_timepoints_list(timepoints_list, timepoints_column_name, adata.obs),
|
|
241
244
|
uns=adata.uns,
|
|
242
245
|
)
|
|
243
246
|
|
|
@@ -265,7 +268,7 @@ class ParsingDataOut:
|
|
|
265
268
|
dataset = getattr(self, attr)
|
|
266
269
|
if len(dataset) and first_in(dataset).shape[0] != n_timepoints:
|
|
267
270
|
raise IncoherenceError(
|
|
268
|
-
f"{attr} has {first_in(dataset).shape[0]} time point{'' if first_in(dataset).shape[0] == 1 else 's'} but {n_timepoints} {'was' if n_timepoints == 1 else 'were'} given."
|
|
271
|
+
f"{attr}:{dataset} has {first_in(dataset).shape[0]} time point{'' if first_in(dataset).shape[0] == 1 else 's'} but {n_timepoints} {'was' if n_timepoints == 1 else 'were'} given."
|
|
269
272
|
)
|
|
270
273
|
|
|
271
274
|
generalLogger.debug("Time points were coherent across arrays.")
|
|
@@ -15,7 +15,7 @@ def _parse_data_from_dataframe(df: pd.DataFrame, data: ParsingDataIn) -> Tempora
|
|
|
15
15
|
tdf = TemporalDataFrame(
|
|
16
16
|
df,
|
|
17
17
|
timepoints=data.timepoints_list,
|
|
18
|
-
|
|
18
|
+
timepoints_column_name=data.timepoints_column_name,
|
|
19
19
|
name="data",
|
|
20
20
|
)
|
|
21
21
|
|
|
@@ -30,7 +30,8 @@ def _parse_data_from_tdf(tdf: TemporalDataFrame, data: ParsingDataIn) -> Tempora
|
|
|
30
30
|
tdf.unlock_columns()
|
|
31
31
|
|
|
32
32
|
if data.timepoints.empty:
|
|
33
|
-
|
|
33
|
+
for idx, tp in enumerate(tdf.timepoints):
|
|
34
|
+
data.timepoints.loc[idx] = (tp.value, tp.unit)
|
|
34
35
|
|
|
35
36
|
elif np.any(data.timepoints.value.values != tdf.timepoints):
|
|
36
37
|
raise ValueError("'time points' found in DataFrame do not match 'layers' time points.")
|
|
@@ -81,6 +82,5 @@ def parse_layers(data: ParsingDataIn) -> None:
|
|
|
81
82
|
return
|
|
82
83
|
|
|
83
84
|
raise TypeError(
|
|
84
|
-
f"Type '{type(data.data)}' is not allowed for 'data' parameter, should be a dict,"
|
|
85
|
-
f"a pandas DataFrame, a TemporalDataFrame or an AnnData object."
|
|
85
|
+
f"Type '{type(data.data)}' is not allowed for 'data' parameter, should be a dict, a pandas DataFrame, a TemporalDataFrame or an AnnData object."
|
|
86
86
|
)
|
|
@@ -15,7 +15,8 @@ from vdata.tdf import TemporalDataFrameBase
|
|
|
15
15
|
def _valid_timepoints(data: ParsingDataIn, obs: TemporalDataFrameBase) -> Any: # EZDataFrame:
|
|
16
16
|
if data.timepoints.empty:
|
|
17
17
|
generalLogger.debug("Default empty DataFrame for time points.")
|
|
18
|
-
|
|
18
|
+
for row in [(tp.value, tp.unit) for tp in obs.timepoints]:
|
|
19
|
+
data.timepoints.loc[len(data.timepoints)] = row
|
|
19
20
|
|
|
20
21
|
log_timepoints(data.timepoints)
|
|
21
22
|
return data.timepoints if isinstance(data.timepoints, EZDataFrame) else EZDataFrame(data.timepoints)
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Collection
|
|
4
|
+
from typing import TYPE_CHECKING, cast
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from anndata._core.xarray import Dataset2D
|
|
9
|
+
from ezarr.dataframe import EZDataFrame
|
|
10
|
+
|
|
11
|
+
import vdata.timepoint as tp
|
|
12
|
+
from vdata.array_view import NDArrayView
|
|
13
|
+
from vdata.data._parse.utils import log_timepoints
|
|
14
|
+
from vdata.IO.logger import generalLogger
|
|
15
|
+
from vdata.tdf import TemporalDataFrameBase, TemporalDataFrameView
|
|
16
|
+
from vdata.utils import first_in
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from vdata.data._parse.data import ParsingDataIn
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def parse_timepoints_list(
|
|
23
|
+
timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
|
|
24
|
+
timepoints_column_name: str | None,
|
|
25
|
+
obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase | None,
|
|
26
|
+
) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
|
|
27
|
+
if timepoints_list is not None:
|
|
28
|
+
return tp.as_timepointarray(timepoints_list)
|
|
29
|
+
|
|
30
|
+
elif obs is not None and timepoints_column_name is not None:
|
|
31
|
+
if timepoints_column_name not in obs.columns:
|
|
32
|
+
raise ValueError(f"Could not find column '{timepoints_column_name}' in obs.")
|
|
33
|
+
|
|
34
|
+
if isinstance(obs, TemporalDataFrameBase):
|
|
35
|
+
column = cast(TemporalDataFrameView, obs[timepoints_column_name])
|
|
36
|
+
return tp.as_timepointarray(column.values)
|
|
37
|
+
|
|
38
|
+
return tp.as_timepointarray(obs[timepoints_column_name])
|
|
39
|
+
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
# TODO : could also get timepoints_list from obsm and obsp
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def parse_timepoints(timepoints: pd.DataFrame | EZDataFrame | tp.TimePointLike | None) -> EZDataFrame:
|
|
46
|
+
if timepoints is None:
|
|
47
|
+
generalLogger.debug(" 'time points' DataFrame was not given.")
|
|
48
|
+
return EZDataFrame(pd.DataFrame(columns=np.array(["value", "unit"])), dtypes={"value": float, "unit": str})
|
|
49
|
+
|
|
50
|
+
if not isinstance(timepoints, (pd.DataFrame, EZDataFrame)):
|
|
51
|
+
try:
|
|
52
|
+
timepoint = tp.TimePoint(timepoints)
|
|
53
|
+
|
|
54
|
+
except ValueError:
|
|
55
|
+
raise TypeError(f"'time points' must be a DataFrame or TimePointLike, got '{timepoints}'.")
|
|
56
|
+
|
|
57
|
+
else:
|
|
58
|
+
return EZDataFrame({"value": [timepoint.value], "unit": [timepoint.unit]})
|
|
59
|
+
|
|
60
|
+
if "value" not in timepoints.columns:
|
|
61
|
+
raise ValueError("'time points' must have at least a column 'value' to store time points value.")
|
|
62
|
+
|
|
63
|
+
to_drop = ["value"]
|
|
64
|
+
|
|
65
|
+
if "unit" in timepoints.columns:
|
|
66
|
+
timepoints_col = tp.as_timepointarray(
|
|
67
|
+
[f"{value}{unit}" for value, unit in zip(timepoints.value, timepoints.unit)]
|
|
68
|
+
)
|
|
69
|
+
to_drop.append("unit")
|
|
70
|
+
|
|
71
|
+
else:
|
|
72
|
+
timepoints_col = tp.as_timepointarray(timepoints["value"])
|
|
73
|
+
|
|
74
|
+
timepoints_col.sort()
|
|
75
|
+
|
|
76
|
+
timepoints = EZDataFrame(timepoints.drop(to_drop, axis=1))
|
|
77
|
+
timepoints.insert(0, "value", np.array(timepoints_col))
|
|
78
|
+
timepoints.insert(1, "unit", np.repeat(timepoints_col.unit, len(timepoints_col)))
|
|
79
|
+
log_timepoints(timepoints)
|
|
80
|
+
|
|
81
|
+
return timepoints
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def check_time_match(data: ParsingDataIn) -> None:
|
|
85
|
+
"""
|
|
86
|
+
Build timepoints DataFrame if it was not given by the user but 'timepoints_list' or 'timepoints_column_name' were given.
|
|
87
|
+
Otherwise, if both timepoints and 'timepoints_list' or 'timepoints_column_name' were given, check that they match.
|
|
88
|
+
"""
|
|
89
|
+
if data.timepoints.empty and data.timepoints_list is None and data.timepoints_column_name is None:
|
|
90
|
+
# timepoints cannot be guessed
|
|
91
|
+
return
|
|
92
|
+
|
|
93
|
+
# build timepoints DataFrame from timepoints_list or timepoints_column_name
|
|
94
|
+
if data.timepoints.empty and data.timepoints_list is not None:
|
|
95
|
+
timepoints = np.unique(data.timepoints_list, equal_nan=False)
|
|
96
|
+
|
|
97
|
+
data.timepoints["value"] = np.array(timepoints)
|
|
98
|
+
data.timepoints["unit"] = np.repeat(timepoints.unit, len(timepoints))
|
|
99
|
+
|
|
100
|
+
return
|
|
101
|
+
|
|
102
|
+
if data.timepoints.empty and len(data.layers):
|
|
103
|
+
timepoints = np.unique(first_in(data.layers).timepoints, equal_nan=False)
|
|
104
|
+
|
|
105
|
+
data.timepoints["value"] = np.array(timepoints)
|
|
106
|
+
data.timepoints["unit"] = np.repeat(timepoints.unit, len(timepoints))
|
|
107
|
+
|
|
108
|
+
return
|
|
109
|
+
|
|
110
|
+
# check that timepoints and _time_list and _timepoints_column_name match
|
|
111
|
+
if data.timepoints_list is not None and not np.all(
|
|
112
|
+
np.isin(data.timepoints_list, tp.as_timepointarray(data.timepoints.value))
|
|
113
|
+
):
|
|
114
|
+
raise ValueError("There are values in 'timepoints_list' unknown in 'timepoints'.")
|
|
115
|
+
|
|
116
|
+
elif data.timepoints_column_name is not None and not np.all(
|
|
117
|
+
np.isin(tp.as_timepointarray(data.obs["timepoints"]), tp.as_timepointarray(data.timepoints.value))
|
|
118
|
+
):
|
|
119
|
+
raise ValueError(f"There are values in obs['{data.timepoints_column_name}'] unknown in 'timepoints'.")
|
|
@@ -7,6 +7,5 @@ from vdata.utils import repr_array
|
|
|
7
7
|
def log_timepoints(timepoints: pd.DataFrame) -> None:
|
|
8
8
|
generalLogger.debug(f" {len(timepoints)} time point{' was' if len(timepoints) == 1 else 's were'} found finally.")
|
|
9
9
|
generalLogger.debug(
|
|
10
|
-
f" \u21b3 Time point{' is' if len(timepoints) == 1 else 's are'} : "
|
|
11
|
-
f"{repr_array(list(timepoints.value)) if len(timepoints) else '[]'}"
|
|
10
|
+
f" \u21b3 Time point{' is' if len(timepoints) == 1 else 's are'} : {repr_array(list(timepoints.value)) if len(timepoints) else '[]'}"
|
|
12
11
|
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Sequence
|
|
3
4
|
from copy import deepcopy
|
|
4
|
-
from typing import Sequence
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
@@ -10,13 +10,13 @@ from vdata.data.vdata import VData
|
|
|
10
10
|
from vdata.IO import generalLogger
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
def concatenate(arr: Sequence[VData], name: str = "") -> "VData":
|
|
13
|
+
def concatenate(arr: Sequence[VData], name: str = "") -> VData:
|
|
14
14
|
"""
|
|
15
15
|
Concatenate together multiple VData objects, which share the same layer keys, vars and time points.
|
|
16
16
|
|
|
17
17
|
Args:
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
arr: sequence of at least 2 VData objects to concatenate.
|
|
19
|
+
name: a name for the concatenated VData object.
|
|
20
20
|
|
|
21
21
|
Returns:
|
|
22
22
|
A concatenated VData object.
|
|
@@ -24,7 +24,7 @@ def concatenate(arr: Sequence[VData], name: str = "") -> "VData":
|
|
|
24
24
|
if len(arr) < 2:
|
|
25
25
|
raise ValueError("At least 2 VData objects must be provided.")
|
|
26
26
|
|
|
27
|
-
if not all(isinstance(arg, VData) for arg in arr):
|
|
27
|
+
if not all(isinstance(arg, VData) for arg in arr): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
28
28
|
raise TypeError("Only Vdata objects are allowed.")
|
|
29
29
|
|
|
30
30
|
generalLogger.debug(
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import pickle
|
|
4
|
+
import shutil
|
|
5
|
+
import warnings
|
|
4
6
|
from collections.abc import Collection, Iterable
|
|
5
7
|
from pathlib import Path
|
|
6
|
-
import shutil
|
|
7
8
|
from typing import Any, Literal, overload
|
|
8
|
-
import warnings
|
|
9
9
|
|
|
10
10
|
import ch5mpy as ch
|
|
11
11
|
import ezarr as ez
|
|
12
|
-
from ezarr.names import Attribute, EZType
|
|
13
12
|
import numpy as np
|
|
14
13
|
import numpy.typing as npt
|
|
15
14
|
from anndata import AnnData
|
|
16
15
|
from ezarr.dataframe import EZDataFrame
|
|
16
|
+
from ezarr.names import Attribute, EZType
|
|
17
17
|
from tqdm.auto import tqdm
|
|
18
18
|
from zarr.codecs.numcodecs import LZ4
|
|
19
|
-
from zarr.errors import UnstableSpecificationWarning
|
|
19
|
+
from zarr.errors import UnstableSpecificationWarning, ZarrUserWarning
|
|
20
20
|
|
|
21
21
|
import vdata
|
|
22
22
|
import vdata.timepoint as tp
|
|
@@ -231,7 +231,9 @@ def convert_to_TDF(
|
|
|
231
231
|
}
|
|
232
232
|
)
|
|
233
233
|
|
|
234
|
-
|
|
234
|
+
with warnings.catch_warnings(action="ignore", category=ZarrUserWarning):
|
|
235
|
+
compressors = LZ4()
|
|
236
|
+
|
|
235
237
|
with data.parameters(compressors):
|
|
236
238
|
data["timepoints_index"] = timepoints
|
|
237
239
|
data["index"] = index[sorting_indices]
|
|
@@ -284,7 +286,10 @@ def _convert_anndata_to_vdata(
|
|
|
284
286
|
np.ones(data["obs"][next(iter(data["obs"]))].shape[0]) * timepoint.value, unit=timepoint.unit
|
|
285
287
|
)
|
|
286
288
|
|
|
287
|
-
|
|
289
|
+
_unique_tps = np.unique(timepoints_list, equal_nan=False)
|
|
290
|
+
z_data["timepoints"] = EZDataFrame(
|
|
291
|
+
{"value": _unique_tps, "unit": np.repeat(timepoints_list.unit, len(_unique_tps))}
|
|
292
|
+
)
|
|
288
293
|
progressBar.update()
|
|
289
294
|
|
|
290
295
|
# obs ---------------------------------------------------------------------
|
|
@@ -394,10 +399,10 @@ def convert_anndata_to_vdata(
|
|
|
394
399
|
/!\ WARNING : if done inplace, you won't be able to open the file as an anndata anymore !
|
|
395
400
|
|
|
396
401
|
Args:
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
402
|
+
path: path to the anndata h5 file to convert.
|
|
403
|
+
timepoint: a unique timepoint to set for the data in the anndata.
|
|
404
|
+
timepoints_column_name: the name of the column in anndata's obs to use as indicator of time point for the data.
|
|
405
|
+
drop_X: do not preserve the 'X' dataset ? (default: False)
|
|
401
406
|
"""
|
|
402
407
|
path = Path(path)
|
|
403
408
|
data = ch.H5Dict.read(path, mode=ch.H5Mode.READ_WRITE)
|
|
@@ -14,14 +14,14 @@ from vdata.timepoint import TimePoint
|
|
|
14
14
|
from vdata.utils import spacer
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def
|
|
17
|
+
def _get_timepoints_column_name(
|
|
18
18
|
time_list: Sequence[str | TimePoint] | Literal["*"] | None,
|
|
19
|
-
|
|
19
|
+
timepoints_column_name: str | None,
|
|
20
20
|
metadata: dict[str, Any] | None,
|
|
21
21
|
*metadata_keys: str,
|
|
22
22
|
) -> str | None:
|
|
23
|
-
if time_list is not None or
|
|
24
|
-
return
|
|
23
|
+
if time_list is not None or timepoints_column_name is not None:
|
|
24
|
+
return timepoints_column_name
|
|
25
25
|
|
|
26
26
|
if metadata is None:
|
|
27
27
|
return None
|
|
@@ -48,7 +48,7 @@ def _get_col_dtype(
|
|
|
48
48
|
def read_from_csv(
|
|
49
49
|
path: str | Path,
|
|
50
50
|
time_list: Sequence[str | TimePoint] | Literal["*"] | None = None,
|
|
51
|
-
|
|
51
|
+
timepoints_column_name: str | None = None,
|
|
52
52
|
name: str = "",
|
|
53
53
|
) -> vdata.VData:
|
|
54
54
|
"""
|
|
@@ -111,7 +111,9 @@ def read_from_csv(
|
|
|
111
111
|
obs = TemporalDataFrame.read_from_csv(
|
|
112
112
|
parsed_directory / f.name,
|
|
113
113
|
timepoints=time_list,
|
|
114
|
-
|
|
114
|
+
timepoints_column_name=_get_timepoints_column_name(
|
|
115
|
+
time_list, timepoints_column_name, metadata, "obs"
|
|
116
|
+
),
|
|
115
117
|
)
|
|
116
118
|
|
|
117
119
|
else:
|
|
@@ -126,7 +128,9 @@ def read_from_csv(
|
|
|
126
128
|
dataset_dict[dataset.name[:-4]] = TemporalDataFrame.read_from_csv(
|
|
127
129
|
parsed_directory / f.name / dataset.name,
|
|
128
130
|
timepoints=time_list,
|
|
129
|
-
|
|
131
|
+
timepoints_column_name=_get_timepoints_column_name(
|
|
132
|
+
time_list, timepoints_column_name, metadata, f.name, dataset.name[:-4]
|
|
133
|
+
),
|
|
130
134
|
columns_dtype=_get_col_dtype(metadata, f.name, dataset.name[:-4]),
|
|
131
135
|
)
|
|
132
136
|
|