vdata 0.3.4__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {vdata-0.3.4 → vdata-0.3.6}/PKG-INFO +2 -2
  2. {vdata-0.3.4 → vdata-0.3.6}/pyproject.toml +3 -3
  3. vdata-0.3.6/vdata/IO/__init__.py +12 -0
  4. {vdata-0.3.4 → vdata-0.3.6}/vdata/__init__.py +6 -9
  5. {vdata-0.3.4 → vdata-0.3.6}/vdata/_typing.py +6 -0
  6. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/anndata.py +3 -1
  7. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/data.py +23 -20
  8. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/layers.py +4 -4
  9. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/objects.py +2 -1
  10. vdata-0.3.6/vdata/data/_parse/time.py +119 -0
  11. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/utils.py +1 -2
  12. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/concatenate.py +5 -5
  13. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/convert.py +15 -10
  14. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/read.py +11 -7
  15. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/vdata.py +31 -38
  16. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/view.py +10 -8
  17. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/write.py +14 -3
  18. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/_parse.py +17 -15
  19. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/base.py +24 -18
  20. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/dataframe.py +28 -22
  21. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/view.py +3 -1
  22. {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/__init__.py +7 -4
  23. vdata-0.3.6/vdata/timepoint/_typing.py +9 -0
  24. {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/array.py +9 -1
  25. {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/index.py +1 -1
  26. {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/timepoint.py +8 -5
  27. vdata-0.3.4/vdata/IO/__init__.py +0 -11
  28. vdata-0.3.4/vdata/data/_parse/time.py +0 -92
  29. vdata-0.3.4/vdata/timepoint/_typing.py +0 -3
  30. {vdata-0.3.4 → vdata-0.3.6}/LICENSE +0 -0
  31. {vdata-0.3.4 → vdata-0.3.6}/README.md +0 -0
  32. {vdata-0.3.4 → vdata-0.3.6}/vdata/IO/errors.py +0 -0
  33. {vdata-0.3.4 → vdata-0.3.6}/vdata/IO/logger.py +0 -0
  34. {vdata-0.3.4 → vdata-0.3.6}/vdata/_meta.py +0 -0
  35. {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/__init__.py +0 -0
  36. {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/anndata.py +0 -0
  37. {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/containers.py +0 -0
  38. {vdata-0.3.4 → vdata-0.3.6}/vdata/anndata_proxy/dataframe.py +0 -0
  39. {vdata-0.3.4 → vdata-0.3.6}/vdata/array_view.py +0 -0
  40. {vdata-0.3.4 → vdata-0.3.6}/vdata/cli.py +0 -0
  41. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/__init__.py +0 -0
  42. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_indexing.py +0 -0
  43. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/__init__.py +0 -0
  44. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/__init__.py +0 -0
  45. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/obs.py +0 -0
  46. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/uns.py +0 -0
  47. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/_parse/objects/var.py +0 -0
  48. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/__init__.py +0 -0
  49. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/base.py +0 -0
  50. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/layers.py +0 -0
  51. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/lazy.py +0 -0
  52. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/obs.py +0 -0
  53. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/var.py +0 -0
  54. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/arrays/view.py +0 -0
  55. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/hash.py +0 -0
  56. {vdata-0.3.4 → vdata-0.3.6}/vdata/data/name.py +0 -0
  57. {vdata-0.3.4 → vdata-0.3.6}/vdata/names.py +0 -0
  58. {vdata-0.3.4 → vdata-0.3.6}/vdata/py.typed +0 -0
  59. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/__init__.py +0 -0
  60. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/index.py +0 -0
  61. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/indexers.py +0 -0
  62. {vdata-0.3.4 → vdata-0.3.6}/vdata/tdf/indexing.py +0 -0
  63. {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/_functions.py +0 -0
  64. {vdata-0.3.4 → vdata-0.3.6}/vdata/timepoint/range.py +0 -0
  65. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/__init__.py +0 -0
  66. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/array.py +0 -0
  67. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/dict.py +0 -0
  68. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/tdf.py +0 -0
  69. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/update.py +0 -0
  70. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/utils.py +0 -0
  71. {vdata-0.3.4 → vdata-0.3.6}/vdata/update/vdf.py +0 -0
  72. {vdata-0.3.4 → vdata-0.3.6}/vdata/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vdata
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: Annotated multivariate observation of timestamped data
5
5
  Author: Matteo Bouvier
6
6
  Author-email: Matteo Bouvier <matteo.bouvier@hotmail.fr>
@@ -11,7 +11,7 @@ Requires-Dist: anndata>=0.10.4
11
11
  Requires-Dist: scipy>=1.12.0
12
12
  Requires-Dist: numpy-indexed>=0.3.7
13
13
  Requires-Dist: ch5mpy>=0.5.1
14
- Requires-Dist: ezarr>=1.1.3
14
+ Requires-Dist: ezarr>=1.1.4
15
15
  Requires-Dist: h5dataframe>=0.2.3 ; extra == 'update'
16
16
  Requires-Python: >=3.12
17
17
  Provides-Extra: update
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "vdata"
3
- version = "0.3.4"
3
+ version = "0.3.6"
4
4
  description = "Annotated multivariate observation of timestamped data"
5
5
  authors = [{ name = "Matteo Bouvier", email="matteo.bouvier@hotmail.fr"}]
6
6
  license = "CECILL-B"
@@ -14,7 +14,7 @@ dependencies = [
14
14
  "scipy>=1.12.0",
15
15
  "numpy-indexed>=0.3.7",
16
16
  "ch5mpy>=0.5.1",
17
- "ezarr>=1.1.3",
17
+ "ezarr>=1.1.4",
18
18
  ]
19
19
 
20
20
  [dependency-groups]
@@ -25,7 +25,7 @@ dev = [
25
25
 
26
26
  docs= [
27
27
  "mkdocs-material>=9.5.10",
28
- # mkdocstrings = {extras = ["python"], version = "^0.24.0"}
28
+ "mkdocstrings[python]>=0.24.0",
29
29
  "mkdocs>=1.5.3"
30
30
  ]
31
31
 
@@ -0,0 +1,12 @@
1
+ from vdata.IO.errors import IncoherenceError, InvalidVDataFileError, ShapeError, VBaseError, VLockError, VReadOnlyError
2
+ from vdata.IO.logger import generalLogger
3
+
4
+ __all__ = [
5
+ "generalLogger",
6
+ "IncoherenceError",
7
+ "InvalidVDataFileError",
8
+ "ShapeError",
9
+ "VBaseError",
10
+ "VLockError",
11
+ "VReadOnlyError",
12
+ ]
@@ -3,17 +3,9 @@
3
3
  from importlib.metadata import metadata
4
4
  from pathlib import Path
5
5
 
6
- import ch5mpy as ch
7
- import ezarr
8
-
9
6
  from vdata.data import VData, VDataView, concatenate, convert_anndata_to_vdata
10
7
  from vdata.data.name import WRITE_PROTOCOL_VERSION
11
- from vdata.IO import (
12
- IncoherenceError,
13
- ShapeError,
14
- VBaseError,
15
- VLockError,
16
- )
8
+ from vdata.IO import IncoherenceError, InvalidVDataFileError, ShapeError, VBaseError, VLockError, VReadOnlyError
17
9
  from vdata.tdf import RepeatingIndex, TemporalDataFrame, TemporalDataFrameView
18
10
  from vdata.timepoint import TimePoint
19
11
  from vdata.utils import copy_vdata
@@ -27,6 +19,9 @@ __version__ = metadata("vdata").get("version")
27
19
 
28
20
 
29
21
  def get_version(path: str | Path) -> int:
22
+ import ch5mpy as ch
23
+ import ezarr
24
+
30
25
  try:
31
26
  return ch.H5Dict.read(path).attributes.get("__vdata_write_version__", 0)
32
27
 
@@ -39,6 +34,7 @@ __all__ = [
39
34
  "convert_anndata_to_vdata",
40
35
  "copy_vdata",
41
36
  "IncoherenceError",
37
+ "InvalidVDataFileError",
42
38
  "RepeatingIndex",
43
39
  "ShapeError",
44
40
  "TemporalDataFrame",
@@ -48,5 +44,6 @@ __all__ = [
48
44
  "VData",
49
45
  "VDataView",
50
46
  "VLockError",
47
+ "VReadOnlyError",
51
48
  "WRITE_PROTOCOL_VERSION",
52
49
  ]
@@ -1,10 +1,14 @@
1
1
  from collections.abc import Collection
2
+ from pathlib import Path
2
3
  from types import EllipsisType
3
4
  from typing import SupportsIndex, TypedDict
4
5
 
5
6
  import numpy as np
6
7
  import numpy.typing as npt
7
8
  import zarr
9
+ from zarr.abc.store import Store
10
+ from zarr.core.buffer import Buffer
11
+ from zarr.storage import StorePath
8
12
 
9
13
  import vdata.timepoint as tp
10
14
  from vdata.array_view import NDArrayView
@@ -28,6 +32,8 @@ type MultiSlicer = Collection[IFS | tp.TimePoint] | range | slice | EllipsisType
28
32
  type PreSlicer = IFS | tp.TimePoint | Collection[IFS | bool | tp.TimePoint] | range | slice | EllipsisType
29
33
  type Indexer = SupportsIndex | slice | npt.NDArray[np.int_] | npt.NDArray[np.bool_] | None
30
34
 
35
+ type StoreLike = Store | StorePath | Path | str | dict[str, Buffer]
36
+
31
37
 
32
38
  class AttrDict(TypedDict):
33
39
  name: str
@@ -44,10 +44,12 @@ def parse_AnnData(adata: AnnData, data: ParsingDataIn) -> ParsingDataOut:
44
44
  generalLogger.debug(" VData creation from an AnnData.")
45
45
 
46
46
  # import and cast obs to a TemporalDataFrame
47
+ if data.timepoints_column_name is not None:
48
+ del data.obs[data.timepoints_column_name]
49
+
47
50
  obs = TemporalDataFrame(
48
51
  adata.obs,
49
52
  timepoints=data.timepoints_list,
50
- time_col_name=data.time_col_name,
51
53
  name="obs",
52
54
  lock=(True, False),
53
55
  )
@@ -33,7 +33,7 @@ def at_least_empty_dict(d: Mapping[Any, Any] | None) -> Mapping[Any, Any]:
33
33
  def _get_time_list(
34
34
  time_list: tp.TimePointNArray | NDArrayView[tp.TimePoint] | None,
35
35
  data: Any,
36
- time_col_name: str | None,
36
+ timepoints_column_name: str | None,
37
37
  ) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
38
38
  if time_list is not None:
39
39
  return time_list
@@ -47,8 +47,8 @@ def _get_time_list(
47
47
  if isinstance(df, TemporalDataFrameBase):
48
48
  return df.timepoints_column
49
49
 
50
- elif isinstance(df, (pd.DataFrame, EZDataFrame)) and time_col_name is not None:
51
- return tp.as_timepointarray(df[time_col_name])
50
+ elif isinstance(df, (pd.DataFrame, EZDataFrame)) and timepoints_column_name is not None:
51
+ return tp.as_timepointarray(df[timepoints_column_name])
52
52
 
53
53
  return None
54
54
 
@@ -61,13 +61,13 @@ def _valid_obs(
61
61
  | None,
62
62
  obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase | None,
63
63
  time_list: tp.TimePointNArray | NDArrayView[tp.TimePoint] | None,
64
- time_col_name: str | None,
64
+ timepoints_column_name: str | None,
65
65
  ) -> TemporalDataFrameBase:
66
66
  if obs is None:
67
67
  generalLogger.debug("Default empty TemporalDataFrame for obs.")
68
68
 
69
69
  _obs_index = get_obs_index(data, obs)
70
- _time_list = _get_time_list(time_list, data, time_col_name)
70
+ _time_list = _get_time_list(time_list, data, timepoints_column_name)
71
71
 
72
72
  _obs = TemporalDataFrame(
73
73
  timepoints=_time_list,
@@ -81,7 +81,9 @@ def _valid_obs(
81
81
  generalLogger.debug(f" 2. \u2713 'obs' is a {type(obs).__name__}.")
82
82
 
83
83
  if isinstance(obs, (pd.DataFrame, EZDataFrame)):
84
- _obs = TemporalDataFrame(obs, timepoints=time_list, time_col_name=time_col_name, name="obs", index=obs.index)
84
+ _obs = TemporalDataFrame(
85
+ obs, timepoints=time_list, timepoints_column_name=timepoints_column_name, name="obs", index=obs.index
86
+ )
85
87
  _obs.lock_indices()
86
88
  return _obs
87
89
 
@@ -104,14 +106,14 @@ def _valid_var(
104
106
  | Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
105
107
  | None,
106
108
  var: pd.DataFrame | EZDataFrame | None,
107
- time_col_name: str | None,
109
+ timepoints_column_name: str | None,
108
110
  ) -> EZDataFrame:
109
111
  if var is None:
110
112
  generalLogger.debug("Default empty DataFrame for vars.")
111
113
  _index = get_var_index(data, var)
112
114
 
113
- if _index is not None and time_col_name is not None:
114
- ix = np.where(_index == time_col_name)[0][0]
115
+ if _index is not None and timepoints_column_name is not None:
116
+ ix = np.where(_index == timepoints_column_name)[0][0]
115
117
  _index = np.delete(_index, ix)
116
118
 
117
119
  return EZDataFrame(pd.DataFrame(index=_index))
@@ -139,7 +141,7 @@ class ParsingDataIn:
139
141
  varm: Mapping[str, pd.DataFrame | EZDataFrame]
140
142
  varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
141
143
  timepoints: pd.DataFrame | EZDataFrame
142
- time_col_name: str | None
144
+ timepoints_column_name: str | None
143
145
  timepoints_list: tp.TimePointNArray | NDArrayView[tp.TimePoint] | None
144
146
  uns: Mapping[str, Any]
145
147
  layers: dict[str, TemporalDataFrame | TemporalDataFrameView] = field(init=False)
@@ -166,23 +168,24 @@ class ParsingDataIn:
166
168
  var: pd.DataFrame | EZDataFrame | None,
167
169
  varm: Mapping[str, pd.DataFrame | EZDataFrame] | None,
168
170
  varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]] | None,
169
- timepoints: pd.DataFrame | EZDataFrame | None,
170
- time_col_name: str | None,
171
+ timepoints: pd.DataFrame | EZDataFrame | tp.TimePointLike | None,
172
+ timepoints_column_name: str | None,
171
173
  timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
172
174
  uns: MutableMapping[str, Any] | ez.EZDict[Any] | None,
173
175
  ) -> ParsingDataIn:
174
- _timepoints_list = parse_timepoints_list(timepoints_list, time_col_name, obs)
176
+ _timepoints_list = parse_timepoints_list(timepoints_list, timepoints_column_name, obs)
177
+ _obs = _valid_obs(data, obs, _timepoints_list, timepoints_column_name)
175
178
 
176
179
  return ParsingDataIn(
177
180
  data,
178
- _valid_obs(data, obs, _timepoints_list, time_col_name),
181
+ _obs,
179
182
  at_least_empty_dict(obsm),
180
183
  at_least_empty_dict(obsp),
181
- _valid_var(data, var, time_col_name),
184
+ _valid_var(data, var, timepoints_column_name),
182
185
  at_least_empty_dict(varm),
183
186
  at_least_empty_dict(varp),
184
187
  parse_timepoints(timepoints),
185
- time_col_name,
188
+ timepoints_column_name,
186
189
  _timepoints_list,
187
190
  at_least_empty_dict(uns),
188
191
  )
@@ -198,7 +201,7 @@ class ParsingDataIn:
198
201
  varm: Any,
199
202
  varp: Any,
200
203
  timepoints: Any,
201
- time_col_name: Any,
204
+ timepoints_column_name: Any,
202
205
  timepoints_list: Any,
203
206
  uns: Any,
204
207
  ) -> ParsingDataIn:
@@ -236,8 +239,8 @@ class ParsingDataIn:
236
239
  varm=adata.varm,
237
240
  varp=adata.varp,
238
241
  timepoints=parse_timepoints(timepoints),
239
- time_col_name=time_col_name,
240
- timepoints_list=parse_timepoints_list(timepoints_list, time_col_name, adata.obs),
242
+ timepoints_column_name=timepoints_column_name,
243
+ timepoints_list=parse_timepoints_list(timepoints_list, timepoints_column_name, adata.obs),
241
244
  uns=adata.uns,
242
245
  )
243
246
 
@@ -265,7 +268,7 @@ class ParsingDataOut:
265
268
  dataset = getattr(self, attr)
266
269
  if len(dataset) and first_in(dataset).shape[0] != n_timepoints:
267
270
  raise IncoherenceError(
268
- f"{attr} has {first_in(dataset).shape[0]} time point{'' if first_in(dataset).shape[0] == 1 else 's'} but {n_timepoints} {'was' if n_timepoints == 1 else 'were'} given."
271
+ f"{attr}:{dataset} has {first_in(dataset).shape[0]} time point{'' if first_in(dataset).shape[0] == 1 else 's'} but {n_timepoints} {'was' if n_timepoints == 1 else 'were'} given."
269
272
  )
270
273
 
271
274
  generalLogger.debug("Time points were coherent across arrays.")
@@ -15,7 +15,7 @@ def _parse_data_from_dataframe(df: pd.DataFrame, data: ParsingDataIn) -> Tempora
15
15
  tdf = TemporalDataFrame(
16
16
  df,
17
17
  timepoints=data.timepoints_list,
18
- time_col_name=data.time_col_name,
18
+ timepoints_column_name=data.timepoints_column_name,
19
19
  name="data",
20
20
  )
21
21
 
@@ -30,7 +30,8 @@ def _parse_data_from_tdf(tdf: TemporalDataFrame, data: ParsingDataIn) -> Tempora
30
30
  tdf.unlock_columns()
31
31
 
32
32
  if data.timepoints.empty:
33
- data.timepoints = pd.DataFrame({"value": tdf.timepoints})
33
+ for idx, tp in enumerate(tdf.timepoints):
34
+ data.timepoints.loc[idx] = (tp.value, tp.unit)
34
35
 
35
36
  elif np.any(data.timepoints.value.values != tdf.timepoints):
36
37
  raise ValueError("'time points' found in DataFrame do not match 'layers' time points.")
@@ -81,6 +82,5 @@ def parse_layers(data: ParsingDataIn) -> None:
81
82
  return
82
83
 
83
84
  raise TypeError(
84
- f"Type '{type(data.data)}' is not allowed for 'data' parameter, should be a dict,"
85
- f"a pandas DataFrame, a TemporalDataFrame or an AnnData object."
85
+ f"Type '{type(data.data)}' is not allowed for 'data' parameter, should be a dict, a pandas DataFrame, a TemporalDataFrame or an AnnData object."
86
86
  )
@@ -15,7 +15,8 @@ from vdata.tdf import TemporalDataFrameBase
15
15
  def _valid_timepoints(data: ParsingDataIn, obs: TemporalDataFrameBase) -> Any: # EZDataFrame:
16
16
  if data.timepoints.empty:
17
17
  generalLogger.debug("Default empty DataFrame for time points.")
18
- data.timepoints["value"] = obs.timepoints
18
+ for row in [(tp.value, tp.unit) for tp in obs.timepoints]:
19
+ data.timepoints.loc[len(data.timepoints)] = row
19
20
 
20
21
  log_timepoints(data.timepoints)
21
22
  return data.timepoints if isinstance(data.timepoints, EZDataFrame) else EZDataFrame(data.timepoints)
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Collection
4
+ from typing import TYPE_CHECKING, cast
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from anndata._core.xarray import Dataset2D
9
+ from ezarr.dataframe import EZDataFrame
10
+
11
+ import vdata.timepoint as tp
12
+ from vdata.array_view import NDArrayView
13
+ from vdata.data._parse.utils import log_timepoints
14
+ from vdata.IO.logger import generalLogger
15
+ from vdata.tdf import TemporalDataFrameBase, TemporalDataFrameView
16
+ from vdata.utils import first_in
17
+
18
+ if TYPE_CHECKING:
19
+ from vdata.data._parse.data import ParsingDataIn
20
+
21
+
22
+ def parse_timepoints_list(
23
+ timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
24
+ timepoints_column_name: str | None,
25
+ obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase | None,
26
+ ) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
27
+ if timepoints_list is not None:
28
+ return tp.as_timepointarray(timepoints_list)
29
+
30
+ elif obs is not None and timepoints_column_name is not None:
31
+ if timepoints_column_name not in obs.columns:
32
+ raise ValueError(f"Could not find column '{timepoints_column_name}' in obs.")
33
+
34
+ if isinstance(obs, TemporalDataFrameBase):
35
+ column = cast(TemporalDataFrameView, obs[timepoints_column_name])
36
+ return tp.as_timepointarray(column.values)
37
+
38
+ return tp.as_timepointarray(obs[timepoints_column_name])
39
+
40
+ return None
41
+
42
+ # TODO : could also get timepoints_list from obsm and obsp
43
+
44
+
45
+ def parse_timepoints(timepoints: pd.DataFrame | EZDataFrame | tp.TimePointLike | None) -> EZDataFrame:
46
+ if timepoints is None:
47
+ generalLogger.debug(" 'time points' DataFrame was not given.")
48
+ return EZDataFrame(pd.DataFrame(columns=np.array(["value", "unit"])), dtypes={"value": float, "unit": str})
49
+
50
+ if not isinstance(timepoints, (pd.DataFrame, EZDataFrame)):
51
+ try:
52
+ timepoint = tp.TimePoint(timepoints)
53
+
54
+ except ValueError:
55
+ raise TypeError(f"'time points' must be a DataFrame or TimePointLike, got '{timepoints}'.")
56
+
57
+ else:
58
+ return EZDataFrame({"value": [timepoint.value], "unit": [timepoint.unit]})
59
+
60
+ if "value" not in timepoints.columns:
61
+ raise ValueError("'time points' must have at least a column 'value' to store time points value.")
62
+
63
+ to_drop = ["value"]
64
+
65
+ if "unit" in timepoints.columns:
66
+ timepoints_col = tp.as_timepointarray(
67
+ [f"{value}{unit}" for value, unit in zip(timepoints.value, timepoints.unit)]
68
+ )
69
+ to_drop.append("unit")
70
+
71
+ else:
72
+ timepoints_col = tp.as_timepointarray(timepoints["value"])
73
+
74
+ timepoints_col.sort()
75
+
76
+ timepoints = EZDataFrame(timepoints.drop(to_drop, axis=1))
77
+ timepoints.insert(0, "value", np.array(timepoints_col))
78
+ timepoints.insert(1, "unit", np.repeat(timepoints_col.unit, len(timepoints_col)))
79
+ log_timepoints(timepoints)
80
+
81
+ return timepoints
82
+
83
+
84
+ def check_time_match(data: ParsingDataIn) -> None:
85
+ """
86
+ Build timepoints DataFrame if it was not given by the user but 'timepoints_list' or 'timepoints_column_name' were given.
87
+ Otherwise, if both timepoints and 'timepoints_list' or 'timepoints_column_name' were given, check that they match.
88
+ """
89
+ if data.timepoints.empty and data.timepoints_list is None and data.timepoints_column_name is None:
90
+ # timepoints cannot be guessed
91
+ return
92
+
93
+ # build timepoints DataFrame from timepoints_list or timepoints_column_name
94
+ if data.timepoints.empty and data.timepoints_list is not None:
95
+ timepoints = np.unique(data.timepoints_list, equal_nan=False)
96
+
97
+ data.timepoints["value"] = np.array(timepoints)
98
+ data.timepoints["unit"] = np.repeat(timepoints.unit, len(timepoints))
99
+
100
+ return
101
+
102
+ if data.timepoints.empty and len(data.layers):
103
+ timepoints = np.unique(first_in(data.layers).timepoints, equal_nan=False)
104
+
105
+ data.timepoints["value"] = np.array(timepoints)
106
+ data.timepoints["unit"] = np.repeat(timepoints.unit, len(timepoints))
107
+
108
+ return
109
+
110
+ # check that timepoints and _time_list and _timepoints_column_name match
111
+ if data.timepoints_list is not None and not np.all(
112
+ np.isin(data.timepoints_list, tp.as_timepointarray(data.timepoints.value))
113
+ ):
114
+ raise ValueError("There are values in 'timepoints_list' unknown in 'timepoints'.")
115
+
116
+ elif data.timepoints_column_name is not None and not np.all(
117
+ np.isin(tp.as_timepointarray(data.obs["timepoints"]), tp.as_timepointarray(data.timepoints.value))
118
+ ):
119
+ raise ValueError(f"There are values in obs['{data.timepoints_column_name}'] unknown in 'timepoints'.")
@@ -7,6 +7,5 @@ from vdata.utils import repr_array
7
7
  def log_timepoints(timepoints: pd.DataFrame) -> None:
8
8
  generalLogger.debug(f" {len(timepoints)} time point{' was' if len(timepoints) == 1 else 's were'} found finally.")
9
9
  generalLogger.debug(
10
- f" \u21b3 Time point{' is' if len(timepoints) == 1 else 's are'} : "
11
- f"{repr_array(list(timepoints.value)) if len(timepoints) else '[]'}"
10
+ f" \u21b3 Time point{' is' if len(timepoints) == 1 else 's are'} : {repr_array(list(timepoints.value)) if len(timepoints) else '[]'}"
12
11
  )
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from collections.abc import Sequence
3
4
  from copy import deepcopy
4
- from typing import Sequence
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
@@ -10,13 +10,13 @@ from vdata.data.vdata import VData
10
10
  from vdata.IO import generalLogger
11
11
 
12
12
 
13
- def concatenate(arr: Sequence[VData], name: str = "") -> "VData":
13
+ def concatenate(arr: Sequence[VData], name: str = "") -> VData:
14
14
  """
15
15
  Concatenate together multiple VData objects, which share the same layer keys, vars and time points.
16
16
 
17
17
  Args:
18
- - arr: sequence of at least 2 VData objects to concatenate.
19
- - name: a name for the concatenated VData object.
18
+ arr: sequence of at least 2 VData objects to concatenate.
19
+ name: a name for the concatenated VData object.
20
20
 
21
21
  Returns:
22
22
  A concatenated VData object.
@@ -24,7 +24,7 @@ def concatenate(arr: Sequence[VData], name: str = "") -> "VData":
24
24
  if len(arr) < 2:
25
25
  raise ValueError("At least 2 VData objects must be provided.")
26
26
 
27
- if not all(isinstance(arg, VData) for arg in arr):
27
+ if not all(isinstance(arg, VData) for arg in arr): # pyright: ignore[reportUnnecessaryIsInstance]
28
28
  raise TypeError("Only Vdata objects are allowed.")
29
29
 
30
30
  generalLogger.debug(
@@ -1,22 +1,22 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import pickle
4
+ import shutil
5
+ import warnings
4
6
  from collections.abc import Collection, Iterable
5
7
  from pathlib import Path
6
- import shutil
7
8
  from typing import Any, Literal, overload
8
- import warnings
9
9
 
10
10
  import ch5mpy as ch
11
11
  import ezarr as ez
12
- from ezarr.names import Attribute, EZType
13
12
  import numpy as np
14
13
  import numpy.typing as npt
15
14
  from anndata import AnnData
16
15
  from ezarr.dataframe import EZDataFrame
16
+ from ezarr.names import Attribute, EZType
17
17
  from tqdm.auto import tqdm
18
18
  from zarr.codecs.numcodecs import LZ4
19
- from zarr.errors import UnstableSpecificationWarning
19
+ from zarr.errors import UnstableSpecificationWarning, ZarrUserWarning
20
20
 
21
21
  import vdata
22
22
  import vdata.timepoint as tp
@@ -231,7 +231,9 @@ def convert_to_TDF(
231
231
  }
232
232
  )
233
233
 
234
- compressors = LZ4()
234
+ with warnings.catch_warnings(action="ignore", category=ZarrUserWarning):
235
+ compressors = LZ4()
236
+
235
237
  with data.parameters(compressors):
236
238
  data["timepoints_index"] = timepoints
237
239
  data["index"] = index[sorting_indices]
@@ -284,7 +286,10 @@ def _convert_anndata_to_vdata(
284
286
  np.ones(data["obs"][next(iter(data["obs"]))].shape[0]) * timepoint.value, unit=timepoint.unit
285
287
  )
286
288
 
287
- z_data["timepoints"] = EZDataFrame({"value": np.unique(timepoints_list, equal_nan=False)})
289
+ _unique_tps = np.unique(timepoints_list, equal_nan=False)
290
+ z_data["timepoints"] = EZDataFrame(
291
+ {"value": _unique_tps, "unit": np.repeat(timepoints_list.unit, len(_unique_tps))}
292
+ )
288
293
  progressBar.update()
289
294
 
290
295
  # obs ---------------------------------------------------------------------
@@ -394,10 +399,10 @@ def convert_anndata_to_vdata(
394
399
  /!\ WARNING : if done inplace, you won't be able to open the file as an anndata anymore !
395
400
 
396
401
  Args:
397
- - path: path to the anndata h5 file to convert.
398
- - timepoint: a unique timepoint to set for the data in the anndata.
399
- - timepoints_column_name: the name of the column in anndata's obs to use as indicator of time point for the data.
400
- - drop_X: do not preserve the 'X' dataset ? (default: False)
402
+ path: path to the anndata h5 file to convert.
403
+ timepoint: a unique timepoint to set for the data in the anndata.
404
+ timepoints_column_name: the name of the column in anndata's obs to use as indicator of time point for the data.
405
+ drop_X: do not preserve the 'X' dataset ? (default: False)
401
406
  """
402
407
  path = Path(path)
403
408
  data = ch.H5Dict.read(path, mode=ch.H5Mode.READ_WRITE)
@@ -14,14 +14,14 @@ from vdata.timepoint import TimePoint
14
14
  from vdata.utils import spacer
15
15
 
16
16
 
17
- def _get_time_col_name(
17
+ def _get_timepoints_column_name(
18
18
  time_list: Sequence[str | TimePoint] | Literal["*"] | None,
19
- time_col_name: str | None,
19
+ timepoints_column_name: str | None,
20
20
  metadata: dict[str, Any] | None,
21
21
  *metadata_keys: str,
22
22
  ) -> str | None:
23
- if time_list is not None or time_col_name is not None:
24
- return time_col_name
23
+ if time_list is not None or timepoints_column_name is not None:
24
+ return timepoints_column_name
25
25
 
26
26
  if metadata is None:
27
27
  return None
@@ -48,7 +48,7 @@ def _get_col_dtype(
48
48
  def read_from_csv(
49
49
  path: str | Path,
50
50
  time_list: Sequence[str | TimePoint] | Literal["*"] | None = None,
51
- time_col_name: str | None = None,
51
+ timepoints_column_name: str | None = None,
52
52
  name: str = "",
53
53
  ) -> vdata.VData:
54
54
  """
@@ -111,7 +111,9 @@ def read_from_csv(
111
111
  obs = TemporalDataFrame.read_from_csv(
112
112
  parsed_directory / f.name,
113
113
  timepoints=time_list,
114
- time_col_name=_get_time_col_name(time_list, time_col_name, metadata, "obs"),
114
+ timepoints_column_name=_get_timepoints_column_name(
115
+ time_list, timepoints_column_name, metadata, "obs"
116
+ ),
115
117
  )
116
118
 
117
119
  else:
@@ -126,7 +128,9 @@ def read_from_csv(
126
128
  dataset_dict[dataset.name[:-4]] = TemporalDataFrame.read_from_csv(
127
129
  parsed_directory / f.name / dataset.name,
128
130
  timepoints=time_list,
129
- time_col_name=_get_time_col_name(time_list, time_col_name, metadata, f.name, dataset.name[:-4]),
131
+ timepoints_column_name=_get_timepoints_column_name(
132
+ time_list, timepoints_column_name, metadata, f.name, dataset.name[:-4]
133
+ ),
130
134
  columns_dtype=_get_col_dtype(metadata, f.name, dataset.name[:-4]),
131
135
  )
132
136