vdata 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {vdata-0.3.3 → vdata-0.3.4}/PKG-INFO +2 -2
  2. {vdata-0.3.3 → vdata-0.3.4}/pyproject.toml +5 -3
  3. {vdata-0.3.3 → vdata-0.3.4}/vdata/IO/errors.py +8 -1
  4. {vdata-0.3.3 → vdata-0.3.4}/vdata/__init__.py +24 -8
  5. vdata-0.3.4/vdata/cli.py +98 -0
  6. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/data.py +8 -6
  7. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/objects.py +1 -2
  8. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/obs.py +1 -7
  9. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/time.py +4 -3
  10. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/layers.py +1 -1
  11. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/obs.py +2 -2
  12. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/vdata.py +31 -19
  13. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/base.py +4 -4
  14. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/index.py +3 -1
  15. {vdata-0.3.3 → vdata-0.3.4}/vdata/update/tdf.py +6 -22
  16. {vdata-0.3.3 → vdata-0.3.4}/vdata/update/update.py +10 -4
  17. vdata-0.3.4/vdata/update/utils.py +22 -0
  18. {vdata-0.3.3 → vdata-0.3.4}/vdata/update/vdf.py +18 -17
  19. {vdata-0.3.3 → vdata-0.3.4}/vdata/utils.py +68 -7
  20. vdata-0.3.3/vdata/cli.py +0 -41
  21. {vdata-0.3.3 → vdata-0.3.4}/LICENSE +0 -0
  22. {vdata-0.3.3 → vdata-0.3.4}/README.md +0 -0
  23. {vdata-0.3.3 → vdata-0.3.4}/vdata/IO/__init__.py +0 -0
  24. {vdata-0.3.3 → vdata-0.3.4}/vdata/IO/logger.py +0 -0
  25. {vdata-0.3.3 → vdata-0.3.4}/vdata/_meta.py +0 -0
  26. {vdata-0.3.3 → vdata-0.3.4}/vdata/_typing.py +0 -0
  27. {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/__init__.py +0 -0
  28. {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/anndata.py +0 -0
  29. {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/containers.py +0 -0
  30. {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/dataframe.py +0 -0
  31. {vdata-0.3.3 → vdata-0.3.4}/vdata/array_view.py +0 -0
  32. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/__init__.py +0 -0
  33. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_indexing.py +0 -0
  34. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/__init__.py +0 -0
  35. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/anndata.py +0 -0
  36. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/__init__.py +0 -0
  37. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/layers.py +0 -0
  38. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/uns.py +0 -0
  39. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/var.py +0 -0
  40. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/utils.py +0 -0
  41. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/__init__.py +0 -0
  42. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/base.py +0 -0
  43. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/lazy.py +0 -0
  44. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/var.py +0 -0
  45. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/view.py +0 -0
  46. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/concatenate.py +0 -0
  47. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/convert.py +0 -0
  48. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/hash.py +0 -0
  49. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/name.py +0 -0
  50. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/read.py +0 -0
  51. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/view.py +0 -0
  52. {vdata-0.3.3 → vdata-0.3.4}/vdata/data/write.py +0 -0
  53. {vdata-0.3.3 → vdata-0.3.4}/vdata/names.py +0 -0
  54. {vdata-0.3.3 → vdata-0.3.4}/vdata/py.typed +0 -0
  55. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/__init__.py +0 -0
  56. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/_parse.py +0 -0
  57. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/dataframe.py +0 -0
  58. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/index.py +0 -0
  59. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/indexers.py +0 -0
  60. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/indexing.py +0 -0
  61. {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/view.py +0 -0
  62. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/__init__.py +0 -0
  63. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/_functions.py +0 -0
  64. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/_typing.py +0 -0
  65. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/array.py +0 -0
  66. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/range.py +0 -0
  67. {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/timepoint.py +0 -0
  68. {vdata-0.3.3 → vdata-0.3.4}/vdata/update/__init__.py +0 -0
  69. {vdata-0.3.3 → vdata-0.3.4}/vdata/update/array.py +0 -0
  70. {vdata-0.3.3 → vdata-0.3.4}/vdata/update/dict.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vdata
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Annotated multivariate observation of timestamped data
5
5
  Author: Matteo Bouvier
6
6
  Author-email: Matteo Bouvier <matteo.bouvier@hotmail.fr>
@@ -11,7 +11,7 @@ Requires-Dist: anndata>=0.10.4
11
11
  Requires-Dist: scipy>=1.12.0
12
12
  Requires-Dist: numpy-indexed>=0.3.7
13
13
  Requires-Dist: ch5mpy>=0.5.1
14
- Requires-Dist: ezarr>=1.1.2
14
+ Requires-Dist: ezarr>=1.1.3
15
15
  Requires-Dist: h5dataframe>=0.2.3 ; extra == 'update'
16
16
  Requires-Python: >=3.12
17
17
  Provides-Extra: update
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "vdata"
3
- version = "0.3.3"
3
+ version = "0.3.4"
4
4
  description = "Annotated multivariate observation of timestamped data"
5
5
  authors = [{ name = "Matteo Bouvier", email="matteo.bouvier@hotmail.fr"}]
6
6
  license = "CECILL-B"
@@ -14,7 +14,7 @@ dependencies = [
14
14
  "scipy>=1.12.0",
15
15
  "numpy-indexed>=0.3.7",
16
16
  "ch5mpy>=0.5.1",
17
- "ezarr>=1.1.2",
17
+ "ezarr>=1.1.3",
18
18
  ]
19
19
 
20
20
  [dependency-groups]
@@ -30,7 +30,9 @@ docs= [
30
30
  ]
31
31
 
32
32
  [project.scripts]
33
# Console entry points; each target must name a callable defined in vdata/cli.py.
vdata-update = "vdata.cli:update"
vdata-copy = "vdata.cli:copy"
vdata-info = "vdata.cli:info"
34
36
 
35
37
  [project.optional-dependencies]
36
38
  update = [
@@ -9,7 +9,7 @@ class VBaseError(BaseException):
9
9
  """
10
10
 
11
11
  def __init__(self, msg: str = ""):
12
- super().__init__()
12
+ super().__init__(msg)
13
13
  self.msg: str = msg
14
14
 
15
15
  @override
@@ -44,3 +44,10 @@ class VReadOnlyError(VBaseError):
44
44
  def __init__(self, msg: str = ""):
45
45
  super().__init__(msg="Read-only file !")
46
46
 
47
+
48
class InvalidVDataFileError(VBaseError):
    """
    File or directory is not valid as a VData storage.

    Args:
        msg: custom error message. When omitted, the class-level default `msg` is used.
    """

    msg: str = "File or directory is not a valid VData"

    def __init__(self, msg: str | None = None):
        # VBaseError.__init__ assigns `self.msg` on every instance (empty string by default),
        # which would shadow the class-level default: forward the default explicitly.
        super().__init__(msg=type(self).msg if msg is None else msg)
@@ -1,8 +1,13 @@
1
1
  """Annotated, temporal and multivariate observation data."""
2
2
 
3
3
  from importlib.metadata import metadata
4
+ from pathlib import Path
5
+
6
+ import ch5mpy as ch
7
+ import ezarr
4
8
 
5
9
  from vdata.data import VData, VDataView, concatenate, convert_anndata_to_vdata
10
+ from vdata.data.name import WRITE_PROTOCOL_VERSION
6
11
  from vdata.IO import (
7
12
  IncoherenceError,
8
13
  ShapeError,
@@ -11,6 +16,7 @@ from vdata.IO import (
11
16
  )
12
17
  from vdata.tdf import RepeatingIndex, TemporalDataFrame, TemporalDataFrameView
13
18
  from vdata.timepoint import TimePoint
19
+ from vdata.utils import copy_vdata
14
20
 
15
21
  read = VData.read
16
22
  read_from_csv = VData.read_from_csv
@@ -20,17 +26,27 @@ read_from_pickle = VData.read_from_pickle
20
26
  __version__ = metadata("vdata").get("version")
21
27
 
22
28
 
29
def get_version(path: str | Path) -> int:
    """
    Get the write-protocol version recorded in a saved VData.

    Args:
        path: path to a saved VData, either a single file readable by ch5mpy or a directory
            readable by ezarr.

    Returns:
        The value of the `__vdata_write_version__` attribute, or 0 when the attribute is missing.
    """
    try:
        version = ch.H5Dict.read(path).attributes.get("__vdata_write_version__", 0)

    except IsADirectoryError:
        # `path` is a directory: open it with ezarr instead.
        # Use the same fallback as the file branch so a missing attribute never raises KeyError.
        version = ezarr.EZDict.open(path).attrs.get("__vdata_write_version__", 0)

    return int(version)  # pyright: ignore[reportArgumentType]
35
+
36
+
23
37
  __all__ = [
24
- "VData",
38
+ "concatenate",
39
+ "convert_anndata_to_vdata",
40
+ "copy_vdata",
41
+ "IncoherenceError",
42
+ "RepeatingIndex",
43
+ "ShapeError",
25
44
  "TemporalDataFrame",
26
- "VDataView",
27
45
  "TemporalDataFrameView",
28
- "convert_anndata_to_vdata",
29
- "concatenate",
46
+ "TimePoint",
30
47
  "VBaseError",
31
- "ShapeError",
32
- "IncoherenceError",
48
+ "VData",
49
+ "VDataView",
33
50
  "VLockError",
34
- "TimePoint",
35
- "RepeatingIndex",
51
+ "WRITE_PROTOCOL_VERSION",
36
52
  ]
@@ -0,0 +1,98 @@
1
+ import argparse
2
+ import subprocess
3
+ import traceback
4
+ from pathlib import Path
5
+
6
+ import ch5mpy as ch
7
+ from py import sys
8
+
9
+ import vdata
10
+ from vdata.update.update import update_vdata
11
+ from vdata.utils import copy_vdata
12
+
13
+
14
def print_err(msg: str) -> None:
    """
    Print an error message to the standard error stream, in red and prefixed with "[ERROR] ".

    Args:
        msg: the message to print.
    """
    # Local import: guarantees the standard library's `sys` is used, without relying on the
    # third-party `py` package re-exporting it.
    import sys

    print("\033[31m[ERROR] " + msg + "\033[0m", file=sys.stderr)
16
+
17
+
18
def update() -> int:
    """
    Command line entry point: update a VData saved in an older version.

    Command line arguments:
        filename: path to the VData file to update.
        -o, --out-file: path to the updated output file. (default: None)
        -v, --verbose: verbose output, also prints the traceback on error. (default: False)

    Returns:
        0 on success, 1 if the update failed.
    """
    parser = argparse.ArgumentParser(prog="vdata-update", description="Update a VData from an older version")

    parser.add_argument("filename")
    parser.add_argument("-o", "--out-file", default=None, type=str)
    parser.add_argument("-v", "--verbose", default=False, action="store_true")

    args = parser.parse_args()

    try:
        # opening the input is part of the guarded section so that an unreadable file is reported
        # like any other failure
        data = ch.H5Dict.read(args.filename, mode=ch.H5Mode.READ_WRITE)
        update_vdata(data, output_file=args.out_file, verbose=args.verbose)

    # VBaseError derives from BaseException and must be listed explicitly;
    # KeyboardInterrupt and SystemExit are deliberately left to propagate.
    except (Exception, vdata.VBaseError) as e:
        print_err(" ".join(arg for arg in e.args if isinstance(arg, str)))  # pyright: ignore[reportUnnecessaryIsInstance]

        if args.verbose:
            traceback.print_tb(e.__traceback__)

        return 1

    print("\033[32m[Done]\033[0m")
    return 0
45
+
46
+
47
def copy() -> int:
    """
    Command line entry point: copy a VData to a new location.

    Command line arguments:
        source: path to the VData to copy.
        destination: path to copy the VData to.
        -e, --exclude: component to leave out of the copy, can be repeated
            (one of "obsm", "obsp", "varm", "varp", "uns").
        -v, --verbose: verbose output, also prints the traceback on error. (default: False)

    Returns:
        0 on success, 1 if the copy failed.
    """
    parser = argparse.ArgumentParser(prog="vdata-copy", description="Copy a VData to a new location")

    parser.add_argument("source")
    parser.add_argument("destination")
    parser.add_argument("-e", "--exclude", default=[], action="append", choices=["obsm", "obsp", "varm", "varp", "uns"])
    parser.add_argument("-v", "--verbose", default=False, action="store_true")

    args = parser.parse_args()

    try:
        copy_vdata(args.source, args.destination, args.exclude, verbose=args.verbose)

    # VBaseError derives from BaseException and must be listed explicitly;
    # KeyboardInterrupt and SystemExit are deliberately left to propagate.
    except (Exception, vdata.VBaseError) as e:
        print_err(" ".join(arg for arg in e.args if isinstance(arg, str)))  # pyright: ignore[reportUnnecessaryIsInstance]

        if args.verbose:
            traceback.print_tb(e.__traceback__)

        return 1

    print("\033[32m[Done]\033[0m")
    return 0
70
+
71
+
72
def info() -> int:
    """
    Command line entry point: print a summary of a saved VData.

    Command line arguments:
        filename: path to the VData to describe.

    Returns:
        0 once the summary has been printed.
    """
    parser = argparse.ArgumentParser(prog="vdata-info", description="Print information about a VData")

    parser.add_argument("filename")

    args = parser.parse_args()

    # the size on disk is obtained from the external `du` command (first field of `du -sh`)
    size = subprocess.check_output(["du", "-sh", args.filename]).split()[0].decode("utf-8")
    data = vdata.read(args.filename)

    print(f"""\
size: \t{size}

name: \t{data.name}
timepoints:\t{", ".join(map(str, data.timepoints_values))}
shape: \t{data.n_obs} obs x {data.n_var} vars x {data.n_timepoints} timepoints

layers: \t{", ".join(data.layers.keys())}
obs: \t{", ".join(data.obs.columns)}
obsm: \t{", ".join(data.obsm.keys())}
obsp: \t{", ".join(data.obsp.keys())}
var: \t{", ".join(data.var.keys())}
varm: \t{", ".join(data.varm.keys())}
varp: \t{", ".join(data.varp.keys())}
uns: \t{", ".join(data.uns.keys())}
""")
    return 0
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Mapping, MutableMapping, Sequence
3
+ from collections.abc import Collection, Mapping, MutableMapping
4
4
  from dataclasses import dataclass, field
5
+ from pathlib import Path
5
6
  from typing import Any
6
7
 
7
8
  import ezarr as ez
@@ -9,6 +10,7 @@ import numpy as np
9
10
  import numpy.typing as npt
10
11
  import pandas as pd
11
12
  from anndata import AnnData
13
+ from anndata._core.xarray import Dataset2D
12
14
  from ezarr.dataframe import EZDataFrame
13
15
  from scipy.sparse import spmatrix
14
16
 
@@ -130,10 +132,10 @@ class ParsingDataIn:
130
132
  | Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
131
133
  | None
132
134
  )
133
- obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase
135
+ obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase
134
136
  obsm: Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
135
137
  obsp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
136
- var: pd.DataFrame | EZDataFrame
138
+ var: pd.DataFrame | EZDataFrame | Dataset2D
137
139
  varm: Mapping[str, pd.DataFrame | EZDataFrame]
138
140
  varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
139
141
  timepoints: pd.DataFrame | EZDataFrame
@@ -166,8 +168,8 @@ class ParsingDataIn:
166
168
  varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]] | None,
167
169
  timepoints: pd.DataFrame | EZDataFrame | None,
168
170
  time_col_name: str | None,
169
- timepoints_list: Sequence[str | tp.TimePoint] | tp.TimePointNArray | None,
170
- uns: dict[str, Any] | ez.EZDict[Any] | None,
171
+ timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
172
+ uns: MutableMapping[str, Any] | ez.EZDict[Any] | None,
171
173
  ) -> ParsingDataIn:
172
174
  _timepoints_list = parse_timepoints_list(timepoints_list, time_col_name, obs)
173
175
 
@@ -320,5 +322,5 @@ class ParsingDataOut:
320
322
  varm=data.setdefault("varm", {}),
321
323
  varp=data.setdefault("varp", {}),
322
324
  timepoints=_timepoints,
323
- uns=data["uns"],
325
+ uns=data.setdefault("uns", {}),
324
326
  )
@@ -1,6 +1,5 @@
1
1
  from typing import Any
2
2
 
3
- import pandas as pd
4
3
  from ezarr.dataframe import EZDataFrame
5
4
 
6
5
  from vdata.data._parse.data import ParsingDataIn, ParsingDataOut
@@ -33,7 +32,7 @@ def parse_objects(data: ParsingDataIn) -> ParsingDataOut:
33
32
  _obs,
34
33
  parse_obsm(data),
35
34
  parse_obsp(data),
36
- EZDataFrame(data.var) if isinstance(data.var, pd.DataFrame) else data.var,
35
+ EZDataFrame(data.var) if not isinstance(data.var, EZDataFrame) else data.var,
37
36
  parse_varm(data),
38
37
  parse_varp(data),
39
38
  _valid_timepoints(data, _obs),
@@ -65,9 +65,6 @@ def parse_obsm(data: ParsingDataIn) -> dict[str, TemporalDataFrame | TemporalDat
65
65
 
66
66
  generalLogger.debug(f" 3. \u2713 'obsm' is a {type(data.obsm).__name__}.")
67
67
 
68
- if data.obs is None and not len(data.layers):
69
- raise ValueError("'obsm' parameter cannot be set unless either 'data' or 'obs' are set.")
70
-
71
68
  if not isinstance(data.obsm, dict):
72
69
  raise TypeError("'obsm' must be a dictionary of DataFrames.")
73
70
 
@@ -76,10 +73,7 @@ def parse_obsm(data: ParsingDataIn) -> dict[str, TemporalDataFrame | TemporalDat
76
73
  for key, value in data.obsm.items():
77
74
  if isinstance(value, (pd.DataFrame, EZDataFrame)):
78
75
  if data.timepoints_list is None:
79
- if data.obs is not None:
80
- data.timepoints_list = TimePointNArray(data.obs.timepoints_column)
81
- else:
82
- data.timepoints_list = first_in(data.layers).timepoints_column
76
+ data.timepoints_list = TimePointNArray(data.obs.timepoints_column)
83
77
 
84
78
  valid_obsm[str(key)] = TemporalDataFrame(value, timepoints=data.timepoints_list, name=str(key))
85
79
 
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Sequence
3
+ from collections.abc import Collection
4
4
  from typing import TYPE_CHECKING, cast
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ from anndata._core.xarray import Dataset2D
8
9
  from ezarr.dataframe import EZDataFrame
9
10
 
10
11
  import vdata.timepoint as tp
@@ -19,9 +20,9 @@ if TYPE_CHECKING:
19
20
 
20
21
 
21
22
  def parse_timepoints_list(
22
- timepoints_list: Sequence[str | tp.TimePoint] | tp.TimePointNArray | None,
23
+ timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
23
24
  time_col_name: str | None,
24
- obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase | None,
25
+ obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase | None,
25
26
  ) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
26
27
  if timepoints_list is not None:
27
28
  return tp.as_timepointarray(timepoints_list)
@@ -83,7 +83,7 @@ class VLayersArrayContainer(VTDFArrayContainer):
83
83
  f"Column names of layer '{TDF_index}' ({tdf.columns}) do not match var's index. ({self._vdata.var.index})"
84
84
  )
85
85
 
86
- if not np.all(self._vdata.timepoints.value.values == tdf.timepoints):
86
+ if not np.all(self._vdata.timepoints_values == tdf.timepoints):
87
87
  raise IncoherenceError(
88
88
  f"Time points of layer '{TDF_index}' ({tdf.timepoints}) do not match time_point's index. ({self._vdata.timepoints.value.values})"
89
89
  )
@@ -78,9 +78,9 @@ class VObsmArrayContainer(VTDFArrayContainer):
78
78
  f"Index of TemporalDataFrame '{TDF_index}' ({tdf.index}) does not match obs' index. ({self._vdata.obs.index})"
79
79
  )
80
80
 
81
- if np.any(self._vdata.timepoints.value.values != tdf.timepoints):
81
+ if np.any(self._vdata.timepoints_values != tdf.timepoints):
82
82
  raise IncoherenceError(
83
- f"Time points of TemporalDataFrame '{TDF_index}' ({tdf.timepoints}) do not match time_point's index. ({self._vdata.timepoints.value.values})"
83
+ f"Time points of TemporalDataFrame '{TDF_index}' ({tdf.timepoints}) do not match vdata's timepoints. ({self._vdata.timepoints_values})"
84
84
  )
85
85
 
86
86
  tdf.lock_indices()
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import pickle
4
+ import shutil
4
5
  from collections.abc import Collection, Mapping, MutableMapping, Sequence
5
6
  from pathlib import Path
6
7
  from types import TracebackType
@@ -18,6 +19,7 @@ from zarr.core.common import AccessModeLiteral
18
19
  from zarr.errors import GroupNotFoundError
19
20
  from zarr.storage import LocalStore, ZipStore
20
21
 
22
+ import vdata
21
23
  import vdata.timepoint as tp
22
24
  from vdata._meta import PrettyRepr
23
25
  from vdata._typing import IFS, PreSlicer, np_IFS
@@ -84,7 +86,7 @@ class VData(ez.SupportsEZReadWrite):
84
86
  timepoints: pd.DataFrame | EZDataFrame | None = None,
85
87
  uns: MutableMapping[str, Any] | None = None,
86
88
  time_col_name: str | None = None,
87
- timepoints_list: Sequence[str | tp.TimePoint] | tp.TimePointNArray | None = None,
89
+ timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None = None,
88
90
  name: str = "",
89
91
  ):
90
92
  """
@@ -380,7 +382,6 @@ class VData(ez.SupportsEZReadWrite):
380
382
  def timepoints(self) -> EZDataFrame:
381
383
  """
382
384
  Get time points data.
383
- :return: the time points DataFrame.
384
385
  """
385
386
  return self._timepoints
386
387
 
@@ -395,14 +396,17 @@ class VData(ez.SupportsEZReadWrite):
395
396
  raise VReadOnlyError
396
397
 
397
398
  if not isinstance(df, (pd.DataFrame, EZDataFrame)): # pyright: ignore[reportUnnecessaryIsInstance]
398
- raise TypeError("'time points' must be a pandas DataFrame.") # pyright: ignore[reportUnreachable]
399
+ raise TypeError("'timepoints' must be a pandas DataFrame.") # pyright: ignore[reportUnreachable]
399
400
 
400
401
  elif df.shape[0] != self.n_timepoints:
401
- raise ShapeError(f"'time points' has {df.shape[0]} lines, it should have {self.n_timepoints}.")
402
+ raise ShapeError(f"'timepoints' has {df.shape[0]} rows, it should have {self.n_timepoints}.")
402
403
 
403
404
  elif "value" not in df.columns:
404
405
  raise ValueError("Time points DataFrame should contain a 'value' column.")
405
406
 
407
+ elif "unit" not in df.columns:
408
+ raise ValueError("Time points DataFrame should contain a 'unit' column.")
409
+
406
410
  df["value"] = tp.as_timepointarray(df["value"])
407
411
  self._timepoints = EZDataFrame(df)
408
412
 
@@ -410,28 +414,22 @@ class VData(ez.SupportsEZReadWrite):
410
414
  def timepoints_values(self) -> tp.TimePointNArray | NDArrayView[tp.TimePoint]:
411
415
  """
412
416
  Get the list of time points values (with the unit if possible).
413
-
414
- :return: the list of time points values (with the unit if possible).
415
417
  """
416
- return tp.as_timepointarray(self.timepoints.value)
418
+ return tp.as_timepointarray(self.timepoints_strings)
417
419
 
418
420
  @property
419
421
  def timepoints_strings(self) -> list[str]:
420
422
  """
421
423
  Get the list of time points as strings.
422
-
423
- :return: the list of time points as strings.
424
424
  """
425
- return [str(tp.value) for tp in self.timepoints_values]
425
+ return [str(value) + unit for value, unit in zip(self.timepoints.value, self.timepoints.unit)] # pyright: ignore[reportUnknownVariableType]
426
426
 
427
427
  @property
428
428
  def timepoints_numerical(self) -> list[float]:
429
429
  """
430
430
  Get the list of bare values from the time points.
431
-
432
- :return: the list of bare values from the time points.
433
431
  """
434
- return [tp.value for tp in self.timepoints_values]
432
+ return self.timepoints.value.values[:].tolist() # pyright: ignore[reportUnknownVariableType]
435
433
 
436
434
  @property
437
435
  def obs(self) -> TemporalDataFrameBase:
@@ -714,7 +712,7 @@ class VData(ez.SupportsEZReadWrite):
714
712
 
715
713
  def write(self, file: str | Path | None = None, verbose: bool = True) -> None:
716
714
  """
717
- Save this VData object in HDF5 file format.
715
+ Save this VData object to a local file.
718
716
 
719
717
  Args:
720
718
  file: path to save the VData
@@ -742,16 +740,19 @@ class VData(ez.SupportsEZReadWrite):
742
740
  write_vdata_to_csv(self, directory, sep, na_rep, index, header)
743
741
 
744
742
  @classmethod
745
- def read(cls, path: str | Path, mode: AccessModeLiteral = "a") -> VData:
743
+ def read(cls, path: str | Path, mode: AccessModeLiteral = "a", secure: bool = False, verbose: bool = True) -> VData:
746
744
  """
747
745
  Read a saved VData from a local file.
748
746
 
749
747
  Args:
750
748
  - path: path to a h5 file.
751
- - mode: mode for opening the h5 file.
749
+ - mode: mode for opening the h5 file. (default: "a")
750
+ - secure: create a temporary file to work on instead of opening the file at `path` directly. (default: False)
751
+ - verbose: verbose output in case of update. (default: True)
752
752
  """
753
- if not Path(path).suffix == ".vd":
754
- raise IOError(f"Cannot read file with suffix '{Path(path).suffix}', should be '.vd'")
753
+ path = Path(path)
754
+ if not path.suffix == ".vd":
755
+ raise IOError(f"Cannot read file with suffix '{path.suffix}', should be '.vd'")
755
756
 
756
757
  try:
757
758
  data = ez.EZDict[Any].open(path, mode=mode)
@@ -768,7 +769,18 @@ class VData(ez.SupportsEZReadWrite):
768
769
  f"Found old VData with {version=} but could not update, please install `vdata[update]`"
769
770
  )
770
771
 
771
- _, data = update_vdata(data, verbose=True)
772
+ _, data = update_vdata(data, verbose=verbose)
773
+
774
+ if secure:
775
+ temp_path = path.with_stem("~" + path.stem)
776
+
777
+ if not temp_path.exists():
778
+ shutil.copytree(path, temp_path)
779
+
780
+ elif vdata.get_version(temp_path) < vdata.get_version(path):
781
+ raise FileExistsError(f"{path} was updated, but {temp_path} exists")
782
+
783
+ data = ez.EZDict[Any].open(temp_path, mode=mode)
772
784
 
773
785
  return VData.__ez_read__(data)
774
786
 
@@ -700,11 +700,11 @@ class TemporalDataFrameBase(ABC, ez.SupportsEZReadWrite):
700
700
  return len(self._timepoints_index.at(timepoint))
701
701
 
702
702
  @property
703
- def columns_num(self) -> AnyNDArrayLike_IFS:
703
+ def columns_num(self) -> npt.NDArray[np_IFS]:
704
704
  """
705
705
  Get the list of column names for numerical data.
706
706
  """
707
- return self._columns_numerical
707
+ return np.asarray(self._columns_numerical)
708
708
 
709
709
  @columns_num.setter
710
710
  def columns_num(self, values: AnyNDArrayLike_IFS) -> None:
@@ -724,11 +724,11 @@ class TemporalDataFrameBase(ABC, ez.SupportsEZReadWrite):
724
724
  return self._columns_numerical.shape[0]
725
725
 
726
726
  @property
727
- def columns_str(self) -> AnyNDArrayLike_IFS:
727
+ def columns_str(self) -> npt.NDArray[np_IFS]:
728
728
  """
729
729
  Get the list of column names for string data.
730
730
  """
731
- return self._columns_string
731
+ return np.asarray(self._columns_string)
732
732
 
733
733
  @columns_str.setter
734
734
  def columns_str(self, values: AnyNDArrayLike_IFS) -> None:
@@ -112,7 +112,9 @@ class TimePointIndex(ez.SupportsEZReadWrite):
112
112
  return TimePointIndex(timepoints, ranges)
113
113
 
114
114
  def as_array(self) -> TimePointNArray:
115
- return TimePointNArray(np.repeat(self._timepoints, np.diff(self._ranges, prepend=0)))
115
+ return TimePointNArray(
116
+ np.repeat(self._timepoints, np.diff(self._ranges, prepend=0)), unit=self._timepoints.unit
117
+ )
116
118
 
117
119
  def len(self, timepoint: TimePoint) -> int:
118
120
  index_tp = np.where(self._timepoints[:] == timepoint)[0][0]
@@ -16,6 +16,7 @@ import vdata
16
16
  from vdata.timepoint import TimePointIndex, TimePointNArray, TimePointZArray
17
17
  from vdata.timepoint.array import as_timepointarray
18
18
  from vdata.update.array import update_array
19
+ from vdata.update.utils import save_class_info
19
20
 
20
21
 
21
22
  def _update_tdf_v0_to_v1(data: ch.H5Dict[Any], output_file: Path | None, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
@@ -63,7 +64,7 @@ def _update_tdf_v1_to_v2(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
63
64
  (data @ "timepoints_index" @ "timepoints").attributes.set(
64
65
  __h5_type__="object",
65
66
  __h5_class__=np.void(pickle.dumps(TimePointNArray, protocol=pickle.HIGHEST_PROTOCOL)),
66
- unit="h",
67
+ unit=data["timepoints_array"][0][-1],
67
68
  )
68
69
 
69
70
  del data["timepoints_array"]
@@ -80,7 +81,6 @@ def _update_tdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
80
81
  "repeating_index": bool(data.attributes["repeating_index"]),
81
82
  "locked_indices": bool(data.attributes["locked_indices"]),
82
83
  "locked_columns": bool(data.attributes["locked_columns"]),
83
- Attribute.EZType: EZType.Object,
84
84
  }
85
85
  )
86
86
 
@@ -90,23 +90,12 @@ def _update_tdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
90
90
  "timepoints": data @ "timepoints_index" @ "timepoints",
91
91
  "ranges": data @ "timepoints_index" @ "ranges",
92
92
  }
93
- ez_data["timepoints_index"].attrs.put({Attribute.EZType: EZType.Object})
94
- with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
95
- (ez_data @ "timepoints_index").create_array(
96
- Attribute.EZClass,
97
- data=np.void(pickle.dumps(TimePointIndex, protocol=pickle.HIGHEST_PROTOCOL)),
98
- overwrite=True,
99
- )
93
+ save_class_info(TimePointIndex, ez_data["timepoints_index"])
100
94
 
101
95
  (ez_data @ "timepoints_index" @ "timepoints").attrs.put( # pyright: ignore[reportOperatorIssue]
102
- {"unit": str(data["timepoints_index"]["timepoints"].attributes["unit"]), Attribute.EZType: EZType.Object}
96
+ {"unit": str(data["timepoints_index"]["timepoints"].attributes["unit"])}
103
97
  )
104
- with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
105
- (ez_data @ "timepoints_index" @ "timepoints").create_array( # pyright: ignore[reportOperatorIssue]
106
- Attribute.EZClass,
107
- data=np.void(pickle.dumps(TimePointZArray, protocol=pickle.HIGHEST_PROTOCOL)),
108
- overwrite=True,
109
- )
98
+ save_class_info(TimePointZArray, ez_data @ "timepoints_index" @ "timepoints") # pyright: ignore[reportOperatorIssue]
110
99
 
111
100
  ez_data["index"] = data @ "index"
112
101
  ez_data["columns_numerical"] = data @ "columns_numerical"
@@ -114,12 +103,7 @@ def _update_tdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
114
103
  ez_data["array_numerical"] = data @ "numerical_array"
115
104
  ez_data["array_string"] = data @ "string_array"
116
105
 
117
- with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
118
- ez_data.create_array(
119
- Attribute.EZClass,
120
- data=np.void(pickle.dumps(vdata.TemporalDataFrame, protocol=pickle.HIGHEST_PROTOCOL)),
121
- overwrite=True,
122
- )
106
+ save_class_info(vdata.TemporalDataFrame, ez_data)
123
107
 
124
108
 
125
109
  class tdf_updator(Protocol):
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import shutil
4
+ import warnings
4
5
  from pathlib import Path
5
6
  from typing import Any
6
7
 
@@ -12,6 +13,7 @@ from h5dataframe import H5DataFrame
12
13
  from tqdm.auto import tqdm
13
14
  from zarr.codecs.numcodecs import LZ4
14
15
  from zarr.core.array import CompressorsLike
16
+ from zarr.errors import ZarrUserWarning
15
17
 
16
18
  import vdata
17
19
  from vdata.data.name import WRITE_PROTOCOL_VERSION
@@ -133,21 +135,25 @@ def _get_output_file(data_file: Path, output_file: str | Path | None, from_versi
133
135
  return ez_filename
134
136
 
135
137
 
138
+ with warnings.catch_warnings(action="ignore", category=ZarrUserWarning):
139
+ _default_compressors = (LZ4(),)
140
+
141
+
136
142
  def update_vdata(
137
143
  data: Path | str | ch.H5Dict[Any] | ez.EZDict[Any],
138
144
  *,
139
145
  output_file: str | Path | None = None,
140
146
  verbose: bool = False,
141
- compressors: CompressorsLike = LZ4(), # pyright: ignore[reportCallInDefaultInitializer]
147
+ compressors: CompressorsLike = _default_compressors,
142
148
  ) -> tuple[int, ez.EZDict[Any]]:
143
149
  """
144
- Update an h5 file containing a vdata saved in an older version.
150
+ Update a saved vdata from an older version.
145
151
 
146
152
  Args:
147
153
  data: path to the h5 file to update.
148
154
  output_file: path to the updated output vdata file.
149
155
  verbose: print a progress bar ? (default: False)
150
- compressors:
156
+ compressors: zarr compressors to use when writing Arrays. (default: LZ4)
151
157
  """
152
158
  if isinstance(data, ez.EZDict):
153
159
  assert data.attrs.get("__vdata_write_version__") == WRITE_PROTOCOL_VERSION, (
@@ -175,7 +181,7 @@ def update_vdata(
175
181
 
176
182
  for v in range(data_version, WRITE_PROTOCOL_VERSION):
177
183
  progressBar: tqdm[Any] | NoBar = (
178
- tqdm(total=nb_items_to_write, desc=f" Updating VData {filename} [version {v} => {v + 1}]", unit="object")
184
+ tqdm(total=nb_items_to_write, desc=f"Updating VData {filename} [version {v} => {v + 1}]", unit="object")
179
185
  if verbose
180
186
  else NoBar()
181
187
  )
@@ -0,0 +1,22 @@
1
+ import pickle
2
+ import warnings
3
+ from typing import Any
4
+
5
+ import ezarr as ez
6
+ import numpy as np
7
+ from ezarr.names import Attribute, EZType
8
+ from zarr.errors import UnstableSpecificationWarning
9
+
10
+
11
def save_class_info(klass: type, ez_data: ez.EZDict[Any]) -> None:
    """
    Mark `ez_data` as a stored object and record the class it represents.

    Args:
        klass: the class to record; it is pickled and stored in an array named `Attribute.EZClass`.
        ez_data: the EZDict receiving the type attribute and the pickled class.
    """
    # merge the type marker into the existing attributes
    type_marker = {Attribute.EZType: EZType.Object}
    ez_data.attrs.update(type_marker)

    with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
        pickled_class = pickle.dumps(klass, protocol=pickle.HIGHEST_PROTOCOL)
        ez_data.create_array(
            Attribute.EZClass,
            data=np.void(pickled_class),  # pyright: ignore[reportArgumentType]
            overwrite=True,
        )
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import pickle
4
- import warnings
5
4
  from pathlib import Path
6
5
  from typing import Any, Protocol
7
6
 
@@ -11,11 +10,11 @@ import numpy as np
11
10
  import numpy.typing as npt
12
11
  import zarr
13
12
  from ezarr.dataframe import EZDataFrame
14
- from ezarr.names import Attribute, EZType
15
13
  from h5dataframe import H5DataFrame
16
- from zarr.errors import UnstableSpecificationWarning
17
14
 
15
+ import vdata.timepoint as tp
18
16
  from vdata.update.array import update_array
17
+ from vdata.update.utils import save_class_info
19
18
 
20
19
 
21
20
  def get_common_dtype(dt1: npt.DTypeLike, dt2: npt.DTypeLike) -> type[np.generic]:
@@ -76,24 +75,26 @@ def _update_vdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
76
75
  assert output_file is not None
77
76
  ez_data = ez.EZDict[Any](zarr.open_group(output_file, path=data.file.name))
78
77
 
79
- ez_data.attrs.put(
80
- {
81
- Attribute.EZType: EZType.Object,
82
- }
83
- )
78
+ is_timepoints = ez_data.group.basename == "timepoints"
84
79
 
85
80
  compressors = kwargs.get("compressors")
86
81
  with ez_data.parameters(compressors):
87
82
  ez_data["index"] = data @ "index"
88
- ez_data["arrays"] = {name: arr for name, arr in (data @ "arrays").items()}
89
- ez_data["arrays"].attrs.put({"columns_order": list((data @ "arrays").keys())})
90
-
91
- with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
92
- ez_data.create_array(
93
- Attribute.EZClass,
94
- data=np.void(pickle.dumps(EZDataFrame, protocol=pickle.HIGHEST_PROTOCOL)), # pyright: ignore[reportArgumentType]
95
- overwrite=True,
96
- )
83
+ ez_data["arrays"] = {
84
+ name: arr for name, arr in (data @ "arrays").items() if not is_timepoints or not name == "value"
85
+ }
86
+ ez_data["arrays"].attrs.put(
87
+ {"columns_order": [name for name in (data @ "arrays").keys() if not is_timepoints or not name == "value"]}
88
+ )
89
+
90
+ if is_timepoints:
91
+ tps = tp.as_timepointarray(data["arrays"]["value"])
92
+ ez_data["arrays"]["value"] = np.array(tps)
93
+ ez_data["arrays"]["unit"] = np.repeat(tps.unit, len(tps))
94
+
95
+ ez_data["arrays"].attrs["columns_order"] = ["value", "unit"] + ez_data["arrays"].attrs["columns_order"]
96
+
97
+ save_class_info(EZDataFrame, ez_data)
97
98
 
98
99
 
99
100
  class vdf_updator(Protocol):
@@ -1,24 +1,26 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Collection, Mapping
3
+ import shutil
4
+ from collections.abc import Collection, Mapping, Sequence
4
5
  from itertools import islice
5
6
  from math import ceil, floor
6
- from typing import TYPE_CHECKING, Any, TypeGuard, TypeVar
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING, Any, Literal, TypeGuard
7
9
 
8
10
  import numpy as np
9
11
  import numpy.typing as npt
10
12
  import zarr
13
+ from tqdm.auto import tqdm
11
14
 
12
15
  from vdata.array_view import NDArrayView
16
+ from vdata.IO.errors import InvalidVDataFileError
13
17
 
14
18
  if TYPE_CHECKING:
15
19
  from vdata._typing import PreSlicer
16
20
 
17
- _V = TypeVar("_V")
18
-
19
21
 
20
22
  # misc ------------------------------------------------------------------------
21
- def first_in(d: Mapping[Any, _V]) -> _V:
23
+ def first_in[V](d: Mapping[Any, V]) -> V:
22
24
  return next(iter(d.values()))
23
25
 
24
26
 
@@ -105,7 +107,7 @@ def repr_index(
105
107
 
106
108
 
107
109
  # type coercion ---------------------------------------------------------------
108
- def deep_dict_convert(obj: Mapping[Any, Any]) -> dict[Any, Any]:
110
+ def deep_dict_convert(obj: Any) -> dict[Any, Any]:
109
111
  """
110
112
  'Deep' convert a mapping of any kind (and children mappings) into regular dictionaries.
111
113
 
@@ -118,4 +120,63 @@ def deep_dict_convert(obj: Mapping[Any, Any]) -> dict[Any, Any]:
118
120
  if not isinstance(obj, Mapping):
119
121
  return obj
120
122
 
121
- return {k: deep_dict_convert(v) for k, v in obj.items()}
123
+ return {k: deep_dict_convert(v) for k, v in obj.items()} # pyright: ignore[reportUnknownVariableType]
124
+
125
+
126
+ # copy ------------------------------------------------------------------------
127
def is_valid_storage(path: Path) -> bool:
    """
    Tell whether `path` looks like a stored VData.

    Args:
        path: location to inspect.

    Returns:
        True when `path` is an existing directory whose direct children include "layers", "obs", "var",
        "timepoints" and "zarr.json"; False otherwise. Only entry names are checked, not their content.
    """
    # is_dir() is already False for a path that does not exist.
    if not path.is_dir():
        return False

    required_entries = {"layers", "obs", "var", "timepoints", "zarr.json"}
    found_entries = {entry.name for entry in path.iterdir()}

    return required_entries <= found_entries
138
+
139
+
140
+ def copy_vdata(
141
+ source: str | Path,
142
+ destination: str | Path,
143
+ exclude: list[Literal["obsm", "obsp", "varm", "varp", "uns"]],
144
+ verbose: bool = False,
145
+ ) -> None:
146
+ source = Path(source)
147
+ destination = Path(destination).with_suffix(".vd")
148
+
149
+ if not source.exists():
150
+ raise FileNotFoundError("")
151
+
152
+ if not is_valid_storage(source):
153
+ raise InvalidVDataFileError(f"{source} is not a valid stored VData")
154
+
155
+ destination.parent.mkdir(parents=True, exist_ok=True)
156
+
157
+ # for file in filter(lambda p: p.name not in exclude, source.iterdir()):
158
+ def _ignore(src: str, _) -> Sequence[str]:
159
+ if src == str(source):
160
+ return exclude
161
+
162
+ return ()
163
+
164
+ if verbose:
165
+ total = (
166
+ sum(
167
+ len([file for file in dir.rglob("*") if file.is_file()])
168
+ for dir in source.iterdir()
169
+ if dir.name not in exclude
170
+ )
171
+ + 1
172
+ )
173
+ progress = tqdm(total=total, desc=f"Copying VData {source}", unit="files")
174
+
175
+ def _copy(src: str, dst: str) -> str:
176
+ progress.update()
177
+ return shutil.copy2(src, dst)
178
+
179
+ shutil.copytree(source, destination, ignore=_ignore, copy_function=_copy)
180
+
181
+ else:
182
+ shutil.copytree(source, destination, ignore=_ignore)
vdata-0.3.3/vdata/cli.py DELETED
@@ -1,41 +0,0 @@
1
- import argparse
2
- import traceback
3
- from pathlib import Path
4
-
5
- import ch5mpy as ch
6
- from py import sys
7
-
8
- from vdata.update.update import update_vdata
9
-
10
-
11
- def print_err(msg: str) -> None:
12
- print("\033[31m[ERROR] " + msg + "\033[0m", file=sys.stderr)
13
-
14
-
15
- def main() -> int:
16
- parser = argparse.ArgumentParser(prog="vdata-update", description="Update a VData from an older version")
17
-
18
- parser.add_argument("filename")
19
- parser.add_argument("-o", "--out-file", default=None, type=str)
20
- parser.add_argument("-v", "--verbose", default=False, action="store_true")
21
-
22
- args = parser.parse_args()
23
-
24
- data = ch.H5Dict.read(args.filename, mode=ch.H5Mode.READ_WRITE)
25
-
26
- ez_filename = Path(data.filename)
27
- ez_filename = ez_filename.with_stem("~" + ez_filename.stem)
28
-
29
- try:
30
- update_vdata(data, output_file=args.out_file, verbose=args.verbose)
31
-
32
- except Exception as e:
33
- print_err(" ".join(filter(lambda a: isinstance(a, str), e.args))) # pyright: ignore[reportUnnecessaryIsInstance]
34
-
35
- if args.verbose:
36
- traceback.print_tb(e.__traceback__)
37
-
38
- return 1
39
-
40
- print("\033[32m[Done]\033[0m")
41
- return 0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes