vdata 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {vdata-0.3.2 → vdata-0.3.4}/PKG-INFO +2 -2
  2. {vdata-0.3.2 → vdata-0.3.4}/pyproject.toml +5 -3
  3. {vdata-0.3.2 → vdata-0.3.4}/vdata/IO/errors.py +8 -1
  4. {vdata-0.3.2 → vdata-0.3.4}/vdata/__init__.py +24 -8
  5. vdata-0.3.4/vdata/cli.py +98 -0
  6. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/data.py +8 -6
  7. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/objects/objects.py +1 -2
  8. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/objects/obs.py +1 -7
  9. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/time.py +4 -3
  10. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/layers.py +1 -1
  11. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/obs.py +2 -2
  12. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/convert.py +158 -122
  13. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/vdata.py +31 -19
  14. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/base.py +4 -4
  15. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/index.py +3 -1
  16. {vdata-0.3.2 → vdata-0.3.4}/vdata/update/tdf.py +6 -22
  17. {vdata-0.3.2 → vdata-0.3.4}/vdata/update/update.py +10 -4
  18. vdata-0.3.4/vdata/update/utils.py +22 -0
  19. {vdata-0.3.2 → vdata-0.3.4}/vdata/update/vdf.py +18 -17
  20. {vdata-0.3.2 → vdata-0.3.4}/vdata/utils.py +68 -7
  21. vdata-0.3.2/vdata/cli.py +0 -41
  22. {vdata-0.3.2 → vdata-0.3.4}/LICENSE +0 -0
  23. {vdata-0.3.2 → vdata-0.3.4}/README.md +0 -0
  24. {vdata-0.3.2 → vdata-0.3.4}/vdata/IO/__init__.py +0 -0
  25. {vdata-0.3.2 → vdata-0.3.4}/vdata/IO/logger.py +0 -0
  26. {vdata-0.3.2 → vdata-0.3.4}/vdata/_meta.py +0 -0
  27. {vdata-0.3.2 → vdata-0.3.4}/vdata/_typing.py +0 -0
  28. {vdata-0.3.2 → vdata-0.3.4}/vdata/anndata_proxy/__init__.py +0 -0
  29. {vdata-0.3.2 → vdata-0.3.4}/vdata/anndata_proxy/anndata.py +0 -0
  30. {vdata-0.3.2 → vdata-0.3.4}/vdata/anndata_proxy/containers.py +0 -0
  31. {vdata-0.3.2 → vdata-0.3.4}/vdata/anndata_proxy/dataframe.py +0 -0
  32. {vdata-0.3.2 → vdata-0.3.4}/vdata/array_view.py +0 -0
  33. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/__init__.py +0 -0
  34. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_indexing.py +0 -0
  35. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/__init__.py +0 -0
  36. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/anndata.py +0 -0
  37. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/objects/__init__.py +0 -0
  38. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/objects/layers.py +0 -0
  39. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/objects/uns.py +0 -0
  40. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/objects/var.py +0 -0
  41. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/_parse/utils.py +0 -0
  42. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/__init__.py +0 -0
  43. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/base.py +0 -0
  44. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/lazy.py +0 -0
  45. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/var.py +0 -0
  46. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/arrays/view.py +0 -0
  47. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/concatenate.py +0 -0
  48. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/hash.py +0 -0
  49. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/name.py +0 -0
  50. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/read.py +0 -0
  51. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/view.py +0 -0
  52. {vdata-0.3.2 → vdata-0.3.4}/vdata/data/write.py +0 -0
  53. {vdata-0.3.2 → vdata-0.3.4}/vdata/names.py +0 -0
  54. {vdata-0.3.2 → vdata-0.3.4}/vdata/py.typed +0 -0
  55. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/__init__.py +0 -0
  56. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/_parse.py +0 -0
  57. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/dataframe.py +0 -0
  58. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/index.py +0 -0
  59. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/indexers.py +0 -0
  60. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/indexing.py +0 -0
  61. {vdata-0.3.2 → vdata-0.3.4}/vdata/tdf/view.py +0 -0
  62. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/__init__.py +0 -0
  63. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/_functions.py +0 -0
  64. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/_typing.py +0 -0
  65. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/array.py +0 -0
  66. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/range.py +0 -0
  67. {vdata-0.3.2 → vdata-0.3.4}/vdata/timepoint/timepoint.py +0 -0
  68. {vdata-0.3.2 → vdata-0.3.4}/vdata/update/__init__.py +0 -0
  69. {vdata-0.3.2 → vdata-0.3.4}/vdata/update/array.py +0 -0
  70. {vdata-0.3.2 → vdata-0.3.4}/vdata/update/dict.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vdata
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Annotated multivariate observation of timestamped data
5
5
  Author: Matteo Bouvier
6
6
  Author-email: Matteo Bouvier <matteo.bouvier@hotmail.fr>
@@ -11,7 +11,7 @@ Requires-Dist: anndata>=0.10.4
11
11
  Requires-Dist: scipy>=1.12.0
12
12
  Requires-Dist: numpy-indexed>=0.3.7
13
13
  Requires-Dist: ch5mpy>=0.5.1
14
- Requires-Dist: ezarr>=1.1.1
14
+ Requires-Dist: ezarr>=1.1.3
15
15
  Requires-Dist: h5dataframe>=0.2.3 ; extra == 'update'
16
16
  Requires-Python: >=3.12
17
17
  Provides-Extra: update
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "vdata"
3
- version = "0.3.2"
3
+ version = "0.3.4"
4
4
  description = "Annotated multivariate observation of timestamped data"
5
5
  authors = [{ name = "Matteo Bouvier", email="matteo.bouvier@hotmail.fr"}]
6
6
  license = "CECILL-B"
@@ -14,7 +14,7 @@ dependencies = [
14
14
  "scipy>=1.12.0",
15
15
  "numpy-indexed>=0.3.7",
16
16
  "ch5mpy>=0.5.1",
17
- "ezarr>=1.1.1",
17
+ "ezarr>=1.1.3",
18
18
  ]
19
19
 
20
20
  [dependency-groups]
@@ -30,7 +30,9 @@ docs= [
30
30
  ]
31
31
 
32
32
  [project.scripts]
33
- vdata-update = "vdata.cli:main"
33
+ vdata-update = "vdata.cli:udpate"
34
+ vdata-copy = "vdata.cli:copy"
35
+ vdata-info = "vdata.cli:info"
34
36
 
35
37
  [project.optional-dependencies]
36
38
  update = [
@@ -9,7 +9,7 @@ class VBaseError(BaseException):
9
9
  """
10
10
 
11
11
  def __init__(self, msg: str = ""):
12
- super().__init__()
12
+ super().__init__(msg)
13
13
  self.msg: str = msg
14
14
 
15
15
  @override
@@ -44,3 +44,10 @@ class VReadOnlyError(VBaseError):
44
44
  def __init__(self, msg: str = ""):
45
45
  super().__init__(msg="Read-only file !")
46
46
 
47
+
48
+ class InvalidVDataFileError(VBaseError):
49
+ """
50
+ File or directory is not valid as a VData storage
51
+ """
52
+
53
+ msg: str = "File or directory is not a valid VData"
@@ -1,8 +1,13 @@
1
1
  """Annotated, temporal and multivariate observation data."""
2
2
 
3
3
  from importlib.metadata import metadata
4
+ from pathlib import Path
5
+
6
+ import ch5mpy as ch
7
+ import ezarr
4
8
 
5
9
  from vdata.data import VData, VDataView, concatenate, convert_anndata_to_vdata
10
+ from vdata.data.name import WRITE_PROTOCOL_VERSION
6
11
  from vdata.IO import (
7
12
  IncoherenceError,
8
13
  ShapeError,
@@ -11,6 +16,7 @@ from vdata.IO import (
11
16
  )
12
17
  from vdata.tdf import RepeatingIndex, TemporalDataFrame, TemporalDataFrameView
13
18
  from vdata.timepoint import TimePoint
19
+ from vdata.utils import copy_vdata
14
20
 
15
21
  read = VData.read
16
22
  read_from_csv = VData.read_from_csv
@@ -20,17 +26,27 @@ read_from_pickle = VData.read_from_pickle
20
26
  __version__ = metadata("vdata").get("version")
21
27
 
22
28
 
29
+ def get_version(path: str | Path) -> int:
30
+ try:
31
+ return ch.H5Dict.read(path).attributes.get("__vdata_write_version__", 0)
32
+
33
+ except IsADirectoryError:
34
+ return ezarr.EZDict.open(path).attrs["__vdata_write_version__"] # pyright: ignore[reportReturnType]
35
+
36
+
23
37
  __all__ = [
24
- "VData",
38
+ "concatenate",
39
+ "convert_anndata_to_vdata",
40
+ "copy_vdata",
41
+ "IncoherenceError",
42
+ "RepeatingIndex",
43
+ "ShapeError",
25
44
  "TemporalDataFrame",
26
- "VDataView",
27
45
  "TemporalDataFrameView",
28
- "convert_anndata_to_vdata",
29
- "concatenate",
46
+ "TimePoint",
30
47
  "VBaseError",
31
- "ShapeError",
32
- "IncoherenceError",
48
+ "VData",
49
+ "VDataView",
33
50
  "VLockError",
34
- "TimePoint",
35
- "RepeatingIndex",
51
+ "WRITE_PROTOCOL_VERSION",
36
52
  ]
@@ -0,0 +1,98 @@
1
+ import argparse
2
+ import subprocess
3
+ import traceback
4
+ from pathlib import Path
5
+
6
+ import ch5mpy as ch
7
+ from py import sys
8
+
9
+ import vdata
10
+ from vdata.update.update import update_vdata
11
+ from vdata.utils import copy_vdata
12
+
13
+
14
+ def print_err(msg: str) -> None:
15
+ print("\033[31m[ERROR] " + msg + "\033[0m", file=sys.stderr)
16
+
17
+
18
+ def update() -> int:
19
+ parser = argparse.ArgumentParser(prog="vdata-update", description="Update a VData from an older version")
20
+
21
+ parser.add_argument("filename")
22
+ parser.add_argument("-o", "--out-file", default=None, type=str)
23
+ parser.add_argument("-v", "--verbose", default=False, action="store_true")
24
+
25
+ args = parser.parse_args()
26
+
27
+ data = ch.H5Dict.read(args.filename, mode=ch.H5Mode.READ_WRITE)
28
+
29
+ ez_filename = Path(data.filename)
30
+ ez_filename = ez_filename.with_stem("~" + ez_filename.stem)
31
+
32
+ try:
33
+ update_vdata(data, output_file=args.out_file, verbose=args.verbose)
34
+
35
+ except Exception as e:
36
+ print_err(" ".join(filter(lambda a: isinstance(a, str), e.args))) # pyright: ignore[reportUnnecessaryIsInstance]
37
+
38
+ if args.verbose:
39
+ traceback.print_tb(e.__traceback__)
40
+
41
+ return 1
42
+
43
+ print("\033[32m[Done]\033[0m")
44
+ return 0
45
+
46
+
47
+ def copy() -> int:
48
+ parser = argparse.ArgumentParser(prog="vdata-copy", description="Copy a VData to a new location")
49
+
50
+ parser.add_argument("source")
51
+ parser.add_argument("destination")
52
+ parser.add_argument("-e", "--exclude", default=[], action="append", choices=["obsm", "obsp", "varm", "varp", "uns"])
53
+ parser.add_argument("-v", "--verbose", default=False, action="store_true")
54
+
55
+ args = parser.parse_args()
56
+
57
+ try:
58
+ copy_vdata(args.source, args.destination, args.exclude, verbose=args.verbose)
59
+
60
+ except BaseException as e:
61
+ print_err(" ".join(filter(lambda a: isinstance(a, str), e.args))) # pyright: ignore[reportUnnecessaryIsInstance]
62
+
63
+ if args.verbose:
64
+ traceback.print_tb(e.__traceback__)
65
+
66
+ return 1
67
+
68
+ print("\033[32m[Done]\033[0m")
69
+ return 0
70
+
71
+
72
+ def info() -> int:
73
+ parser = argparse.ArgumentParser(prog="vdata-copy", description="Copy a VData to a new location")
74
+
75
+ parser.add_argument("filename")
76
+
77
+ args = parser.parse_args()
78
+
79
+ size = subprocess.check_output(["du", "-sh", args.filename]).split()[0].decode("utf-8")
80
+ data = vdata.read(args.filename)
81
+
82
+ print(f"""\
83
+ size: \t{size}
84
+
85
+ name: \t{data.name}
86
+ timepoints:\t{", ".join(map(str, data.timepoints_values))}
87
+ shape: \t{data.n_obs} obs x {data.n_var} vars x {data.n_timepoints} timepoints
88
+
89
+ layers: \t{", ".join(data.layers.keys())}
90
+ obs: \t{", ".join(data.obs.columns)}
91
+ obsm: \t{", ".join(data.obsm.keys())}
92
+ obsp: \t{", ".join(data.obsp.keys())}
93
+ var: \t{", ".join(data.var.keys())}
94
+ varm: \t{", ".join(data.varm.keys())}
95
+ varp: \t{", ".join(data.varp.keys())}
96
+ uns: \t{", ".join(data.uns.keys())}
97
+ """)
98
+ return 0
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Mapping, MutableMapping, Sequence
3
+ from collections.abc import Collection, Mapping, MutableMapping
4
4
  from dataclasses import dataclass, field
5
+ from pathlib import Path
5
6
  from typing import Any
6
7
 
7
8
  import ezarr as ez
@@ -9,6 +10,7 @@ import numpy as np
9
10
  import numpy.typing as npt
10
11
  import pandas as pd
11
12
  from anndata import AnnData
13
+ from anndata._core.xarray import Dataset2D
12
14
  from ezarr.dataframe import EZDataFrame
13
15
  from scipy.sparse import spmatrix
14
16
 
@@ -130,10 +132,10 @@ class ParsingDataIn:
130
132
  | Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
131
133
  | None
132
134
  )
133
- obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase
135
+ obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase
134
136
  obsm: Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
135
137
  obsp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
136
- var: pd.DataFrame | EZDataFrame
138
+ var: pd.DataFrame | EZDataFrame | Dataset2D
137
139
  varm: Mapping[str, pd.DataFrame | EZDataFrame]
138
140
  varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
139
141
  timepoints: pd.DataFrame | EZDataFrame
@@ -166,8 +168,8 @@ class ParsingDataIn:
166
168
  varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]] | None,
167
169
  timepoints: pd.DataFrame | EZDataFrame | None,
168
170
  time_col_name: str | None,
169
- timepoints_list: Sequence[str | tp.TimePoint] | tp.TimePointNArray | None,
170
- uns: dict[str, Any] | ez.EZDict[Any] | None,
171
+ timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
172
+ uns: MutableMapping[str, Any] | ez.EZDict[Any] | None,
171
173
  ) -> ParsingDataIn:
172
174
  _timepoints_list = parse_timepoints_list(timepoints_list, time_col_name, obs)
173
175
 
@@ -320,5 +322,5 @@ class ParsingDataOut:
320
322
  varm=data.setdefault("varm", {}),
321
323
  varp=data.setdefault("varp", {}),
322
324
  timepoints=_timepoints,
323
- uns=data["uns"],
325
+ uns=data.setdefault("uns", {}),
324
326
  )
@@ -1,6 +1,5 @@
1
1
  from typing import Any
2
2
 
3
- import pandas as pd
4
3
  from ezarr.dataframe import EZDataFrame
5
4
 
6
5
  from vdata.data._parse.data import ParsingDataIn, ParsingDataOut
@@ -33,7 +32,7 @@ def parse_objects(data: ParsingDataIn) -> ParsingDataOut:
33
32
  _obs,
34
33
  parse_obsm(data),
35
34
  parse_obsp(data),
36
- EZDataFrame(data.var) if isinstance(data.var, pd.DataFrame) else data.var,
35
+ EZDataFrame(data.var) if not isinstance(data.var, EZDataFrame) else data.var,
37
36
  parse_varm(data),
38
37
  parse_varp(data),
39
38
  _valid_timepoints(data, _obs),
@@ -65,9 +65,6 @@ def parse_obsm(data: ParsingDataIn) -> dict[str, TemporalDataFrame | TemporalDat
65
65
 
66
66
  generalLogger.debug(f" 3. \u2713 'obsm' is a {type(data.obsm).__name__}.")
67
67
 
68
- if data.obs is None and not len(data.layers):
69
- raise ValueError("'obsm' parameter cannot be set unless either 'data' or 'obs' are set.")
70
-
71
68
  if not isinstance(data.obsm, dict):
72
69
  raise TypeError("'obsm' must be a dictionary of DataFrames.")
73
70
 
@@ -76,10 +73,7 @@ def parse_obsm(data: ParsingDataIn) -> dict[str, TemporalDataFrame | TemporalDat
76
73
  for key, value in data.obsm.items():
77
74
  if isinstance(value, (pd.DataFrame, EZDataFrame)):
78
75
  if data.timepoints_list is None:
79
- if data.obs is not None:
80
- data.timepoints_list = TimePointNArray(data.obs.timepoints_column)
81
- else:
82
- data.timepoints_list = first_in(data.layers).timepoints_column
76
+ data.timepoints_list = TimePointNArray(data.obs.timepoints_column)
83
77
 
84
78
  valid_obsm[str(key)] = TemporalDataFrame(value, timepoints=data.timepoints_list, name=str(key))
85
79
 
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from collections.abc import Sequence
3
+ from collections.abc import Collection
4
4
  from typing import TYPE_CHECKING, cast
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ from anndata._core.xarray import Dataset2D
8
9
  from ezarr.dataframe import EZDataFrame
9
10
 
10
11
  import vdata.timepoint as tp
@@ -19,9 +20,9 @@ if TYPE_CHECKING:
19
20
 
20
21
 
21
22
  def parse_timepoints_list(
22
- timepoints_list: Sequence[str | tp.TimePoint] | tp.TimePointNArray | None,
23
+ timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
23
24
  time_col_name: str | None,
24
- obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase | None,
25
+ obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase | None,
25
26
  ) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
26
27
  if timepoints_list is not None:
27
28
  return tp.as_timepointarray(timepoints_list)
@@ -83,7 +83,7 @@ class VLayersArrayContainer(VTDFArrayContainer):
83
83
  f"Column names of layer '{TDF_index}' ({tdf.columns}) do not match var's index. ({self._vdata.var.index})"
84
84
  )
85
85
 
86
- if not np.all(self._vdata.timepoints.value.values == tdf.timepoints):
86
+ if not np.all(self._vdata.timepoints_values == tdf.timepoints):
87
87
  raise IncoherenceError(
88
88
  f"Time points of layer '{TDF_index}' ({tdf.timepoints}) do not match time_point's index. ({self._vdata.timepoints.value.values})"
89
89
  )
@@ -78,9 +78,9 @@ class VObsmArrayContainer(VTDFArrayContainer):
78
78
  f"Index of TemporalDataFrame '{TDF_index}' ({tdf.index}) does not match obs' index. ({self._vdata.obs.index})"
79
79
  )
80
80
 
81
- if np.any(self._vdata.timepoints.value.values != tdf.timepoints):
81
+ if np.any(self._vdata.timepoints_values != tdf.timepoints):
82
82
  raise IncoherenceError(
83
- f"Time points of TemporalDataFrame '{TDF_index}' ({tdf.timepoints}) do not match time_point's index. ({self._vdata.timepoints.value.values})"
83
+ f"Time points of TemporalDataFrame '{TDF_index}' ({tdf.timepoints}) do not match vdata's timepoints. ({self._vdata.timepoints_values})"
84
84
  )
85
85
 
86
86
  tdf.lock_indices()