vdata 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vdata-0.3.3 → vdata-0.3.4}/PKG-INFO +2 -2
- {vdata-0.3.3 → vdata-0.3.4}/pyproject.toml +5 -3
- {vdata-0.3.3 → vdata-0.3.4}/vdata/IO/errors.py +8 -1
- {vdata-0.3.3 → vdata-0.3.4}/vdata/__init__.py +24 -8
- vdata-0.3.4/vdata/cli.py +98 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/data.py +8 -6
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/objects.py +1 -2
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/obs.py +1 -7
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/time.py +4 -3
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/layers.py +1 -1
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/obs.py +2 -2
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/vdata.py +31 -19
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/base.py +4 -4
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/index.py +3 -1
- {vdata-0.3.3 → vdata-0.3.4}/vdata/update/tdf.py +6 -22
- {vdata-0.3.3 → vdata-0.3.4}/vdata/update/update.py +10 -4
- vdata-0.3.4/vdata/update/utils.py +22 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/update/vdf.py +18 -17
- {vdata-0.3.3 → vdata-0.3.4}/vdata/utils.py +68 -7
- vdata-0.3.3/vdata/cli.py +0 -41
- {vdata-0.3.3 → vdata-0.3.4}/LICENSE +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/README.md +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/IO/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/IO/logger.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/_meta.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/_typing.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/anndata.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/containers.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/anndata_proxy/dataframe.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/array_view.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_indexing.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/anndata.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/layers.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/uns.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/objects/var.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/_parse/utils.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/base.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/lazy.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/var.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/arrays/view.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/concatenate.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/convert.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/hash.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/name.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/read.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/view.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/data/write.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/names.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/py.typed +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/_parse.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/dataframe.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/index.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/indexers.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/indexing.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/tdf/view.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/_functions.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/_typing.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/array.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/range.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/timepoint/timepoint.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/update/__init__.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/update/array.py +0 -0
- {vdata-0.3.3 → vdata-0.3.4}/vdata/update/dict.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vdata
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: Annotated multivariate observation of timestamped data
|
|
5
5
|
Author: Matteo Bouvier
|
|
6
6
|
Author-email: Matteo Bouvier <matteo.bouvier@hotmail.fr>
|
|
@@ -11,7 +11,7 @@ Requires-Dist: anndata>=0.10.4
|
|
|
11
11
|
Requires-Dist: scipy>=1.12.0
|
|
12
12
|
Requires-Dist: numpy-indexed>=0.3.7
|
|
13
13
|
Requires-Dist: ch5mpy>=0.5.1
|
|
14
|
-
Requires-Dist: ezarr>=1.1.
|
|
14
|
+
Requires-Dist: ezarr>=1.1.3
|
|
15
15
|
Requires-Dist: h5dataframe>=0.2.3 ; extra == 'update'
|
|
16
16
|
Requires-Python: >=3.12
|
|
17
17
|
Provides-Extra: update
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "vdata"
|
|
3
|
-
version = "0.3.3"
|
|
3
|
+
version = "0.3.4"
|
|
4
4
|
description = "Annotated multivariate observation of timestamped data"
|
|
5
5
|
authors = [{ name = "Matteo Bouvier", email="matteo.bouvier@hotmail.fr"}]
|
|
6
6
|
license = "CECILL-B"
|
|
@@ -14,7 +14,7 @@ dependencies = [
|
|
|
14
14
|
"scipy>=1.12.0",
|
|
15
15
|
"numpy-indexed>=0.3.7",
|
|
16
16
|
"ch5mpy>=0.5.1",
|
|
17
|
-
"ezarr>=1.1.
|
|
17
|
+
"ezarr>=1.1.3",
|
|
18
18
|
]
|
|
19
19
|
|
|
20
20
|
[dependency-groups]
|
|
@@ -30,7 +30,9 @@ docs= [
|
|
|
30
30
|
]
|
|
31
31
|
|
|
32
32
|
[project.scripts]
|
|
33
|
-
vdata-update = "vdata.cli:
|
|
33
|
+
vdata-update = "vdata.cli:udpate"
|
|
34
|
+
vdata-copy = "vdata.cli:copy"
|
|
35
|
+
vdata-info = "vdata.cli:info"
|
|
34
36
|
|
|
35
37
|
[project.optional-dependencies]
|
|
36
38
|
update = [
|
|
@@ -9,7 +9,7 @@ class VBaseError(BaseException):
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
def __init__(self, msg: str = ""):
|
|
12
|
-
super().__init__()
|
|
12
|
+
super().__init__(msg)
|
|
13
13
|
self.msg: str = msg
|
|
14
14
|
|
|
15
15
|
@override
|
|
@@ -44,3 +44,10 @@ class VReadOnlyError(VBaseError):
|
|
|
44
44
|
def __init__(self, msg: str = ""):
|
|
45
45
|
super().__init__(msg="Read-only file !")
|
|
46
46
|
|
|
47
|
+
|
|
48
|
+
class InvalidVDataFileError(VBaseError):
|
|
49
|
+
"""
|
|
50
|
+
File or directory is not valid as a VData storage
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
msg: str = "File or directory is not a valid VData"
|
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
"""Annotated, temporal and multivariate observation data."""
|
|
2
2
|
|
|
3
3
|
from importlib.metadata import metadata
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import ch5mpy as ch
|
|
7
|
+
import ezarr
|
|
4
8
|
|
|
5
9
|
from vdata.data import VData, VDataView, concatenate, convert_anndata_to_vdata
|
|
10
|
+
from vdata.data.name import WRITE_PROTOCOL_VERSION
|
|
6
11
|
from vdata.IO import (
|
|
7
12
|
IncoherenceError,
|
|
8
13
|
ShapeError,
|
|
@@ -11,6 +16,7 @@ from vdata.IO import (
|
|
|
11
16
|
)
|
|
12
17
|
from vdata.tdf import RepeatingIndex, TemporalDataFrame, TemporalDataFrameView
|
|
13
18
|
from vdata.timepoint import TimePoint
|
|
19
|
+
from vdata.utils import copy_vdata
|
|
14
20
|
|
|
15
21
|
read = VData.read
|
|
16
22
|
read_from_csv = VData.read_from_csv
|
|
@@ -20,17 +26,27 @@ read_from_pickle = VData.read_from_pickle
|
|
|
20
26
|
__version__ = metadata("vdata").get("version")
|
|
21
27
|
|
|
22
28
|
|
|
29
|
+
def get_version(path: str | Path) -> int:
|
|
30
|
+
try:
|
|
31
|
+
return ch.H5Dict.read(path).attributes.get("__vdata_write_version__", 0)
|
|
32
|
+
|
|
33
|
+
except IsADirectoryError:
|
|
34
|
+
return ezarr.EZDict.open(path).attrs["__vdata_write_version__"] # pyright: ignore[reportReturnType]
|
|
35
|
+
|
|
36
|
+
|
|
23
37
|
__all__ = [
|
|
24
|
-
"
|
|
38
|
+
"concatenate",
|
|
39
|
+
"convert_anndata_to_vdata",
|
|
40
|
+
"copy_vdata",
|
|
41
|
+
"IncoherenceError",
|
|
42
|
+
"RepeatingIndex",
|
|
43
|
+
"ShapeError",
|
|
25
44
|
"TemporalDataFrame",
|
|
26
|
-
"VDataView",
|
|
27
45
|
"TemporalDataFrameView",
|
|
28
|
-
"
|
|
29
|
-
"concatenate",
|
|
46
|
+
"TimePoint",
|
|
30
47
|
"VBaseError",
|
|
31
|
-
"
|
|
32
|
-
"
|
|
48
|
+
"VData",
|
|
49
|
+
"VDataView",
|
|
33
50
|
"VLockError",
|
|
34
|
-
"
|
|
35
|
-
"RepeatingIndex",
|
|
51
|
+
"WRITE_PROTOCOL_VERSION",
|
|
36
52
|
]
|
vdata-0.3.4/vdata/cli.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import subprocess
|
|
3
|
+
import traceback
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import ch5mpy as ch
|
|
7
|
+
from py import sys
|
|
8
|
+
|
|
9
|
+
import vdata
|
|
10
|
+
from vdata.update.update import update_vdata
|
|
11
|
+
from vdata.utils import copy_vdata
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def print_err(msg: str) -> None:
|
|
15
|
+
print("\033[31m[ERROR] " + msg + "\033[0m", file=sys.stderr)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def update() -> int:
|
|
19
|
+
parser = argparse.ArgumentParser(prog="vdata-update", description="Update a VData from an older version")
|
|
20
|
+
|
|
21
|
+
parser.add_argument("filename")
|
|
22
|
+
parser.add_argument("-o", "--out-file", default=None, type=str)
|
|
23
|
+
parser.add_argument("-v", "--verbose", default=False, action="store_true")
|
|
24
|
+
|
|
25
|
+
args = parser.parse_args()
|
|
26
|
+
|
|
27
|
+
data = ch.H5Dict.read(args.filename, mode=ch.H5Mode.READ_WRITE)
|
|
28
|
+
|
|
29
|
+
ez_filename = Path(data.filename)
|
|
30
|
+
ez_filename = ez_filename.with_stem("~" + ez_filename.stem)
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
update_vdata(data, output_file=args.out_file, verbose=args.verbose)
|
|
34
|
+
|
|
35
|
+
except Exception as e:
|
|
36
|
+
print_err(" ".join(filter(lambda a: isinstance(a, str), e.args))) # pyright: ignore[reportUnnecessaryIsInstance]
|
|
37
|
+
|
|
38
|
+
if args.verbose:
|
|
39
|
+
traceback.print_tb(e.__traceback__)
|
|
40
|
+
|
|
41
|
+
return 1
|
|
42
|
+
|
|
43
|
+
print("\033[32m[Done]\033[0m")
|
|
44
|
+
return 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def copy() -> int:
|
|
48
|
+
parser = argparse.ArgumentParser(prog="vdata-copy", description="Copy a VData to a new location")
|
|
49
|
+
|
|
50
|
+
parser.add_argument("source")
|
|
51
|
+
parser.add_argument("destination")
|
|
52
|
+
parser.add_argument("-e", "--exclude", default=[], action="append", choices=["obsm", "obsp", "varm", "varp", "uns"])
|
|
53
|
+
parser.add_argument("-v", "--verbose", default=False, action="store_true")
|
|
54
|
+
|
|
55
|
+
args = parser.parse_args()
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
copy_vdata(args.source, args.destination, args.exclude, verbose=args.verbose)
|
|
59
|
+
|
|
60
|
+
except BaseException as e:
|
|
61
|
+
print_err(" ".join(filter(lambda a: isinstance(a, str), e.args))) # pyright: ignore[reportUnnecessaryIsInstance]
|
|
62
|
+
|
|
63
|
+
if args.verbose:
|
|
64
|
+
traceback.print_tb(e.__traceback__)
|
|
65
|
+
|
|
66
|
+
return 1
|
|
67
|
+
|
|
68
|
+
print("\033[32m[Done]\033[0m")
|
|
69
|
+
return 0
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def info() -> int:
|
|
73
|
+
parser = argparse.ArgumentParser(prog="vdata-copy", description="Copy a VData to a new location")
|
|
74
|
+
|
|
75
|
+
parser.add_argument("filename")
|
|
76
|
+
|
|
77
|
+
args = parser.parse_args()
|
|
78
|
+
|
|
79
|
+
size = subprocess.check_output(["du", "-sh", args.filename]).split()[0].decode("utf-8")
|
|
80
|
+
data = vdata.read(args.filename)
|
|
81
|
+
|
|
82
|
+
print(f"""\
|
|
83
|
+
size: \t{size}
|
|
84
|
+
|
|
85
|
+
name: \t{data.name}
|
|
86
|
+
timepoints:\t{", ".join(map(str, data.timepoints_values))}
|
|
87
|
+
shape: \t{data.n_obs} obs x {data.n_var} vars x {data.n_timepoints} timepoints
|
|
88
|
+
|
|
89
|
+
layers: \t{", ".join(data.layers.keys())}
|
|
90
|
+
obs: \t{", ".join(data.obs.columns)}
|
|
91
|
+
obsm: \t{", ".join(data.obsm.keys())}
|
|
92
|
+
obsp: \t{", ".join(data.obsp.keys())}
|
|
93
|
+
var: \t{", ".join(data.var.keys())}
|
|
94
|
+
varm: \t{", ".join(data.varm.keys())}
|
|
95
|
+
varp: \t{", ".join(data.varp.keys())}
|
|
96
|
+
uns: \t{", ".join(data.uns.keys())}
|
|
97
|
+
""")
|
|
98
|
+
return 0
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from collections.abc import Mapping, MutableMapping
|
|
3
|
+
from collections.abc import Collection, Mapping, MutableMapping
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
6
7
|
|
|
7
8
|
import ezarr as ez
|
|
@@ -9,6 +10,7 @@ import numpy as np
|
|
|
9
10
|
import numpy.typing as npt
|
|
10
11
|
import pandas as pd
|
|
11
12
|
from anndata import AnnData
|
|
13
|
+
from anndata._core.xarray import Dataset2D
|
|
12
14
|
from ezarr.dataframe import EZDataFrame
|
|
13
15
|
from scipy.sparse import spmatrix
|
|
14
16
|
|
|
@@ -130,10 +132,10 @@ class ParsingDataIn:
|
|
|
130
132
|
| Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
|
|
131
133
|
| None
|
|
132
134
|
)
|
|
133
|
-
obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase
|
|
135
|
+
obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase
|
|
134
136
|
obsm: Mapping[str, pd.DataFrame | EZDataFrame | TemporalDataFrameBase]
|
|
135
137
|
obsp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
|
|
136
|
-
var: pd.DataFrame | EZDataFrame
|
|
138
|
+
var: pd.DataFrame | EZDataFrame | Dataset2D
|
|
137
139
|
varm: Mapping[str, pd.DataFrame | EZDataFrame]
|
|
138
140
|
varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]]
|
|
139
141
|
timepoints: pd.DataFrame | EZDataFrame
|
|
@@ -166,8 +168,8 @@ class ParsingDataIn:
|
|
|
166
168
|
varp: Mapping[str, pd.DataFrame | EZDataFrame | npt.NDArray[np_IFS]] | None,
|
|
167
169
|
timepoints: pd.DataFrame | EZDataFrame | None,
|
|
168
170
|
time_col_name: str | None,
|
|
169
|
-
timepoints_list:
|
|
170
|
-
uns:
|
|
171
|
+
timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
|
|
172
|
+
uns: MutableMapping[str, Any] | ez.EZDict[Any] | None,
|
|
171
173
|
) -> ParsingDataIn:
|
|
172
174
|
_timepoints_list = parse_timepoints_list(timepoints_list, time_col_name, obs)
|
|
173
175
|
|
|
@@ -320,5 +322,5 @@ class ParsingDataOut:
|
|
|
320
322
|
varm=data.setdefault("varm", {}),
|
|
321
323
|
varp=data.setdefault("varp", {}),
|
|
322
324
|
timepoints=_timepoints,
|
|
323
|
-
uns=data
|
|
325
|
+
uns=data.setdefault("uns", {}),
|
|
324
326
|
)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
|
|
3
|
-
import pandas as pd
|
|
4
3
|
from ezarr.dataframe import EZDataFrame
|
|
5
4
|
|
|
6
5
|
from vdata.data._parse.data import ParsingDataIn, ParsingDataOut
|
|
@@ -33,7 +32,7 @@ def parse_objects(data: ParsingDataIn) -> ParsingDataOut:
|
|
|
33
32
|
_obs,
|
|
34
33
|
parse_obsm(data),
|
|
35
34
|
parse_obsp(data),
|
|
36
|
-
EZDataFrame(data.var) if isinstance(data.var,
|
|
35
|
+
EZDataFrame(data.var) if not isinstance(data.var, EZDataFrame) else data.var,
|
|
37
36
|
parse_varm(data),
|
|
38
37
|
parse_varp(data),
|
|
39
38
|
_valid_timepoints(data, _obs),
|
|
@@ -65,9 +65,6 @@ def parse_obsm(data: ParsingDataIn) -> dict[str, TemporalDataFrame | TemporalDat
|
|
|
65
65
|
|
|
66
66
|
generalLogger.debug(f" 3. \u2713 'obsm' is a {type(data.obsm).__name__}.")
|
|
67
67
|
|
|
68
|
-
if data.obs is None and not len(data.layers):
|
|
69
|
-
raise ValueError("'obsm' parameter cannot be set unless either 'data' or 'obs' are set.")
|
|
70
|
-
|
|
71
68
|
if not isinstance(data.obsm, dict):
|
|
72
69
|
raise TypeError("'obsm' must be a dictionary of DataFrames.")
|
|
73
70
|
|
|
@@ -76,10 +73,7 @@ def parse_obsm(data: ParsingDataIn) -> dict[str, TemporalDataFrame | TemporalDat
|
|
|
76
73
|
for key, value in data.obsm.items():
|
|
77
74
|
if isinstance(value, (pd.DataFrame, EZDataFrame)):
|
|
78
75
|
if data.timepoints_list is None:
|
|
79
|
-
|
|
80
|
-
data.timepoints_list = TimePointNArray(data.obs.timepoints_column)
|
|
81
|
-
else:
|
|
82
|
-
data.timepoints_list = first_in(data.layers).timepoints_column
|
|
76
|
+
data.timepoints_list = TimePointNArray(data.obs.timepoints_column)
|
|
83
77
|
|
|
84
78
|
valid_obsm[str(key)] = TemporalDataFrame(value, timepoints=data.timepoints_list, name=str(key))
|
|
85
79
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from collections.abc import
|
|
3
|
+
from collections.abc import Collection
|
|
4
4
|
from typing import TYPE_CHECKING, cast
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
+
from anndata._core.xarray import Dataset2D
|
|
8
9
|
from ezarr.dataframe import EZDataFrame
|
|
9
10
|
|
|
10
11
|
import vdata.timepoint as tp
|
|
@@ -19,9 +20,9 @@ if TYPE_CHECKING:
|
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
def parse_timepoints_list(
|
|
22
|
-
timepoints_list:
|
|
23
|
+
timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None,
|
|
23
24
|
time_col_name: str | None,
|
|
24
|
-
obs: pd.DataFrame | EZDataFrame | TemporalDataFrameBase | None,
|
|
25
|
+
obs: pd.DataFrame | EZDataFrame | Dataset2D | TemporalDataFrameBase | None,
|
|
25
26
|
) -> tp.TimePointNArray | NDArrayView[tp.TimePoint] | None:
|
|
26
27
|
if timepoints_list is not None:
|
|
27
28
|
return tp.as_timepointarray(timepoints_list)
|
|
@@ -83,7 +83,7 @@ class VLayersArrayContainer(VTDFArrayContainer):
|
|
|
83
83
|
f"Column names of layer '{TDF_index}' ({tdf.columns}) do not match var's index. ({self._vdata.var.index})"
|
|
84
84
|
)
|
|
85
85
|
|
|
86
|
-
if not np.all(self._vdata.
|
|
86
|
+
if not np.all(self._vdata.timepoints_values == tdf.timepoints):
|
|
87
87
|
raise IncoherenceError(
|
|
88
88
|
f"Time points of layer '{TDF_index}' ({tdf.timepoints}) do not match time_point's index. ({self._vdata.timepoints.value.values})"
|
|
89
89
|
)
|
|
@@ -78,9 +78,9 @@ class VObsmArrayContainer(VTDFArrayContainer):
|
|
|
78
78
|
f"Index of TemporalDataFrame '{TDF_index}' ({tdf.index}) does not match obs' index. ({self._vdata.obs.index})"
|
|
79
79
|
)
|
|
80
80
|
|
|
81
|
-
if np.any(self._vdata.
|
|
81
|
+
if np.any(self._vdata.timepoints_values != tdf.timepoints):
|
|
82
82
|
raise IncoherenceError(
|
|
83
|
-
f"Time points of TemporalDataFrame '{TDF_index}' ({tdf.timepoints}) do not match
|
|
83
|
+
f"Time points of TemporalDataFrame '{TDF_index}' ({tdf.timepoints}) do not match vdata's timepoints. ({self._vdata.timepoints_values})"
|
|
84
84
|
)
|
|
85
85
|
|
|
86
86
|
tdf.lock_indices()
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import pickle
|
|
4
|
+
import shutil
|
|
4
5
|
from collections.abc import Collection, Mapping, MutableMapping, Sequence
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from types import TracebackType
|
|
@@ -18,6 +19,7 @@ from zarr.core.common import AccessModeLiteral
|
|
|
18
19
|
from zarr.errors import GroupNotFoundError
|
|
19
20
|
from zarr.storage import LocalStore, ZipStore
|
|
20
21
|
|
|
22
|
+
import vdata
|
|
21
23
|
import vdata.timepoint as tp
|
|
22
24
|
from vdata._meta import PrettyRepr
|
|
23
25
|
from vdata._typing import IFS, PreSlicer, np_IFS
|
|
@@ -84,7 +86,7 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
84
86
|
timepoints: pd.DataFrame | EZDataFrame | None = None,
|
|
85
87
|
uns: MutableMapping[str, Any] | None = None,
|
|
86
88
|
time_col_name: str | None = None,
|
|
87
|
-
timepoints_list:
|
|
89
|
+
timepoints_list: Collection[str | tp.TimePoint] | tp.TimePointNArray | None = None,
|
|
88
90
|
name: str = "",
|
|
89
91
|
):
|
|
90
92
|
"""
|
|
@@ -380,7 +382,6 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
380
382
|
def timepoints(self) -> EZDataFrame:
|
|
381
383
|
"""
|
|
382
384
|
Get time points data.
|
|
383
|
-
:return: the time points DataFrame.
|
|
384
385
|
"""
|
|
385
386
|
return self._timepoints
|
|
386
387
|
|
|
@@ -395,14 +396,17 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
395
396
|
raise VReadOnlyError
|
|
396
397
|
|
|
397
398
|
if not isinstance(df, (pd.DataFrame, EZDataFrame)): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
398
|
-
raise TypeError("'
|
|
399
|
+
raise TypeError("'timepoints' must be a pandas DataFrame.") # pyright: ignore[reportUnreachable]
|
|
399
400
|
|
|
400
401
|
elif df.shape[0] != self.n_timepoints:
|
|
401
|
-
raise ShapeError(f"'
|
|
402
|
+
raise ShapeError(f"'timepoints' has {df.shape[0]} rows, it should have {self.n_timepoints}.")
|
|
402
403
|
|
|
403
404
|
elif "value" not in df.columns:
|
|
404
405
|
raise ValueError("Time points DataFrame should contain a 'value' column.")
|
|
405
406
|
|
|
407
|
+
elif "unit" not in df.columns:
|
|
408
|
+
raise ValueError("Time points DataFrame should contain a 'unit' column.")
|
|
409
|
+
|
|
406
410
|
df["value"] = tp.as_timepointarray(df["value"])
|
|
407
411
|
self._timepoints = EZDataFrame(df)
|
|
408
412
|
|
|
@@ -410,28 +414,22 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
410
414
|
def timepoints_values(self) -> tp.TimePointNArray | NDArrayView[tp.TimePoint]:
|
|
411
415
|
"""
|
|
412
416
|
Get the list of time points values (with the unit if possible).
|
|
413
|
-
|
|
414
|
-
:return: the list of time points values (with the unit if possible).
|
|
415
417
|
"""
|
|
416
|
-
return tp.as_timepointarray(self.
|
|
418
|
+
return tp.as_timepointarray(self.timepoints_strings)
|
|
417
419
|
|
|
418
420
|
@property
|
|
419
421
|
def timepoints_strings(self) -> list[str]:
|
|
420
422
|
"""
|
|
421
423
|
Get the list of time points as strings.
|
|
422
|
-
|
|
423
|
-
:return: the list of time points as strings.
|
|
424
424
|
"""
|
|
425
|
-
return [str(
|
|
425
|
+
return [str(value) + unit for value, unit in zip(self.timepoints.value, self.timepoints.unit)] # pyright: ignore[reportUnknownVariableType]
|
|
426
426
|
|
|
427
427
|
@property
|
|
428
428
|
def timepoints_numerical(self) -> list[float]:
|
|
429
429
|
"""
|
|
430
430
|
Get the list of bare values from the time points.
|
|
431
|
-
|
|
432
|
-
:return: the list of bare values from the time points.
|
|
433
431
|
"""
|
|
434
|
-
return [
|
|
432
|
+
return self.timepoints.value.values[:].tolist() # pyright: ignore[reportUnknownVariableType]
|
|
435
433
|
|
|
436
434
|
@property
|
|
437
435
|
def obs(self) -> TemporalDataFrameBase:
|
|
@@ -714,7 +712,7 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
714
712
|
|
|
715
713
|
def write(self, file: str | Path | None = None, verbose: bool = True) -> None:
|
|
716
714
|
"""
|
|
717
|
-
Save this VData object
|
|
715
|
+
Save this VData object to a local file.
|
|
718
716
|
|
|
719
717
|
Args:
|
|
720
718
|
file: path to save the VData
|
|
@@ -742,16 +740,19 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
742
740
|
write_vdata_to_csv(self, directory, sep, na_rep, index, header)
|
|
743
741
|
|
|
744
742
|
@classmethod
|
|
745
|
-
def read(cls, path: str | Path, mode: AccessModeLiteral = "a") -> VData:
|
|
743
|
+
def read(cls, path: str | Path, mode: AccessModeLiteral = "a", secure: bool = False, verbose: bool = True) -> VData:
|
|
746
744
|
"""
|
|
747
745
|
Read a saved VData from a local file.
|
|
748
746
|
|
|
749
747
|
Args:
|
|
750
748
|
- path: path to a h5 file.
|
|
751
|
-
- mode: mode for opening the h5 file.
|
|
749
|
+
- mode: mode for opening the h5 file. (default: "a")
|
|
750
|
+
- secure: create a temporary file to work on instead of opening the file at `path` directly. (default: False)
|
|
751
|
+
- verbose: verbose output in case of update. (default: True)
|
|
752
752
|
"""
|
|
753
|
-
|
|
754
|
-
|
|
753
|
+
path = Path(path)
|
|
754
|
+
if not path.suffix == ".vd":
|
|
755
|
+
raise IOError(f"Cannot read file with suffix '{path.suffix}', should be '.vd'")
|
|
755
756
|
|
|
756
757
|
try:
|
|
757
758
|
data = ez.EZDict[Any].open(path, mode=mode)
|
|
@@ -768,7 +769,18 @@ class VData(ez.SupportsEZReadWrite):
|
|
|
768
769
|
f"Found old VData with {version=} but could not update, please install `vdata[update]`"
|
|
769
770
|
)
|
|
770
771
|
|
|
771
|
-
_, data = update_vdata(data, verbose=
|
|
772
|
+
_, data = update_vdata(data, verbose=verbose)
|
|
773
|
+
|
|
774
|
+
if secure:
|
|
775
|
+
temp_path = path.with_stem("~" + path.stem)
|
|
776
|
+
|
|
777
|
+
if not temp_path.exists():
|
|
778
|
+
shutil.copytree(path, temp_path)
|
|
779
|
+
|
|
780
|
+
elif vdata.get_version(temp_path) < vdata.get_version(path):
|
|
781
|
+
raise FileExistsError(f"{path} was updated, but {temp_path} exists")
|
|
782
|
+
|
|
783
|
+
data = ez.EZDict[Any].open(temp_path, mode=mode)
|
|
772
784
|
|
|
773
785
|
return VData.__ez_read__(data)
|
|
774
786
|
|
|
@@ -700,11 +700,11 @@ class TemporalDataFrameBase(ABC, ez.SupportsEZReadWrite):
|
|
|
700
700
|
return len(self._timepoints_index.at(timepoint))
|
|
701
701
|
|
|
702
702
|
@property
|
|
703
|
-
def columns_num(self) ->
|
|
703
|
+
def columns_num(self) -> npt.NDArray[np_IFS]:
|
|
704
704
|
"""
|
|
705
705
|
Get the list of column names for numerical data.
|
|
706
706
|
"""
|
|
707
|
-
return self._columns_numerical
|
|
707
|
+
return np.asarray(self._columns_numerical)
|
|
708
708
|
|
|
709
709
|
@columns_num.setter
|
|
710
710
|
def columns_num(self, values: AnyNDArrayLike_IFS) -> None:
|
|
@@ -724,11 +724,11 @@ class TemporalDataFrameBase(ABC, ez.SupportsEZReadWrite):
|
|
|
724
724
|
return self._columns_numerical.shape[0]
|
|
725
725
|
|
|
726
726
|
@property
|
|
727
|
-
def columns_str(self) ->
|
|
727
|
+
def columns_str(self) -> npt.NDArray[np_IFS]:
|
|
728
728
|
"""
|
|
729
729
|
Get the list of column names for string data.
|
|
730
730
|
"""
|
|
731
|
-
return self._columns_string
|
|
731
|
+
return np.asarray(self._columns_string)
|
|
732
732
|
|
|
733
733
|
@columns_str.setter
|
|
734
734
|
def columns_str(self, values: AnyNDArrayLike_IFS) -> None:
|
|
@@ -112,7 +112,9 @@ class TimePointIndex(ez.SupportsEZReadWrite):
|
|
|
112
112
|
return TimePointIndex(timepoints, ranges)
|
|
113
113
|
|
|
114
114
|
def as_array(self) -> TimePointNArray:
|
|
115
|
-
return TimePointNArray(
|
|
115
|
+
return TimePointNArray(
|
|
116
|
+
np.repeat(self._timepoints, np.diff(self._ranges, prepend=0)), unit=self._timepoints.unit
|
|
117
|
+
)
|
|
116
118
|
|
|
117
119
|
def len(self, timepoint: TimePoint) -> int:
|
|
118
120
|
index_tp = np.where(self._timepoints[:] == timepoint)[0][0]
|
|
@@ -16,6 +16,7 @@ import vdata
|
|
|
16
16
|
from vdata.timepoint import TimePointIndex, TimePointNArray, TimePointZArray
|
|
17
17
|
from vdata.timepoint.array import as_timepointarray
|
|
18
18
|
from vdata.update.array import update_array
|
|
19
|
+
from vdata.update.utils import save_class_info
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
def _update_tdf_v0_to_v1(data: ch.H5Dict[Any], output_file: Path | None, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
@@ -63,7 +64,7 @@ def _update_tdf_v1_to_v2(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
|
|
|
63
64
|
(data @ "timepoints_index" @ "timepoints").attributes.set(
|
|
64
65
|
__h5_type__="object",
|
|
65
66
|
__h5_class__=np.void(pickle.dumps(TimePointNArray, protocol=pickle.HIGHEST_PROTOCOL)),
|
|
66
|
-
unit="
|
|
67
|
+
unit=data["timepoints_array"][0][-1],
|
|
67
68
|
)
|
|
68
69
|
|
|
69
70
|
del data["timepoints_array"]
|
|
@@ -80,7 +81,6 @@ def _update_tdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
|
|
|
80
81
|
"repeating_index": bool(data.attributes["repeating_index"]),
|
|
81
82
|
"locked_indices": bool(data.attributes["locked_indices"]),
|
|
82
83
|
"locked_columns": bool(data.attributes["locked_columns"]),
|
|
83
|
-
Attribute.EZType: EZType.Object,
|
|
84
84
|
}
|
|
85
85
|
)
|
|
86
86
|
|
|
@@ -90,23 +90,12 @@ def _update_tdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
|
|
|
90
90
|
"timepoints": data @ "timepoints_index" @ "timepoints",
|
|
91
91
|
"ranges": data @ "timepoints_index" @ "ranges",
|
|
92
92
|
}
|
|
93
|
-
ez_data["timepoints_index"]
|
|
94
|
-
with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
|
|
95
|
-
(ez_data @ "timepoints_index").create_array(
|
|
96
|
-
Attribute.EZClass,
|
|
97
|
-
data=np.void(pickle.dumps(TimePointIndex, protocol=pickle.HIGHEST_PROTOCOL)),
|
|
98
|
-
overwrite=True,
|
|
99
|
-
)
|
|
93
|
+
save_class_info(TimePointIndex, ez_data["timepoints_index"])
|
|
100
94
|
|
|
101
95
|
(ez_data @ "timepoints_index" @ "timepoints").attrs.put( # pyright: ignore[reportOperatorIssue]
|
|
102
|
-
{"unit": str(data["timepoints_index"]["timepoints"].attributes["unit"])
|
|
96
|
+
{"unit": str(data["timepoints_index"]["timepoints"].attributes["unit"])}
|
|
103
97
|
)
|
|
104
|
-
|
|
105
|
-
(ez_data @ "timepoints_index" @ "timepoints").create_array( # pyright: ignore[reportOperatorIssue]
|
|
106
|
-
Attribute.EZClass,
|
|
107
|
-
data=np.void(pickle.dumps(TimePointZArray, protocol=pickle.HIGHEST_PROTOCOL)),
|
|
108
|
-
overwrite=True,
|
|
109
|
-
)
|
|
98
|
+
save_class_info(TimePointZArray, ez_data @ "timepoints_index" @ "timepoints") # pyright: ignore[reportOperatorIssue]
|
|
110
99
|
|
|
111
100
|
ez_data["index"] = data @ "index"
|
|
112
101
|
ez_data["columns_numerical"] = data @ "columns_numerical"
|
|
@@ -114,12 +103,7 @@ def _update_tdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
|
|
|
114
103
|
ez_data["array_numerical"] = data @ "numerical_array"
|
|
115
104
|
ez_data["array_string"] = data @ "string_array"
|
|
116
105
|
|
|
117
|
-
|
|
118
|
-
ez_data.create_array(
|
|
119
|
-
Attribute.EZClass,
|
|
120
|
-
data=np.void(pickle.dumps(vdata.TemporalDataFrame, protocol=pickle.HIGHEST_PROTOCOL)),
|
|
121
|
-
overwrite=True,
|
|
122
|
-
)
|
|
106
|
+
save_class_info(vdata.TemporalDataFrame, ez_data)
|
|
123
107
|
|
|
124
108
|
|
|
125
109
|
class tdf_updator(Protocol):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import shutil
|
|
4
|
+
import warnings
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
6
7
|
|
|
@@ -12,6 +13,7 @@ from h5dataframe import H5DataFrame
|
|
|
12
13
|
from tqdm.auto import tqdm
|
|
13
14
|
from zarr.codecs.numcodecs import LZ4
|
|
14
15
|
from zarr.core.array import CompressorsLike
|
|
16
|
+
from zarr.errors import ZarrUserWarning
|
|
15
17
|
|
|
16
18
|
import vdata
|
|
17
19
|
from vdata.data.name import WRITE_PROTOCOL_VERSION
|
|
@@ -133,21 +135,25 @@ def _get_output_file(data_file: Path, output_file: str | Path | None, from_versi
|
|
|
133
135
|
return ez_filename
|
|
134
136
|
|
|
135
137
|
|
|
138
|
+
with warnings.catch_warnings(action="ignore", category=ZarrUserWarning):
|
|
139
|
+
_default_compressors = (LZ4(),)
|
|
140
|
+
|
|
141
|
+
|
|
136
142
|
def update_vdata(
|
|
137
143
|
data: Path | str | ch.H5Dict[Any] | ez.EZDict[Any],
|
|
138
144
|
*,
|
|
139
145
|
output_file: str | Path | None = None,
|
|
140
146
|
verbose: bool = False,
|
|
141
|
-
compressors: CompressorsLike =
|
|
147
|
+
compressors: CompressorsLike = _default_compressors,
|
|
142
148
|
) -> tuple[int, ez.EZDict[Any]]:
|
|
143
149
|
"""
|
|
144
|
-
Update
|
|
150
|
+
Update a saved vdata from an older version.
|
|
145
151
|
|
|
146
152
|
Args:
|
|
147
153
|
data: path to the h5 file to update.
|
|
148
154
|
output_file: path to the updated output vdata file.
|
|
149
155
|
verbose: print a progress bar ? (default: False)
|
|
150
|
-
compressors:
|
|
156
|
+
compressors: zarr compressors to use when writing Arrays. (default: LZ4)
|
|
151
157
|
"""
|
|
152
158
|
if isinstance(data, ez.EZDict):
|
|
153
159
|
assert data.attrs.get("__vdata_write_version__") == WRITE_PROTOCOL_VERSION, (
|
|
@@ -175,7 +181,7 @@ def update_vdata(
|
|
|
175
181
|
|
|
176
182
|
for v in range(data_version, WRITE_PROTOCOL_VERSION):
|
|
177
183
|
progressBar: tqdm[Any] | NoBar = (
|
|
178
|
-
tqdm(total=nb_items_to_write, desc=f"
|
|
184
|
+
tqdm(total=nb_items_to_write, desc=f"Updating VData {filename} [version {v} => {v + 1}]", unit="object")
|
|
179
185
|
if verbose
|
|
180
186
|
else NoBar()
|
|
181
187
|
)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import pickle
|
|
2
|
+
import warnings
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import ezarr as ez
|
|
6
|
+
import numpy as np
|
|
7
|
+
from ezarr.names import Attribute, EZType
|
|
8
|
+
from zarr.errors import UnstableSpecificationWarning
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def save_class_info(klass: type, ez_data: ez.EZDict[Any]) -> None:
    """
    Tag `ez_data` as a stored object and record the class it was built from.

    The `Attribute.EZType` attribute of `ez_data` is set to `EZType.Object` and the pickled `klass` is written
    as an array named `Attribute.EZClass`, replacing any array already stored under that name.

    Args:
        klass: class to record.
        ez_data: group in which the class information is saved.
    """
    object_marker = {Attribute.EZType: EZType.Object}
    ez_data.attrs.update(object_marker)

    # zarr's UnstableSpecificationWarning is silenced while the pickled class is written
    with warnings.catch_warnings(action="ignore", category=UnstableSpecificationWarning):
        pickled_class = np.void(pickle.dumps(klass, protocol=pickle.HIGHEST_PROTOCOL))
        ez_data.create_array(
            Attribute.EZClass,
            data=pickled_class,  # pyright: ignore[reportArgumentType]
            overwrite=True,
        )
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import pickle
|
|
4
|
-
import warnings
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Any, Protocol
|
|
7
6
|
|
|
@@ -11,11 +10,11 @@ import numpy as np
|
|
|
11
10
|
import numpy.typing as npt
|
|
12
11
|
import zarr
|
|
13
12
|
from ezarr.dataframe import EZDataFrame
|
|
14
|
-
from ezarr.names import Attribute, EZType
|
|
15
13
|
from h5dataframe import H5DataFrame
|
|
16
|
-
from zarr.errors import UnstableSpecificationWarning
|
|
17
14
|
|
|
15
|
+
import vdata.timepoint as tp
|
|
18
16
|
from vdata.update.array import update_array
|
|
17
|
+
from vdata.update.utils import save_class_info
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
def get_common_dtype(dt1: npt.DTypeLike, dt2: npt.DTypeLike) -> type[np.generic]:
|
|
@@ -76,24 +75,26 @@ def _update_vdf_v2_to_v3(data: ch.H5Dict[Any], output_file: Path | None, **kwarg
|
|
|
76
75
|
assert output_file is not None
|
|
77
76
|
ez_data = ez.EZDict[Any](zarr.open_group(output_file, path=data.file.name))
|
|
78
77
|
|
|
79
|
-
ez_data.
|
|
80
|
-
{
|
|
81
|
-
Attribute.EZType: EZType.Object,
|
|
82
|
-
}
|
|
83
|
-
)
|
|
78
|
+
is_timepoints = ez_data.group.basename == "timepoints"
|
|
84
79
|
|
|
85
80
|
compressors = kwargs.get("compressors")
|
|
86
81
|
with ez_data.parameters(compressors):
|
|
87
82
|
ez_data["index"] = data @ "index"
|
|
88
|
-
ez_data["arrays"] = {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
)
|
|
83
|
+
ez_data["arrays"] = {
|
|
84
|
+
name: arr for name, arr in (data @ "arrays").items() if not is_timepoints or not name == "value"
|
|
85
|
+
}
|
|
86
|
+
ez_data["arrays"].attrs.put(
|
|
87
|
+
{"columns_order": [name for name in (data @ "arrays").keys() if not is_timepoints or not name == "value"]}
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
if is_timepoints:
|
|
91
|
+
tps = tp.as_timepointarray(data["arrays"]["value"])
|
|
92
|
+
ez_data["arrays"]["value"] = np.array(tps)
|
|
93
|
+
ez_data["arrays"]["unit"] = np.repeat(tps.unit, len(tps))
|
|
94
|
+
|
|
95
|
+
ez_data["arrays"].attrs["columns_order"] = ["value", "unit"] + ez_data["arrays"].attrs["columns_order"]
|
|
96
|
+
|
|
97
|
+
save_class_info(EZDataFrame, ez_data)
|
|
97
98
|
|
|
98
99
|
|
|
99
100
|
class vdf_updator(Protocol):
|
|
@@ -1,24 +1,26 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import shutil
|
|
4
|
+
from collections.abc import Collection, Mapping, Sequence
|
|
4
5
|
from itertools import islice
|
|
5
6
|
from math import ceil, floor
|
|
6
|
-
from
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal, TypeGuard
|
|
7
9
|
|
|
8
10
|
import numpy as np
|
|
9
11
|
import numpy.typing as npt
|
|
10
12
|
import zarr
|
|
13
|
+
from tqdm.auto import tqdm
|
|
11
14
|
|
|
12
15
|
from vdata.array_view import NDArrayView
|
|
16
|
+
from vdata.IO.errors import InvalidVDataFileError
|
|
13
17
|
|
|
14
18
|
if TYPE_CHECKING:
|
|
15
19
|
from vdata._typing import PreSlicer
|
|
16
20
|
|
|
17
|
-
_V = TypeVar("_V")
|
|
18
|
-
|
|
19
21
|
|
|
20
22
|
# misc ------------------------------------------------------------------------
|
|
21
|
-
def first_in(d: Mapping[Any,
|
|
23
|
+
def first_in[V](d: Mapping[Any, V]) -> V:
|
|
22
24
|
return next(iter(d.values()))
|
|
23
25
|
|
|
24
26
|
|
|
@@ -105,7 +107,7 @@ def repr_index(
|
|
|
105
107
|
|
|
106
108
|
|
|
107
109
|
# type coercion ---------------------------------------------------------------
|
|
108
|
-
def deep_dict_convert(obj:
|
|
110
|
+
def deep_dict_convert(obj: Any) -> dict[Any, Any]:
|
|
109
111
|
"""
|
|
110
112
|
'Deep' convert a mapping of any kind (and children mappings) into regular dictionaries.
|
|
111
113
|
|
|
@@ -118,4 +120,63 @@ def deep_dict_convert(obj: Mapping[Any, Any]) -> dict[Any, Any]:
|
|
|
118
120
|
if not isinstance(obj, Mapping):
|
|
119
121
|
return obj
|
|
120
122
|
|
|
121
|
-
return {k: deep_dict_convert(v) for k, v in obj.items()}
|
|
123
|
+
return {k: deep_dict_convert(v) for k, v in obj.items()} # pyright: ignore[reportUnknownVariableType]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# copy ------------------------------------------------------------------------
|
|
127
|
+
def is_valid_storage(path: Path) -> bool:
|
|
128
|
+
if not path.exists() or not path.is_dir():
|
|
129
|
+
return False
|
|
130
|
+
|
|
131
|
+
sub_dir = [p.name for p in path.iterdir()]
|
|
132
|
+
|
|
133
|
+
for key in ("layers", "obs", "var", "timepoints", "zarr.json"):
|
|
134
|
+
if key not in sub_dir:
|
|
135
|
+
return False
|
|
136
|
+
|
|
137
|
+
return True
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def copy_vdata(
    source: str | Path,
    destination: str | Path,
    exclude: list[Literal["obsm", "obsp", "varm", "varp", "uns"]],
    verbose: bool = False,
) -> None:
    """
    Copy a VData stored on disk to a new location, optionally leaving out some top-level components.

    Args:
        source: path to the stored VData to copy.
        destination: path to copy to; its suffix is always set to ".vd".
        exclude: names of top-level entries of `source` which should not be copied.
        verbose: print a progress bar ? (default: False)

    Raises:
        FileNotFoundError: if `source` does not exist.
        InvalidVDataFileError: if `source` is not a valid stored VData.
    """
    source = Path(source)
    destination = Path(destination).with_suffix(".vd")

    if not source.exists():
        raise FileNotFoundError(f"No such VData: '{source}'")

    if not is_valid_storage(source):
        raise InvalidVDataFileError(f"{source} is not a valid stored VData")

    destination.parent.mkdir(parents=True, exist_ok=True)

    excluded = frozenset(exclude)
    source_root = str(source)

    def _ignore(src: str, _names: list[str]) -> Collection[str]:
        # exclusions only apply to the direct children of `source`, never to nested entries sharing a name
        return excluded if src == source_root else ()

    if not verbose:
        shutil.copytree(source, destination, ignore=_ignore)
        return

    # one progress step per copied file: a file directly in `source` counts for one, a directory counts for
    # the number of files found anywhere below it
    total = sum(
        1 if entry.is_file() else sum(1 for file in entry.rglob("*") if file.is_file())
        for entry in source.iterdir()
        if entry.name not in excluded
    )

    # the context manager closes the progress bar even if the copy fails midway
    with tqdm(total=total, desc=f"Copying VData {source}", unit="files") as progress:

        def _copy(src: str, dst: str) -> str:
            copied = shutil.copy2(src, dst)
            progress.update()
            return copied

        shutil.copytree(source, destination, ignore=_ignore, copy_function=_copy)
|
vdata-0.3.3/vdata/cli.py
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import argparse
|
|
2
|
-
import traceback
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import ch5mpy as ch
|
|
6
|
-
from py import sys
|
|
7
|
-
|
|
8
|
-
from vdata.update.update import update_vdata
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def print_err(msg: str) -> None:
|
|
12
|
-
print("\033[31m[ERROR] " + msg + "\033[0m", file=sys.stderr)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def main() -> int:
|
|
16
|
-
parser = argparse.ArgumentParser(prog="vdata-update", description="Update a VData from an older version")
|
|
17
|
-
|
|
18
|
-
parser.add_argument("filename")
|
|
19
|
-
parser.add_argument("-o", "--out-file", default=None, type=str)
|
|
20
|
-
parser.add_argument("-v", "--verbose", default=False, action="store_true")
|
|
21
|
-
|
|
22
|
-
args = parser.parse_args()
|
|
23
|
-
|
|
24
|
-
data = ch.H5Dict.read(args.filename, mode=ch.H5Mode.READ_WRITE)
|
|
25
|
-
|
|
26
|
-
ez_filename = Path(data.filename)
|
|
27
|
-
ez_filename = ez_filename.with_stem("~" + ez_filename.stem)
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
update_vdata(data, output_file=args.out_file, verbose=args.verbose)
|
|
31
|
-
|
|
32
|
-
except Exception as e:
|
|
33
|
-
print_err(" ".join(filter(lambda a: isinstance(a, str), e.args))) # pyright: ignore[reportUnnecessaryIsInstance]
|
|
34
|
-
|
|
35
|
-
if args.verbose:
|
|
36
|
-
traceback.print_tb(e.__traceback__)
|
|
37
|
-
|
|
38
|
-
return 1
|
|
39
|
-
|
|
40
|
-
print("\033[32m[Done]\033[0m")
|
|
41
|
-
return 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|