vdata 0.2.3__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vdata-0.2.3 → vdata-0.3.0}/PKG-INFO +14 -18
- vdata-0.3.0/pyproject.toml +60 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/IO/__init__.py +1 -2
- {vdata-0.2.3 → vdata-0.3.0}/vdata/IO/errors.py +10 -17
- {vdata-0.2.3 → vdata-0.3.0}/vdata/__init__.py +0 -4
- vdata-0.3.0/vdata/_meta.py +14 -0
- vdata-0.3.0/vdata/_typing.py +39 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/anndata_proxy/anndata.py +19 -34
- {vdata-0.2.3 → vdata-0.3.0}/vdata/anndata_proxy/containers.py +39 -45
- {vdata-0.2.3 → vdata-0.3.0}/vdata/anndata_proxy/dataframe.py +2 -11
- {vdata-0.2.3 → vdata-0.3.0}/vdata/array_view.py +53 -43
- vdata-0.3.0/vdata/cli.py +41 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_indexing.py +10 -7
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/anndata.py +9 -8
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/data.py +79 -79
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/objects/layers.py +7 -7
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/objects/objects.py +6 -4
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/objects/obs.py +22 -20
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/objects/var.py +18 -16
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/time.py +14 -11
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/utils.py +2 -4
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/base.py +42 -59
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/layers.py +17 -32
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/lazy.py +8 -11
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/obs.py +48 -77
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/var.py +39 -61
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/view.py +40 -39
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/concatenate.py +19 -21
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/convert.py +30 -20
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/hash.py +5 -12
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/read.py +8 -7
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/vdata.py +105 -143
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/view.py +40 -61
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/write.py +47 -53
- {vdata-0.2.3 → vdata-0.3.0}/vdata/names.py +4 -5
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/_parse.py +23 -26
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/base.py +185 -187
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/dataframe.py +82 -93
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/index.py +19 -18
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/indexers.py +1 -13
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/indexing.py +11 -9
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/view.py +44 -39
- vdata-0.3.0/vdata/timepoint/__init__.py +15 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/timepoint/_functions.py +9 -11
- vdata-0.3.0/vdata/timepoint/_typing.py +3 -0
- vdata-0.3.0/vdata/timepoint/array.py +437 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/timepoint/index.py +40 -49
- {vdata-0.2.3 → vdata-0.3.0}/vdata/timepoint/range.py +16 -23
- {vdata-0.2.3 → vdata-0.3.0}/vdata/timepoint/timepoint.py +25 -25
- vdata-0.3.0/vdata/update/array.py +32 -0
- vdata-0.3.0/vdata/update/dict.py +41 -0
- vdata-0.3.0/vdata/update/tdf.py +127 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/update/update.py +92 -31
- vdata-0.3.0/vdata/update/vdf.py +97 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/utils.py +14 -18
- vdata-0.2.3/pyproject.toml +0 -41
- vdata-0.2.3/vdata/_meta.py +0 -3
- vdata-0.2.3/vdata/_typing.py +0 -45
- vdata-0.2.3/vdata/data/_file.py +0 -20
- vdata-0.2.3/vdata/timepoint/__init__.py +0 -6
- vdata-0.2.3/vdata/timepoint/_typing.py +0 -3
- vdata-0.2.3/vdata/timepoint/array.py +0 -240
- vdata-0.2.3/vdata/update/array.py +0 -21
- vdata-0.2.3/vdata/update/dict.py +0 -26
- vdata-0.2.3/vdata/update/tdf.py +0 -63
- vdata-0.2.3/vdata/update/vdf.py +0 -71
- {vdata-0.2.3 → vdata-0.3.0}/LICENSE +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/README.md +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/IO/logger.conf +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/IO/logger.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/anndata_proxy/__init__.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/__init__.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/__init__.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/objects/__init__.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/_parse/objects/uns.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/data/arrays/__init__.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/py.typed +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/tdf/__init__.py +0 -0
- {vdata-0.2.3 → vdata-0.3.0}/vdata/update/__init__.py +0 -0
|
@@ -1,23 +1,20 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: vdata
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Annotated multivariate observation of timestamped data
|
|
5
|
-
License: LICENSE
|
|
6
5
|
Author: Matteo Bouvier
|
|
7
|
-
Author-email:
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
Requires-Dist:
|
|
15
|
-
Requires-Dist:
|
|
16
|
-
Requires-Dist: h5dataframe
|
|
17
|
-
Requires-
|
|
18
|
-
|
|
19
|
-
Requires-Dist: scipy (>=1.12.0,<2.0.0)
|
|
20
|
-
Requires-Dist: tqdm (>=4.66.1,<5.0.0)
|
|
6
|
+
Author-email: Matteo Bouvier <matteo.bouvier@hotmail.fr>
|
|
7
|
+
License-Expression: CECILL-B
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: tqdm>=4.66.1
|
|
10
|
+
Requires-Dist: anndata>=0.10.4
|
|
11
|
+
Requires-Dist: scipy>=1.12.0
|
|
12
|
+
Requires-Dist: numpy-indexed>=0.3.7
|
|
13
|
+
Requires-Dist: ch5mpy>=0.5.1
|
|
14
|
+
Requires-Dist: ezarr
|
|
15
|
+
Requires-Dist: h5dataframe>=0.2.3 ; extra == 'update'
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Provides-Extra: update
|
|
21
18
|
Description-Content-Type: text/markdown
|
|
22
19
|
|
|
23
20
|
# 🗂 VData
|
|
@@ -124,4 +121,3 @@ You can cite the **VData** pre-print as :
|
|
|
124
121
|
> Matteo Bouvier, Arnaud Bonnaffoux
|
|
125
122
|
>
|
|
126
123
|
> bioRxiv 2023.08.29.555297; doi: https://doi.org/10.1101/2023.08.29.555297
|
|
127
|
-
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vdata"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Annotated multivariate observation of timestamped data"
|
|
5
|
+
authors = [{ name = "Matteo Bouvier", email="matteo.bouvier@hotmail.fr"}]
|
|
6
|
+
license = "CECILL-B"
|
|
7
|
+
license-files = ["LICENSE"]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
|
|
11
|
+
dependencies = [
|
|
12
|
+
"tqdm>=4.66.1",
|
|
13
|
+
"anndata>=0.10.4",
|
|
14
|
+
"scipy>=1.12.0",
|
|
15
|
+
"numpy-indexed>=0.3.7",
|
|
16
|
+
"ch5mpy>=0.5.1",
|
|
17
|
+
"ezarr",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[dependency-groups]
|
|
21
|
+
dev = [
|
|
22
|
+
"pytest>=7.4.4",
|
|
23
|
+
"pytest-xdist>=3.8.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
docs= [
|
|
27
|
+
"mkdocs-material>=9.5.10",
|
|
28
|
+
# mkdocstrings = {extras = ["python"], version = "^0.24.0"}
|
|
29
|
+
"mkdocs>=1.5.3"
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
vdata-update = "vdata.cli:main"
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
update = [
|
|
37
|
+
"h5dataframe>=0.2.3",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[build-system]
|
|
41
|
+
requires = ["uv_build>=0.8.15,<0.9.0"]
|
|
42
|
+
build-backend = "uv_build"
|
|
43
|
+
|
|
44
|
+
[tool.uv.build-backend]
|
|
45
|
+
module-root = ""
|
|
46
|
+
|
|
47
|
+
[tool.uv.sources]
|
|
48
|
+
ezarr = { path = "../ezarr" }
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
line-length = 120
|
|
52
|
+
extend-select = ["I"]
|
|
53
|
+
|
|
54
|
+
[tool.pyright]
|
|
55
|
+
reportAny = false
|
|
56
|
+
reportExplicitAny = false
|
|
57
|
+
reportUnknownMemberType = false
|
|
58
|
+
reportUnknownArgumentType = false
|
|
59
|
+
reportUnusedCallResult = false
|
|
60
|
+
reportMissingTypeStubs = false
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .errors import IncoherenceError, ShapeError, VBaseError, VClosedFileError, VLockError, VReadOnlyError
|
|
1
|
+
from .errors import IncoherenceError, ShapeError, VBaseError, VLockError, VReadOnlyError
|
|
2
2
|
from .logger import generalLogger
|
|
3
3
|
|
|
4
4
|
__all__ = [
|
|
@@ -7,6 +7,5 @@ __all__ = [
|
|
|
7
7
|
"ShapeError",
|
|
8
8
|
"IncoherenceError",
|
|
9
9
|
"VLockError",
|
|
10
|
-
"VClosedFileError",
|
|
11
10
|
"VReadOnlyError",
|
|
12
11
|
]
|
|
@@ -1,14 +1,18 @@
|
|
|
1
|
-
from
|
|
1
|
+
from typing import override
|
|
2
2
|
|
|
3
|
+
from vdata.IO import logger
|
|
3
4
|
|
|
4
|
-
|
|
5
|
-
class VBaseError(
|
|
5
|
+
|
|
6
|
+
class VBaseError(BaseException):
|
|
6
7
|
"""
|
|
7
8
|
Base class for custom error. Error messages are redirected to the logger instead of being printed directly.
|
|
8
9
|
"""
|
|
10
|
+
|
|
9
11
|
def __init__(self, msg: str = ""):
|
|
10
|
-
|
|
12
|
+
super().__init__()
|
|
13
|
+
self.msg: str = msg
|
|
11
14
|
|
|
15
|
+
@override
|
|
12
16
|
def __str__(self) -> str:
|
|
13
17
|
logger.generalLogger.error(self.msg)
|
|
14
18
|
return self.msg
|
|
@@ -18,29 +22,18 @@ class ShapeError(VBaseError):
|
|
|
18
22
|
"""
|
|
19
23
|
Custom error for errors in variable shapes.
|
|
20
24
|
"""
|
|
21
|
-
pass
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
class IncoherenceError(VBaseError):
|
|
25
28
|
"""
|
|
26
29
|
Custom error for incoherent data formats.
|
|
27
30
|
"""
|
|
28
|
-
pass
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class VLockError(VBaseError):
|
|
32
34
|
"""
|
|
33
35
|
Custom error for tdf lock errors.
|
|
34
36
|
"""
|
|
35
|
-
pass
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class VClosedFileError(VBaseError):
|
|
39
|
-
"""
|
|
40
|
-
Custom error for tdf lock errors.
|
|
41
|
-
"""
|
|
42
|
-
def __init__(self, msg: str = ""):
|
|
43
|
-
self.msg = "Closed backing file !"
|
|
44
37
|
|
|
45
38
|
|
|
46
39
|
class VReadOnlyError(VBaseError):
|
|
@@ -49,5 +42,5 @@ class VReadOnlyError(VBaseError):
|
|
|
49
42
|
"""
|
|
50
43
|
|
|
51
44
|
def __init__(self, msg: str = ""):
|
|
52
|
-
|
|
53
|
-
|
|
45
|
+
super().__init__(msg="Read-only file !")
|
|
46
|
+
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""Annotated, temporal and multivariate observation data."""
|
|
2
2
|
|
|
3
|
-
from ch5mpy import H5Mode
|
|
4
|
-
|
|
5
3
|
from vdata.data import VData, VDataView, concatenate, convert_anndata_to_vdata
|
|
6
4
|
from vdata.IO import (
|
|
7
5
|
IncoherenceError,
|
|
@@ -18,8 +16,6 @@ read_from_csv = VData.read_from_csv
|
|
|
18
16
|
read_from_anndata = VData.read_from_anndata
|
|
19
17
|
read_from_pickle = VData.read_from_pickle
|
|
20
18
|
|
|
21
|
-
mode = H5Mode
|
|
22
|
-
|
|
23
19
|
__all__ = [
|
|
24
20
|
"VData",
|
|
25
21
|
"TemporalDataFrame",
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from abc import ABCMeta
|
|
2
|
+
from typing import override
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class PrettyRepr(type):
|
|
6
|
+
@override
|
|
7
|
+
def __repr__(self) -> str:
|
|
8
|
+
return f"vdata.{self.__name__}"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PrettyReprABC(ABCMeta):
|
|
12
|
+
@override
|
|
13
|
+
def __repr__(self) -> str:
|
|
14
|
+
return f"vdata.{self.__name__}"
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from collections.abc import Collection, MutableMapping
|
|
2
|
+
from types import EllipsisType
|
|
3
|
+
from typing import SupportsIndex, TypedDict
|
|
4
|
+
|
|
5
|
+
import ezarr as ez
|
|
6
|
+
import numpy as np
|
|
7
|
+
import numpy.typing as npt
|
|
8
|
+
import zarr
|
|
9
|
+
|
|
10
|
+
import vdata.timepoint as tp
|
|
11
|
+
from vdata.array_view import NDArrayView
|
|
12
|
+
|
|
13
|
+
type IF = int | float | np.integer | np.floating
|
|
14
|
+
type IFS = IF | np.str_ | str
|
|
15
|
+
type np_IF = np.integer | np.floating
|
|
16
|
+
type np_IFS = np.integer | np.floating | np.str_
|
|
17
|
+
|
|
18
|
+
type AnyNDArrayLike[T: np.generic] = npt.NDArray[T] | NDArrayView[T] | zarr.Array
|
|
19
|
+
type AnyNDArrayLike_IF = npt.NDArray[np_IF] | NDArrayView[np_IF] | zarr.Array
|
|
20
|
+
|
|
21
|
+
type NDArrayList_IFS = npt.NDArray[np_IFS] | list[IFS]
|
|
22
|
+
type NDArrayLike_IFS = npt.NDArray[np_IFS] | zarr.Array
|
|
23
|
+
type AnyNDArrayLike_IFS = npt.NDArray[np_IFS] | NDArrayView[np_IFS] | zarr.Array
|
|
24
|
+
|
|
25
|
+
type Collection_IFS = Collection[IFS]
|
|
26
|
+
type DictLike[T: ez.SupportsEZReadWrite] = MutableMapping[str, T] | ez.EZDict[T]
|
|
27
|
+
|
|
28
|
+
type Slicer = IFS | tp.TimePoint | Collection[IFS | tp.TimePoint] | range | slice | EllipsisType
|
|
29
|
+
type MultiSlicer = Collection[IFS | tp.TimePoint] | range | slice | EllipsisType
|
|
30
|
+
type PreSlicer = IFS | tp.TimePoint | Collection[IFS | bool | tp.TimePoint] | range | slice | EllipsisType
|
|
31
|
+
type Indexer = SupportsIndex | slice | npt.NDArray[np.int_] | npt.NDArray[np.bool_] | None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AttrDict(TypedDict):
|
|
35
|
+
name: str
|
|
36
|
+
timepoints_column_name: str | None
|
|
37
|
+
locked_indices: bool
|
|
38
|
+
locked_columns: bool
|
|
39
|
+
repeating_index: bool
|
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Sequence
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING, Any, Literal,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, override
|
|
5
6
|
|
|
6
7
|
import numpy.typing as npt
|
|
7
8
|
import pandas as pd
|
|
8
9
|
from anndata import AnnData
|
|
9
|
-
from h5dataframe import H5DataFrame
|
|
10
10
|
from scipy import sparse
|
|
11
11
|
|
|
12
12
|
from vdata._typing import AnyNDArrayLike, DictLike
|
|
13
|
-
from vdata.anndata_proxy.containers import ArrayStack2DProxy, H5DataFrameContainerProxy, TemporalDataFrameContainerProxy
|
|
13
|
+
from vdata.anndata_proxy.containers import ArrayStack2DProxy, EZDataFrameContainerProxy, TemporalDataFrameContainerProxy
|
|
14
14
|
from vdata.anndata_proxy.dataframe import DataFrameProxy_TDF
|
|
15
|
-
from vdata.data._file import NoData
|
|
16
15
|
|
|
17
16
|
if TYPE_CHECKING:
|
|
18
17
|
from vdata.data import VData, VDataView
|
|
@@ -30,9 +29,8 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
30
29
|
Class faking to be an anndata.AnnData object but actually wrapping a VData.
|
|
31
30
|
"""
|
|
32
31
|
|
|
33
|
-
__slots__ = "_vdata", "_X", "_layers", "_obs", "_obsm", "_obsp", "_var", "_varm", "_varp", "_uns"
|
|
32
|
+
__slots__: tuple[str, ...] = "_vdata", "_X", "_layers", "_obs", "_obsm", "_obsp", "_var", "_varm", "_varp", "_uns"
|
|
34
33
|
|
|
35
|
-
# region magic methods
|
|
36
34
|
def __init__(self, vdata: VData | VDataView, X: str | None = None) -> None:
|
|
37
35
|
"""
|
|
38
36
|
Args:
|
|
@@ -51,10 +49,10 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
51
49
|
self._layers = TemporalDataFrameContainerProxy(vdata, name="layers", columns=vdata.var.index)
|
|
52
50
|
self._obs = DataFrameProxy_TDF(vdata.obs)
|
|
53
51
|
self._obsm = TemporalDataFrameContainerProxy(vdata, name="obsm", columns=None)
|
|
54
|
-
self._obsp =
|
|
52
|
+
self._obsp = EZDataFrameContainerProxy(vdata.obsp, name="Obsp", index=vdata.obs.index, columns=vdata.obs.index)
|
|
55
53
|
self._var = vdata.var
|
|
56
|
-
self._varm =
|
|
57
|
-
self._varp =
|
|
54
|
+
self._varm = EZDataFrameContainerProxy(vdata.varm, name="Varm", index=vdata.var.index)
|
|
55
|
+
self._varp = EZDataFrameContainerProxy(vdata.varp, name="Varp", index=vdata.var.index, columns=vdata.var.index)
|
|
58
56
|
self._uns = vdata.uns
|
|
59
57
|
|
|
60
58
|
def __repr__(self) -> str:
|
|
@@ -74,9 +72,6 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
74
72
|
def __setitem__(self, index: Any, val: int | float | npt.NDArray[Any] | sparse.spmatrix) -> None:
|
|
75
73
|
raise NotImplementedError
|
|
76
74
|
|
|
77
|
-
# endregion
|
|
78
|
-
|
|
79
|
-
# region attributes
|
|
80
75
|
@property
|
|
81
76
|
def _n_obs(self) -> int:
|
|
82
77
|
return self._vdata.n_obs_total
|
|
@@ -151,7 +146,7 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
151
146
|
raise NotImplementedError
|
|
152
147
|
|
|
153
148
|
@property
|
|
154
|
-
def var(self) -> H5DataFrame:
|
|
149
|
+
def var(self) -> EZDataFrame:
|
|
155
150
|
"""One-dimensional annotation of variables/ features (`pd.DataFrame`)."""
|
|
156
151
|
return self._var
|
|
157
152
|
|
|
@@ -198,7 +193,7 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
198
193
|
raise NotImplementedError
|
|
199
194
|
|
|
200
195
|
@property
|
|
201
|
-
def varm(self) -> H5DataFrameContainerProxy:
|
|
196
|
+
def varm(self) -> EZDataFrameContainerProxy:
|
|
202
197
|
return self._varm
|
|
203
198
|
|
|
204
199
|
@varm.setter
|
|
@@ -210,7 +205,7 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
210
205
|
raise NotImplementedError
|
|
211
206
|
|
|
212
207
|
@property
|
|
213
|
-
def obsp(self) -> H5DataFrameContainerProxy:
|
|
208
|
+
def obsp(self) -> EZDataFrameContainerProxy:
|
|
214
209
|
return self._obsp
|
|
215
210
|
|
|
216
211
|
@obsp.setter
|
|
@@ -222,7 +217,7 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
222
217
|
raise NotImplementedError
|
|
223
218
|
|
|
224
219
|
@property
|
|
225
|
-
def varp(self) -> H5DataFrameContainerProxy:
|
|
220
|
+
def varp(self) -> EZDataFrameContainerProxy:
|
|
226
221
|
return self._varp
|
|
227
222
|
|
|
228
223
|
@varp.setter
|
|
@@ -234,50 +229,42 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
234
229
|
raise NotImplementedError
|
|
235
230
|
|
|
236
231
|
@property
|
|
237
|
-
|
|
238
|
-
if self._vdata.data is NoData._:
|
|
239
|
-
return None
|
|
240
|
-
return Path(self._vdata.data.filename)
|
|
241
|
-
|
|
242
|
-
@filename.setter
|
|
243
|
-
def filename(self, filename: Path | None) -> None:
|
|
244
|
-
raise NotImplementedError
|
|
245
|
-
|
|
246
|
-
# endregion
|
|
247
|
-
|
|
248
|
-
# region predicates
|
|
249
|
-
@property
|
|
232
|
+
@override
|
|
250
233
|
def isbacked(self) -> bool:
|
|
251
234
|
"""`True` if object is backed on disk, `False` otherwise."""
|
|
252
235
|
return self._vdata.is_backed
|
|
253
236
|
|
|
254
237
|
@property
|
|
238
|
+
@override
|
|
255
239
|
def is_view(self) -> bool:
|
|
256
240
|
"""`True` if object is view of another AnnData object, `False` otherwise."""
|
|
257
241
|
return self._vdata.is_view
|
|
258
242
|
|
|
259
|
-
# endregion
|
|
260
|
-
|
|
261
|
-
# region methods
|
|
262
243
|
def as_vdata(self) -> VData | VDataView:
|
|
263
244
|
return self._vdata
|
|
264
245
|
|
|
246
|
+
@override
|
|
265
247
|
def rename_categories(self, key: str, categories: Sequence[Any]) -> None:
|
|
266
248
|
raise NotImplementedError
|
|
267
249
|
|
|
250
|
+
@override
|
|
268
251
|
def strings_to_categoricals(self, df: pd.DataFrame | None = None) -> None:
|
|
269
252
|
raise NotImplementedError
|
|
270
253
|
|
|
254
|
+
@override
|
|
271
255
|
def _sanitize(self) -> None:
|
|
272
256
|
# prevent unwanted data modification in the underlying vdata object
|
|
273
257
|
return
|
|
274
258
|
|
|
259
|
+
@override
|
|
275
260
|
def _inplace_subset_var(self, index: Any) -> None:
|
|
276
261
|
self._init_from_vdata(self._vdata[skip_time_axis((slice(None), index))])
|
|
277
262
|
|
|
263
|
+
@override
|
|
278
264
|
def _inplace_subset_obs(self, index: Any) -> None:
|
|
279
265
|
self._init_from_vdata(self._vdata[skip_time_axis(index)])
|
|
280
266
|
|
|
267
|
+
@override
|
|
281
268
|
def copy(self, filename: Path | None = None) -> None:
|
|
282
269
|
"""Full copy, optionally on disk."""
|
|
283
270
|
raise NotImplementedError
|
|
@@ -290,5 +277,3 @@ class AnnDataProxy(AnnData): # type: ignore[misc]
|
|
|
290
277
|
as_dense: Sequence[str] = (),
|
|
291
278
|
) -> None:
|
|
292
279
|
raise NotImplementedError
|
|
293
|
-
|
|
294
|
-
# endregion
|
|
@@ -2,30 +2,42 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import operator as op
|
|
4
4
|
from functools import partialmethod
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import (
|
|
6
|
+
Any,
|
|
7
|
+
Callable,
|
|
8
|
+
ClassVar,
|
|
9
|
+
Collection,
|
|
10
|
+
ItemsView,
|
|
11
|
+
KeysView,
|
|
12
|
+
MutableMapping,
|
|
13
|
+
Union,
|
|
14
|
+
ValuesView,
|
|
15
|
+
cast,
|
|
16
|
+
override,
|
|
17
|
+
)
|
|
6
18
|
|
|
7
19
|
import numpy as np
|
|
8
20
|
import numpy.typing as npt
|
|
9
21
|
import scipy.sparse as ss
|
|
10
|
-
from h5dataframe import H5DataFrame
|
|
11
22
|
|
|
23
|
+
# from h5dataframe import EZDataFrame
|
|
12
24
|
import vdata
|
|
13
25
|
from vdata._typing import IFS, AnyNDArrayLike, AnyNDArrayLike_IFS
|
|
14
26
|
from vdata.tdf import TemporalDataFrame, TemporalDataFrameBase
|
|
15
27
|
|
|
16
|
-
_NP_IF =
|
|
28
|
+
type _NP_IF = np.integer | np.floating
|
|
17
29
|
|
|
18
30
|
|
|
19
31
|
class TemporalDataFrameContainerProxy:
|
|
20
|
-
__slots__ = "_vdata", "_tdfs", "_name", "_columns"
|
|
32
|
+
__slots__: tuple[str, ...] = "_vdata", "_tdfs", "_name", "_columns"
|
|
21
33
|
|
|
22
|
-
# region magic methods
|
|
23
34
|
def __init__(self, vdata: vdata.VData | vdata.VDataView, name: str, columns: Collection[IFS] | None) -> None:
|
|
24
|
-
self._vdata = vdata
|
|
35
|
+
self._vdata: vdata.VData | vdata.VDataView = vdata
|
|
25
36
|
self._tdfs: MutableMapping[str, TemporalDataFrameBase] = getattr(vdata, name)
|
|
26
|
-
self._name = name.capitalize()
|
|
27
|
-
self._columns = columns
|
|
37
|
+
self._name: str = name.capitalize()
|
|
38
|
+
self._columns: Collection[IFS] | None = columns
|
|
28
39
|
|
|
40
|
+
@override
|
|
29
41
|
def __repr__(self) -> str:
|
|
30
42
|
return f"{self._name} with keys: {', '.join(self._tdfs.keys())}"
|
|
31
43
|
|
|
@@ -46,9 +58,6 @@ class TemporalDataFrameContainerProxy:
|
|
|
46
58
|
def __contains__(self, key: str) -> bool:
|
|
47
59
|
return key in self.keys()
|
|
48
60
|
|
|
49
|
-
# endregion
|
|
50
|
-
|
|
51
|
-
# region methods
|
|
52
61
|
def keys(self) -> KeysView[str]:
|
|
53
62
|
return self._tdfs.keys()
|
|
54
63
|
|
|
@@ -58,21 +67,19 @@ class TemporalDataFrameContainerProxy:
|
|
|
58
67
|
def items(self) -> ItemsView[str, TemporalDataFrameBase]:
|
|
59
68
|
return self._tdfs.items()
|
|
60
69
|
|
|
61
|
-
# endregion
|
|
62
|
-
|
|
63
70
|
|
|
64
71
|
class ArrayStack2DProxy:
|
|
65
|
-
__slots__ = "_array_numeric", "_array_string", "layer_name"
|
|
66
|
-
ndim = 2
|
|
72
|
+
__slots__: tuple[str, ...] = "_array_numeric", "_array_string", "layer_name"
|
|
73
|
+
ndim: ClassVar[int] = 2
|
|
67
74
|
|
|
68
|
-
# region magic methods
|
|
69
75
|
def __init__(
|
|
70
76
|
self, array_numeric: AnyNDArrayLike[_NP_IF], array_string: AnyNDArrayLike[np.str_], layer_name: str | None
|
|
71
77
|
) -> None:
|
|
72
|
-
self._array_numeric = array_numeric if array_numeric.size else None
|
|
73
|
-
self._array_string = array_string if array_string.size else None
|
|
74
|
-
self.layer_name = layer_name
|
|
78
|
+
self._array_numeric: AnyNDArrayLike[_NP_IF] | None = array_numeric if array_numeric.size else None
|
|
79
|
+
self._array_string: AnyNDArrayLike[_NP_IF] | None = array_string if array_string.size else None
|
|
80
|
+
self.layer_name: str = layer_name
|
|
75
81
|
|
|
82
|
+
@override
|
|
76
83
|
def __repr__(self) -> str:
|
|
77
84
|
return repr(self.stack(n=5)) + "\n..." if self.shape[1] > 5 else ""
|
|
78
85
|
|
|
@@ -97,14 +104,14 @@ class ArrayStack2DProxy:
|
|
|
97
104
|
|
|
98
105
|
return cast(npt.NDArray[_NP_IF], operation(self._array_numeric, other))
|
|
99
106
|
|
|
100
|
-
__add__ = __radd__ = partialmethod(_op, operation=op.add)
|
|
101
|
-
__sub__ = __rsub__ = partialmethod(_op, operation=op.sub)
|
|
102
|
-
__mul__ = __rmul__ = partialmethod(_op, operation=op.mul)
|
|
103
|
-
__truediv__ = __rtruediv__ = partialmethod(_op, operation=op.truediv)
|
|
104
|
-
__gt__ = partialmethod(_op, operation=op.gt)
|
|
105
|
-
__ge__ = partialmethod(_op, operation=op.ge)
|
|
106
|
-
__lt__ = partialmethod(_op, operation=op.lt)
|
|
107
|
-
__le__ = partialmethod(_op, operation=op.le)
|
|
107
|
+
__add__ = __radd__ = partialmethod(_op, operation=op.add) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
108
|
+
__sub__ = __rsub__ = partialmethod(_op, operation=op.sub) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
109
|
+
__mul__ = __rmul__ = partialmethod(_op, operation=op.mul) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
110
|
+
__truediv__ = __rtruediv__ = partialmethod(_op, operation=op.truediv) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
111
|
+
__gt__ = partialmethod(_op, operation=op.gt) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
112
|
+
__ge__ = partialmethod(_op, operation=op.ge) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
113
|
+
__lt__ = partialmethod(_op, operation=op.lt) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
114
|
+
__le__ = partialmethod(_op, operation=op.le) # pyright: ignore[reportUnannotatedClassAttribute]
|
|
108
115
|
|
|
109
116
|
def __array__(self, dtype: npt.DTypeLike = None) -> npt.NDArray[Any]:
|
|
110
117
|
if dtype is None:
|
|
@@ -112,9 +119,6 @@ class ArrayStack2DProxy:
|
|
|
112
119
|
|
|
113
120
|
return self.stack().astype(dtype)
|
|
114
121
|
|
|
115
|
-
# endregion
|
|
116
|
-
|
|
117
|
-
# region attributes
|
|
118
122
|
@property
|
|
119
123
|
def shape(self) -> tuple[int, int]:
|
|
120
124
|
if self._array_numeric is not None and self._array_string is not None:
|
|
@@ -141,9 +145,6 @@ class ArrayStack2DProxy:
|
|
|
141
145
|
|
|
142
146
|
return np.dtype(np.object_)
|
|
143
147
|
|
|
144
|
-
# endregion
|
|
145
|
-
|
|
146
|
-
# region methods
|
|
147
148
|
def stack(self, n: int | None = None) -> npt.NDArray[Any]:
|
|
148
149
|
_subset = slice(None) if n is None else slice(0, n)
|
|
149
150
|
|
|
@@ -174,16 +175,13 @@ class ArrayStack2DProxy:
|
|
|
174
175
|
None,
|
|
175
176
|
)
|
|
176
177
|
|
|
177
|
-
# endregion
|
|
178
|
-
|
|
179
178
|
|
|
180
|
-
class H5DataFrameContainerProxy:
|
|
181
|
-
__slots__ = "_h5dfs", "_index", "_columns", "_name", "_sparse_matrices"
|
|
179
|
+
class EZDataFrameContainerProxy:
|
|
180
|
+
__slots__: tuple[str, ...] = "_h5dfs", "_index", "_columns", "_name", "_sparse_matrices"
|
|
182
181
|
|
|
183
|
-
# region magic methods
|
|
184
182
|
def __init__(
|
|
185
183
|
self,
|
|
186
|
-
h5dfs: MutableMapping[str, H5DataFrame],
|
|
184
|
+
h5dfs: MutableMapping[str, EZDataFrame],
|
|
187
185
|
name: str,
|
|
188
186
|
index: AnyNDArrayLike_IFS,
|
|
189
187
|
columns: AnyNDArrayLike_IFS | None = None,
|
|
@@ -196,6 +194,7 @@ class H5DataFrameContainerProxy:
|
|
|
196
194
|
# FIXME : find better way
|
|
197
195
|
self._sparse_matrices: set[str] = set()
|
|
198
196
|
|
|
197
|
+
@override
|
|
199
198
|
def __repr__(self) -> str:
|
|
200
199
|
return f"{self._name} with keys: {', '.join(self._h5dfs.keys())}"
|
|
201
200
|
|
|
@@ -210,15 +209,10 @@ class H5DataFrameContainerProxy:
|
|
|
210
209
|
self._sparse_matrices.add(str(key))
|
|
211
210
|
value = np.array(value.todense())
|
|
212
211
|
|
|
213
|
-
self._h5dfs[str(key)] =
|
|
212
|
+
self._h5dfs[str(key)] = EZDataFrame(value, index=self._index, columns=self._columns)
|
|
214
213
|
|
|
215
214
|
def __contains__(self, key: str) -> bool:
|
|
216
215
|
return key in self._h5dfs.keys()
|
|
217
216
|
|
|
218
|
-
# endregion
|
|
219
|
-
|
|
220
|
-
# region methods
|
|
221
217
|
def keys(self) -> KeysView[str]:
|
|
222
218
|
return self._h5dfs.keys()
|
|
223
|
-
|
|
224
|
-
# endregion
|
|
@@ -7,20 +7,19 @@ import pandas as pd
|
|
|
7
7
|
|
|
8
8
|
from vdata._typing import IFS, Slicer
|
|
9
9
|
from vdata.tdf import TemporalDataFrameBase, TemporalDataFrameView
|
|
10
|
-
from vdata.timepoint import
|
|
10
|
+
from vdata.timepoint import TimePointNArray
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class DataFrameProxy_TDF:
|
|
14
14
|
__slots__ = ("_tdf",)
|
|
15
15
|
|
|
16
|
-
# region magic methods
|
|
17
16
|
def __init__(self, tdf: TemporalDataFrameBase) -> None:
|
|
18
17
|
self._tdf = tdf
|
|
19
18
|
|
|
20
19
|
def __repr__(self) -> str:
|
|
21
20
|
return f"Proxy<TDF -> DataFrame> for\n{self._tdf}"
|
|
22
21
|
|
|
23
|
-
def __getitem__(self, key: Slicer) -> pd.Series[int | float | str] |
|
|
22
|
+
def __getitem__(self, key: Slicer) -> pd.Series[int | float | str] | TimePointNArray:
|
|
24
23
|
if key == self._tdf.get_timepoints_column_name():
|
|
25
24
|
return self._tdf.timepoints_column
|
|
26
25
|
|
|
@@ -37,9 +36,6 @@ class DataFrameProxy_TDF:
|
|
|
37
36
|
def __iter__(self) -> Iterable[str]:
|
|
38
37
|
return iter(self.columns)
|
|
39
38
|
|
|
40
|
-
# endregion
|
|
41
|
-
|
|
42
|
-
# region attributes
|
|
43
39
|
@property
|
|
44
40
|
def index(self) -> pd.Index:
|
|
45
41
|
return pd.Index(self._tdf.index.values)
|
|
@@ -48,10 +44,5 @@ class DataFrameProxy_TDF:
|
|
|
48
44
|
def columns(self) -> pd.Index:
|
|
49
45
|
return pd.Index(np.concatenate((np.array([self._tdf.get_timepoints_column_name()]), self._tdf.columns)))
|
|
50
46
|
|
|
51
|
-
# endregion
|
|
52
|
-
|
|
53
|
-
# region methods
|
|
54
47
|
def keys(self) -> pd.Index:
|
|
55
48
|
return self.columns
|
|
56
|
-
|
|
57
|
-
# endregion
|