tsdf 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tsdf-0.5.0 → tsdf-0.5.2}/PKG-INFO +1 -1
- {tsdf-0.5.0 → tsdf-0.5.2}/pyproject.toml +1 -1
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/__init__.py +16 -16
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/numpy_utils.py +2 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/parse_metadata.py +25 -24
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/read_binary.py +18 -15
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/tsdfmetadata.py +6 -3
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/validator.py +13 -7
- {tsdf-0.5.0 → tsdf-0.5.2}/LICENSE +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/README.md +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/constants.py +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/file_utils.py +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/legacy_tsdf_utils.py +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/read_tsdf.py +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/write_binary.py +0 -0
- {tsdf-0.5.0 → tsdf-0.5.2}/src/tsdf/write_tsdf.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: tsdf
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: A Python library that provides methods for encoding and decoding TSDF (Time Series Data Format) data, which allows you to easily create, manipulate and serialize TSDF files in your Python code.
|
|
5
5
|
Home-page: https://github.com/biomarkersParkinson/tsdf
|
|
6
6
|
License: Apache-2.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "tsdf"
|
|
3
|
-
version = "0.5.0"
|
|
3
|
+
version = "0.5.2"
|
|
4
4
|
description = "A Python library that provides methods for encoding and decoding TSDF (Time Series Data Format) data, which allows you to easily create, manipulate and serialize TSDF files in your Python code."
|
|
5
5
|
authors = ["Peter Kok <p.kok@esciencecenter.nl>",
|
|
6
6
|
"Pablo Rodríguez <p.rodriguez-sanchez@esciencecenter.nl>",
|
|
@@ -8,34 +8,34 @@ from .read_tsdf import (
|
|
|
8
8
|
load_metadata_from_path,
|
|
9
9
|
load_metadatas_from_dir,
|
|
10
10
|
load_metadata_string,
|
|
11
|
-
load_metadata_legacy_file
|
|
11
|
+
load_metadata_legacy_file,
|
|
12
12
|
)
|
|
13
13
|
from .write_tsdf import (
|
|
14
14
|
write_metadata,
|
|
15
15
|
)
|
|
16
|
-
|
|
16
|
+
|
|
17
17
|
from .write_binary import (
|
|
18
18
|
write_binary_file,
|
|
19
19
|
write_dataframe_to_binaries,
|
|
20
20
|
)
|
|
21
21
|
from .read_binary import (
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
load_ndarray_from_binary,
|
|
23
|
+
load_dataframe_from_binaries,
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
from .tsdfmetadata import TSDFMetadata
|
|
27
27
|
|
|
28
28
|
__all__ = [
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
29
|
+
"load_metadata_file",
|
|
30
|
+
"load_metadata_from_path",
|
|
31
|
+
"load_metadatas_from_dir",
|
|
32
|
+
"load_metadata_string",
|
|
33
|
+
"load_metadata_legacy_file",
|
|
34
|
+
"write_metadata",
|
|
35
|
+
"write_binary_file",
|
|
36
|
+
"write_dataframe_to_binaries",
|
|
37
|
+
"load_ndarray_from_binary",
|
|
38
|
+
"load_dataframe_from_binaries",
|
|
39
|
+
"TSDFMetadata",
|
|
40
|
+
"constants",
|
|
41
41
|
]
|
|
@@ -61,6 +61,7 @@ _map_from_numpy_endianness = {
|
|
|
61
61
|
"<": "little",
|
|
62
62
|
">": "big",
|
|
63
63
|
"=": sys.byteorder,
|
|
64
|
+
"|": "not applicable",
|
|
64
65
|
}
|
|
65
66
|
""" Supported endianness values. """
|
|
66
67
|
|
|
@@ -79,6 +80,7 @@ def endianness_numpy_to_tsdf(data: np.ndarray) -> str:
|
|
|
79
80
|
_map_to_numpy_endianness = {
|
|
80
81
|
"little": "<",
|
|
81
82
|
"big": ">",
|
|
83
|
+
"not applicable": "|",
|
|
82
84
|
}
|
|
83
85
|
""" Supported endianness values. """
|
|
84
86
|
|
|
@@ -8,9 +8,10 @@ import os
|
|
|
8
8
|
from typing import Any, Dict, List
|
|
9
9
|
|
|
10
10
|
from tsdf import constants
|
|
11
|
-
from tsdf import tsdfmetadata
|
|
11
|
+
from tsdf import tsdfmetadata
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
def read_data(data: Any, source_path: str) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
|
|
14
15
|
"""
|
|
15
16
|
Function used to parse the JSON object containing TSDF metadata. It returns a
|
|
16
17
|
list of TSDFMetadata objects, where each object describes formatting of a binary file.
|
|
@@ -25,8 +26,10 @@ def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetada
|
|
|
25
26
|
|
|
26
27
|
# Check if the version is supported
|
|
27
28
|
version = data["metadata_version"]
|
|
28
|
-
if not
|
|
29
|
-
raise tsdfmetadata.TSDFMetadataFieldValueError(
|
|
29
|
+
if version not in constants.SUPPORTED_TSDF_VERSIONS:
|
|
30
|
+
raise tsdfmetadata.TSDFMetadataFieldValueError(
|
|
31
|
+
f"TSDF file version {version} not supported."
|
|
32
|
+
)
|
|
30
33
|
|
|
31
34
|
defined_properties: Dict[str, Any] = {}
|
|
32
35
|
return _read_struct(data, defined_properties.copy(), source_path, version)
|
|
@@ -34,7 +37,7 @@ def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetada
|
|
|
34
37
|
|
|
35
38
|
def _read_struct(
|
|
36
39
|
data: Any, defined_properties: Dict[str, Any], source_path, version: str
|
|
37
|
-
) -> Dict[str,
|
|
40
|
+
) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
|
|
38
41
|
"""
|
|
39
42
|
Recursive method used to parse the TSDF metadata in a hierarchical
|
|
40
43
|
order (from the root towards the leaves).
|
|
@@ -48,7 +51,7 @@ def _read_struct(
|
|
|
48
51
|
|
|
49
52
|
:raises tsdf_metadata.TSDFMetadataFieldError: if the TSDF metadata file is missing a mandatory field.
|
|
50
53
|
"""
|
|
51
|
-
all_streams: Dict[str,
|
|
54
|
+
all_streams: Dict[str, "tsdfmetadata.TSDFMetadata"] = {}
|
|
52
55
|
remaining_data = {}
|
|
53
56
|
leaf: bool = True
|
|
54
57
|
|
|
@@ -66,16 +69,14 @@ def _read_struct(
|
|
|
66
69
|
if leaf:
|
|
67
70
|
try:
|
|
68
71
|
bin_file_name = defined_properties["file_name"]
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
"A property 'file_name' is missing in the TSDF metadata file."
|
|
78
|
-
) from exc
|
|
72
|
+
except KeyError:
|
|
73
|
+
raise tsdfmetadata.TSDFMetadataFieldError.missing_field("file_name")
|
|
74
|
+
path = os.path.split(source_path)
|
|
75
|
+
file_dir = os.path.join(path[0])
|
|
76
|
+
meta_file_name = path[1]
|
|
77
|
+
all_streams[bin_file_name] = tsdfmetadata.TSDFMetadata(
|
|
78
|
+
defined_properties, file_dir, meta_file_name
|
|
79
|
+
)
|
|
79
80
|
|
|
80
81
|
# 3) If the current element is not a leaf, `remaining_data`` will contain lower
|
|
81
82
|
# levels of the TSDF structure.
|
|
@@ -158,18 +159,18 @@ def contains_tsdf_mandatory_fields(dictionary: Dict[str, Any]) -> bool:
|
|
|
158
159
|
:raises tsdf_metadata.TSDFMetadataFieldValueError: if the TSDF metadata file contains an invalid value.
|
|
159
160
|
"""
|
|
160
161
|
version_key = "metadata_version"
|
|
161
|
-
if not
|
|
162
|
-
raise tsdfmetadata.TSDFMetadataFieldError(
|
|
162
|
+
if version_key not in dictionary.keys():
|
|
163
|
+
raise tsdfmetadata.TSDFMetadataFieldError.missing_field(version_key)
|
|
163
164
|
|
|
164
165
|
version = dictionary[version_key]
|
|
165
166
|
for key in constants.MANDATORY_TSDF_KEYS[version]:
|
|
166
|
-
if not
|
|
167
|
-
raise tsdfmetadata.TSDFMetadataFieldError(
|
|
167
|
+
if key not in dictionary.keys():
|
|
168
|
+
raise tsdfmetadata.TSDFMetadataFieldError.missing_field(key)
|
|
168
169
|
units = "units"
|
|
169
170
|
channels = "channels"
|
|
170
171
|
if len(dictionary[units]) != len(dictionary[channels]):
|
|
171
172
|
raise tsdfmetadata.TSDFMetadataFieldValueError(
|
|
172
|
-
f"TSDF structure
|
|
173
|
+
f"TSDF metadata structure must specify equal number of {units} and {channels} for each binary file."
|
|
173
174
|
)
|
|
174
175
|
|
|
175
176
|
for key, value in dictionary.items():
|
|
@@ -203,8 +204,8 @@ def _check_tsdf_property_format(key: str, value, version: str) -> None:
|
|
|
203
204
|
|
|
204
205
|
|
|
205
206
|
def get_file_metadata_at_index(
|
|
206
|
-
metadata: Dict[str,
|
|
207
|
-
) ->
|
|
207
|
+
metadata: Dict[str, "tsdfmetadata.TSDFMetadata"], index: int
|
|
208
|
+
) -> "tsdfmetadata.TSDFMetadata":
|
|
208
209
|
"""
|
|
209
210
|
Returns the metadata object at the position defined by the index.
|
|
210
211
|
|
|
@@ -222,7 +223,7 @@ def get_file_metadata_at_index(
|
|
|
222
223
|
raise IndexError("The index is out of range.")
|
|
223
224
|
|
|
224
225
|
|
|
225
|
-
def confirm_dir_of_metadata(metadatas: List[
|
|
226
|
+
def confirm_dir_of_metadata(metadatas: List["tsdfmetadata.TSDFMetadata"]) -> None:
|
|
226
227
|
"""
|
|
227
228
|
The method is used to confirm whether all the metadata files are expected in the same directory.
|
|
228
229
|
|
|
@@ -8,49 +8,52 @@ import os
|
|
|
8
8
|
from typing import List, Union
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
|
-
from tsdf import numpy_utils
|
|
11
|
+
from tsdf import numpy_utils
|
|
12
12
|
from tsdf import tsdfmetadata
|
|
13
13
|
from tsdf.constants import ConcatenationType
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def
|
|
16
|
+
def load_dataframe_from_binaries(
|
|
17
|
+
metadatas: List["tsdfmetadata.TSDFMetadata"],
|
|
18
|
+
concatenation: ConcatenationType = ConcatenationType.none,
|
|
19
|
+
) -> Union[pd.DataFrame, List[pd.DataFrame]]:
|
|
17
20
|
"""
|
|
18
|
-
Load binary files associated with TSDF
|
|
21
|
+
Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
|
|
19
22
|
|
|
20
23
|
:param metadatas: list of TSDFMetadata objects.
|
|
21
|
-
:param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or
|
|
24
|
+
:param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
|
|
22
25
|
|
|
23
26
|
:return: pandas DataFrame containing the combined data.
|
|
24
27
|
"""
|
|
25
28
|
# Load the data
|
|
26
|
-
|
|
29
|
+
data_frames = []
|
|
27
30
|
for metadata in metadatas:
|
|
28
|
-
data =
|
|
31
|
+
data = load_ndarray_from_binary(metadata)
|
|
29
32
|
df = pd.DataFrame(data, columns=metadata.channels)
|
|
30
|
-
|
|
33
|
+
data_frames.append(df)
|
|
31
34
|
|
|
32
35
|
# Merge the data
|
|
33
36
|
if concatenation == ConcatenationType.rows:
|
|
34
|
-
return pd.concat(
|
|
37
|
+
return pd.concat(data_frames)
|
|
35
38
|
elif concatenation == ConcatenationType.columns:
|
|
36
|
-
return pd.concat(
|
|
39
|
+
return pd.concat(data_frames, axis=1)
|
|
37
40
|
elif concatenation == ConcatenationType.none:
|
|
38
|
-
return
|
|
41
|
+
return data_frames
|
|
39
42
|
|
|
40
43
|
|
|
41
|
-
def
|
|
42
|
-
metadata:
|
|
44
|
+
def load_ndarray_from_binary(
|
|
45
|
+
metadata: "tsdfmetadata.TSDFMetadata", start_row: int = 0, end_row: int = -1
|
|
43
46
|
) -> np.ndarray:
|
|
44
47
|
"""
|
|
45
48
|
Use metadata properties to load and return numpy array from a binary file (located the same directory where the metadata is saved).
|
|
46
|
-
|
|
49
|
+
|
|
47
50
|
:param metadata: TSDFMetadata object.
|
|
48
51
|
:param start_row: (optional) first row to load.
|
|
49
52
|
:param end_row: (optional) last row to load. If -1, load all rows.
|
|
50
53
|
|
|
51
54
|
:return: numpy array containing the data."""
|
|
52
55
|
metadata_dir = metadata.file_dir_path
|
|
53
|
-
|
|
56
|
+
|
|
54
57
|
bin_path = os.path.join(metadata_dir, metadata.file_name)
|
|
55
58
|
return _load_binary_file(
|
|
56
59
|
bin_path,
|
|
@@ -75,7 +78,7 @@ def _load_binary_file(
|
|
|
75
78
|
end_row: int = -1,
|
|
76
79
|
) -> np.ndarray:
|
|
77
80
|
"""
|
|
78
|
-
Use provided parameters to load and return a numpy array from a binary file
|
|
81
|
+
Use provided parameters to load and return a numpy array from a binary file.
|
|
79
82
|
|
|
80
83
|
:param bin_file_path: path to the binary file.
|
|
81
84
|
:param data_type: data type of the binary file.
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
from typing import Any, Dict, List
|
|
3
|
-
|
|
4
|
-
from tsdf import parse_metadata
|
|
3
|
+
|
|
4
|
+
from tsdf import parse_metadata
|
|
5
5
|
|
|
6
6
|
class TSDFMetadataFieldError(Exception):
|
|
7
7
|
"Raised when the TSDFMetadata is missing an obligatory field."
|
|
8
|
-
|
|
8
|
+
@classmethod
|
|
9
|
+
def missing_field(cls, field_name: str):
|
|
10
|
+
message = f"Value for the obligatory TSDF field '{field_name}' is missing in the provided TSDF metadata file."
|
|
11
|
+
return cls(message)
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
class TSDFMetadataFieldValueError(Exception):
|
|
@@ -4,6 +4,7 @@ import traceback
|
|
|
4
4
|
import json
|
|
5
5
|
from tsdf import read_tsdf, read_binary
|
|
6
6
|
|
|
7
|
+
|
|
7
8
|
def validate_tsdf_format(file_path):
|
|
8
9
|
try:
|
|
9
10
|
# Read the meta data (this will check for compulsory fields and such)
|
|
@@ -15,27 +16,31 @@ def validate_tsdf_format(file_path):
|
|
|
15
16
|
|
|
16
17
|
# Loop through all the files in the metadata
|
|
17
18
|
for file_name, file_metadata in metadata.items():
|
|
18
|
-
|
|
19
19
|
# print the file_metadata as json
|
|
20
20
|
# print(json.dumps(file_metadata.get_plain_tsdf_dict_copy(), indent=4))
|
|
21
21
|
|
|
22
22
|
# Load the binary data
|
|
23
|
-
binary_data = read_binary.
|
|
23
|
+
binary_data = read_binary.load_ndarray_from_binary(file_metadata)
|
|
24
24
|
|
|
25
25
|
# Success message
|
|
26
|
-
print(
|
|
26
|
+
print(
|
|
27
|
+
f"Successfully loaded binary file {file_name}, resulting shape: {binary_data.shape}"
|
|
28
|
+
)
|
|
27
29
|
|
|
28
30
|
return True
|
|
29
31
|
|
|
30
32
|
except Exception as e:
|
|
31
33
|
print(f"Error while validating: {e}")
|
|
32
|
-
#traceback.print_exc()
|
|
34
|
+
# traceback.print_exc()
|
|
33
35
|
return False
|
|
34
36
|
|
|
37
|
+
|
|
35
38
|
def main():
|
|
36
39
|
# Parse the arguments
|
|
37
|
-
parser = argparse.ArgumentParser(
|
|
38
|
-
|
|
40
|
+
parser = argparse.ArgumentParser(
|
|
41
|
+
description="Validate a file content against the TSDF format."
|
|
42
|
+
)
|
|
43
|
+
parser.add_argument("file_path", help="Path to the file to validate")
|
|
39
44
|
args = parser.parse_args()
|
|
40
45
|
|
|
41
46
|
# Perform validation
|
|
@@ -44,5 +49,6 @@ def main():
|
|
|
44
49
|
# Exit with error code 1 if the validation failed
|
|
45
50
|
exit(0 if is_valid else 1)
|
|
46
51
|
|
|
47
|
-
|
|
52
|
+
|
|
53
|
+
if __name__ == "__main__":
|
|
48
54
|
main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|