tsdf 0.5.0__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tsdf
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: A Python library that provides methods for encoding and decoding TSDF (Time Series Data Format) data, which allows you to easily create, manipulate and serialize TSDF files in your Python code.
5
5
  Home-page: https://github.com/biomarkersParkinson/tsdf
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tsdf"
3
- version = "0.5.0"
3
+ version = "0.5.2"
4
4
  description = "A Python library that provides methods for encoding and decoding TSDF (Time Series Data Format) data, which allows you to easily create, manipulate and serialize TSDF files in your Python code."
5
5
  authors = ["Peter Kok <p.kok@esciencecenter.nl>",
6
6
  "Pablo Rodríguez <p.rodriguez-sanchez@esciencecenter.nl>",
@@ -8,34 +8,34 @@ from .read_tsdf import (
8
8
  load_metadata_from_path,
9
9
  load_metadatas_from_dir,
10
10
  load_metadata_string,
11
- load_metadata_legacy_file
11
+ load_metadata_legacy_file,
12
12
  )
13
13
  from .write_tsdf import (
14
14
  write_metadata,
15
15
  )
16
-
16
+
17
17
  from .write_binary import (
18
18
  write_binary_file,
19
19
  write_dataframe_to_binaries,
20
20
  )
21
21
  from .read_binary import (
22
- load_binary_from_metadata,
23
- load_binaries_to_dataframe,
22
+ load_ndarray_from_binary,
23
+ load_dataframe_from_binaries,
24
24
  )
25
25
 
26
26
  from .tsdfmetadata import TSDFMetadata
27
27
 
28
28
  __all__ = [
29
- 'load_metadata_file',
30
- 'load_metadata_from_path',
31
- 'load_metadatas_from_dir',
32
- 'load_metadata_string',
33
- 'load_metadata_legacy_file',
34
- 'write_metadata',
35
- 'write_binary_file',
36
- 'write_dataframe_to_binaries',
37
- 'load_binary_from_metadata',
38
- 'load_binaries_to_dataframe',
39
- 'TSDFMetadata',
40
- 'constants'
29
+ "load_metadata_file",
30
+ "load_metadata_from_path",
31
+ "load_metadatas_from_dir",
32
+ "load_metadata_string",
33
+ "load_metadata_legacy_file",
34
+ "write_metadata",
35
+ "write_binary_file",
36
+ "write_dataframe_to_binaries",
37
+ "load_ndarray_from_binary",
38
+ "load_dataframe_from_binaries",
39
+ "TSDFMetadata",
40
+ "constants",
41
41
  ]
@@ -61,6 +61,7 @@ _map_from_numpy_endianness = {
61
61
  "<": "little",
62
62
  ">": "big",
63
63
  "=": sys.byteorder,
64
+ "|": "not applicable",
64
65
  }
65
66
  """ Supported endianness values. """
66
67
 
@@ -79,6 +80,7 @@ def endianness_numpy_to_tsdf(data: np.ndarray) -> str:
79
80
  _map_to_numpy_endianness = {
80
81
  "little": "<",
81
82
  "big": ">",
83
+ "not applicable": "|",
82
84
  }
83
85
  """ Supported endianness values. """
84
86
 
@@ -8,9 +8,10 @@ import os
8
8
  from typing import Any, Dict, List
9
9
 
10
10
  from tsdf import constants
11
- from tsdf import tsdfmetadata
11
+ from tsdf import tsdfmetadata
12
12
 
13
- def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetadata']:
13
+
14
+ def read_data(data: Any, source_path: str) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
14
15
  """
15
16
  Function used to parse the JSON object containing TSDF metadata. It returns a
16
17
  dictionary of TSDFMetadata objects (keyed by binary file name), where each object describes formatting of a binary file.
@@ -25,8 +26,10 @@ def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetada
25
26
 
26
27
  # Check if the version is supported
27
28
  version = data["metadata_version"]
28
- if not version in constants.SUPPORTED_TSDF_VERSIONS:
29
- raise tsdfmetadata.TSDFMetadataFieldValueError(f"TSDF file version {version} not supported.")
29
+ if version not in constants.SUPPORTED_TSDF_VERSIONS:
30
+ raise tsdfmetadata.TSDFMetadataFieldValueError(
31
+ f"TSDF file version {version} not supported."
32
+ )
30
33
 
31
34
  defined_properties: Dict[str, Any] = {}
32
35
  return _read_struct(data, defined_properties.copy(), source_path, version)
@@ -34,7 +37,7 @@ def read_data(data: Any, source_path: str) -> Dict[str, 'tsdfmetadata.TSDFMetada
34
37
 
35
38
  def _read_struct(
36
39
  data: Any, defined_properties: Dict[str, Any], source_path, version: str
37
- ) -> Dict[str, 'tsdfmetadata.TSDFMetadata']:
40
+ ) -> Dict[str, "tsdfmetadata.TSDFMetadata"]:
38
41
  """
39
42
  Recursive method used to parse the TSDF metadata in a hierarchical
40
43
  order (from the root towards the leaves).
@@ -48,7 +51,7 @@ def _read_struct(
48
51
 
49
52
  :raises tsdf_metadata.TSDFMetadataFieldError: if the TSDF metadata file is missing a mandatory field.
50
53
  """
51
- all_streams: Dict[str, 'tsdfmetadata.TSDFMetadata'] = {}
54
+ all_streams: Dict[str, "tsdfmetadata.TSDFMetadata"] = {}
52
55
  remaining_data = {}
53
56
  leaf: bool = True
54
57
 
@@ -66,16 +69,14 @@ def _read_struct(
66
69
  if leaf:
67
70
  try:
68
71
  bin_file_name = defined_properties["file_name"]
69
- path = os.path.split(source_path)
70
- file_dir = os.path.join(path[0])
71
- meta_file_name = path[1]
72
- all_streams[bin_file_name] = tsdfmetadata.TSDFMetadata(
73
- defined_properties, file_dir, meta_file_name
74
- )
75
- except tsdfmetadata.TSDFMetadataFieldError as exc:
76
- raise tsdfmetadata.TSDFMetadataFieldError(
77
- "A property 'file_name' is missing in the TSDF metadata file."
78
- ) from exc
72
+ except KeyError:
73
+ raise tsdfmetadata.TSDFMetadataFieldError.missing_field("file_name")
74
+ path = os.path.split(source_path)
75
+ file_dir = os.path.join(path[0])
76
+ meta_file_name = path[1]
77
+ all_streams[bin_file_name] = tsdfmetadata.TSDFMetadata(
78
+ defined_properties, file_dir, meta_file_name
79
+ )
79
80
 
80
81
  # 3) If the current element is not a leaf, `remaining_data` will contain lower
81
82
  # levels of the TSDF structure.
@@ -158,18 +159,18 @@ def contains_tsdf_mandatory_fields(dictionary: Dict[str, Any]) -> bool:
158
159
  :raises tsdf_metadata.TSDFMetadataFieldValueError: if the TSDF metadata file contains an invalid value.
159
160
  """
160
161
  version_key = "metadata_version"
161
- if not version_key in dictionary.keys():
162
- raise tsdfmetadata.TSDFMetadataFieldError(f"TSDF structure is missing key '{version_key}'")
162
+ if version_key not in dictionary.keys():
163
+ raise tsdfmetadata.TSDFMetadataFieldError.missing_field(version_key)
163
164
 
164
165
  version = dictionary[version_key]
165
166
  for key in constants.MANDATORY_TSDF_KEYS[version]:
166
- if not key in dictionary.keys():
167
- raise tsdfmetadata.TSDFMetadataFieldError(f"TSDF structure is missing key '{key}'")
167
+ if key not in dictionary.keys():
168
+ raise tsdfmetadata.TSDFMetadataFieldError.missing_field(key)
168
169
  units = "units"
169
170
  channels = "channels"
170
171
  if len(dictionary[units]) != len(dictionary[channels]):
171
172
  raise tsdfmetadata.TSDFMetadataFieldValueError(
172
- f"TSDF structure requires equal number of {units} and {channels}"
173
+ f"TSDF metadata structure must specify equal number of {units} and {channels} for each binary file."
173
174
  )
174
175
 
175
176
  for key, value in dictionary.items():
@@ -203,8 +204,8 @@ def _check_tsdf_property_format(key: str, value, version: str) -> None:
203
204
 
204
205
 
205
206
  def get_file_metadata_at_index(
206
- metadata: Dict[str, 'tsdfmetadata.TSDFMetadata'], index: int
207
- ) -> 'tsdfmetadata.TSDFMetadata':
207
+ metadata: Dict[str, "tsdfmetadata.TSDFMetadata"], index: int
208
+ ) -> "tsdfmetadata.TSDFMetadata":
208
209
  """
209
210
  Returns the metadata object at the position defined by the index.
210
211
 
@@ -222,7 +223,7 @@ def get_file_metadata_at_index(
222
223
  raise IndexError("The index is out of range.")
223
224
 
224
225
 
225
- def confirm_dir_of_metadata(metadatas: List['tsdfmetadata.TSDFMetadata']) -> None:
226
+ def confirm_dir_of_metadata(metadatas: List["tsdfmetadata.TSDFMetadata"]) -> None:
226
227
  """
227
228
  The method is used to confirm whether all the metadata files are expected in the same directory.
228
229
 
@@ -8,49 +8,52 @@ import os
8
8
  from typing import List, Union
9
9
  import numpy as np
10
10
  import pandas as pd
11
- from tsdf import numpy_utils
11
+ from tsdf import numpy_utils
12
12
  from tsdf import tsdfmetadata
13
13
  from tsdf.constants import ConcatenationType
14
14
 
15
15
 
16
- def load_binaries_to_dataframe(metadatas: '[tsdfmetadata.TSDFMetadata]', concatenation: ConcatenationType = ConcatenationType.none) -> Union[pd.DataFrame, List[pd.DataFrame]]:
16
+ def load_dataframe_from_binaries(
17
+ metadatas: List["tsdfmetadata.TSDFMetadata"],
18
+ concatenation: ConcatenationType = ConcatenationType.none,
19
+ ) -> Union[pd.DataFrame, List[pd.DataFrame]]:
17
20
  """
18
- Load binary files associated with TSDF and return a combined pandas DataFrame.
21
+ Load content of binary files associated with TSDF into a pandas DataFrame. The data frames can be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
19
22
 
20
23
  :param metadatas: list of TSDFMetadata objects.
21
- :param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or not concatenated (ConcatenationType.none), but provided as a list of data frames.
24
+ :param concatenation: concatenation rule, i.e., determines whether the data frames (content of binary files) should be concatenated horizontally (ConcatenationType.columns), vertically (ConcatenationType.rows) or provided as a list of data frames (ConcatenationType.none).
22
25
 
23
26
  :return: pandas DataFrame containing the combined data, or a list of data frames when concatenation is ConcatenationType.none.
24
27
  """
25
28
  # Load the data
26
- dataFrames = []
29
+ data_frames = []
27
30
  for metadata in metadatas:
28
- data = load_binary_from_metadata(metadata)
31
+ data = load_ndarray_from_binary(metadata)
29
32
  df = pd.DataFrame(data, columns=metadata.channels)
30
- dataFrames.append(df)
33
+ data_frames.append(df)
31
34
 
32
35
  # Merge the data
33
36
  if concatenation == ConcatenationType.rows:
34
- return pd.concat(dataFrames)
37
+ return pd.concat(data_frames)
35
38
  elif concatenation == ConcatenationType.columns:
36
- return pd.concat(dataFrames, axis=1)
39
+ return pd.concat(data_frames, axis=1)
37
40
  elif concatenation == ConcatenationType.none:
38
- return dataFrames
41
+ return data_frames
39
42
 
40
43
 
41
- def load_binary_from_metadata(
42
- metadata: 'tsdfmetadata.TSDFMetadata', start_row: int = 0, end_row: int = -1
44
+ def load_ndarray_from_binary(
45
+ metadata: "tsdfmetadata.TSDFMetadata", start_row: int = 0, end_row: int = -1
43
46
  ) -> np.ndarray:
44
47
  """
45
48
  Use metadata properties to load and return a numpy array from a binary file (located in the same directory where the metadata is saved).
46
-
49
+
47
50
  :param metadata: TSDFMetadata object.
48
51
  :param start_row: (optional) first row to load.
49
52
  :param end_row: (optional) last row to load. If -1, load all rows.
50
53
 
51
54
  :return: numpy array containing the data."""
52
55
  metadata_dir = metadata.file_dir_path
53
-
56
+
54
57
  bin_path = os.path.join(metadata_dir, metadata.file_name)
55
58
  return _load_binary_file(
56
59
  bin_path,
@@ -75,7 +78,7 @@ def _load_binary_file(
75
78
  end_row: int = -1,
76
79
  ) -> np.ndarray:
77
80
  """
78
- Use provided parameters to load and return a numpy array from a binary file
81
+ Use provided parameters to load and return a numpy array from a binary file.
79
82
 
80
83
  :param bin_file_path: path to the binary file.
81
84
  :param data_type: data type of the binary file.
@@ -1,11 +1,14 @@
1
1
  import copy
2
2
  from typing import Any, Dict, List
3
- from numpy import ndarray
4
- from tsdf import parse_metadata, read_binary
3
+
4
+ from tsdf import parse_metadata
5
5
 
6
6
  class TSDFMetadataFieldError(Exception):
7
7
  "Raised when the TSDFMetadata is missing an obligatory field."
8
- pass
8
+ @classmethod
9
+ def missing_field(cls, field_name: str):
10
+ message = f"Value for the obligatory TSDF field '{field_name}' is missing in the provided TSDF metadata file."
11
+ return cls(message)
9
12
 
10
13
 
11
14
  class TSDFMetadataFieldValueError(Exception):
@@ -4,6 +4,7 @@ import traceback
4
4
  import json
5
5
  from tsdf import read_tsdf, read_binary
6
6
 
7
+
7
8
  def validate_tsdf_format(file_path):
8
9
  try:
9
10
  # Read the meta data (this will check for compulsory fields and such)
@@ -15,27 +16,31 @@ def validate_tsdf_format(file_path):
15
16
 
16
17
  # Loop through all the files in the metadata
17
18
  for file_name, file_metadata in metadata.items():
18
-
19
19
  # print the file_metadata as json
20
20
  # print(json.dumps(file_metadata.get_plain_tsdf_dict_copy(), indent=4))
21
21
 
22
22
  # Load the binary data
23
- binary_data = read_binary.load_binary_from_metadata(file_metadata)
23
+ binary_data = read_binary.load_ndarray_from_binary(file_metadata)
24
24
 
25
25
  # Success message
26
- print(f"Successfully loaded binary file {file_name}, resulting shape: {binary_data.shape}")
26
+ print(
27
+ f"Successfully loaded binary file {file_name}, resulting shape: {binary_data.shape}"
28
+ )
27
29
 
28
30
  return True
29
31
 
30
32
  except Exception as e:
31
33
  print(f"Error while validating: {e}")
32
- #traceback.print_exc()
34
+ # traceback.print_exc()
33
35
  return False
34
36
 
37
+
35
38
  def main():
36
39
  # Parse the arguments
37
- parser = argparse.ArgumentParser(description='Validate a file content against the TSDF format.')
38
- parser.add_argument('file_path', help='Path to the file to validate')
40
+ parser = argparse.ArgumentParser(
41
+ description="Validate a file content against the TSDF format."
42
+ )
43
+ parser.add_argument("file_path", help="Path to the file to validate")
39
44
  args = parser.parse_args()
40
45
 
41
46
  # Perform validation
@@ -44,5 +49,6 @@ def main():
44
49
  # Exit with error code 1 if the validation failed
45
50
  exit(0 if is_valid else 1)
46
51
 
47
- if __name__ == '__main__':
52
+
53
+ if __name__ == "__main__":
48
54
  main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes