vantage6-algorithm-tools 4.3.4__tar.gz → 4.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/PKG-INFO +1 -1
  2. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/client/__init__.py +7 -3
  3. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/client/_version.py +1 -1
  4. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/decorators.py +24 -24
  5. vantage6-algorithm-tools-4.4.0/vantage6/algorithm/tools/exceptions.py +138 -0
  6. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/util.py +8 -0
  7. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/wrap.py +22 -6
  8. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6_algorithm_tools.egg-info/PKG-INFO +1 -1
  9. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6_algorithm_tools.egg-info/requires.txt +1 -1
  10. vantage6-algorithm-tools-4.3.4/vantage6/algorithm/tools/exceptions.py +0 -4
  11. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/setup.cfg +0 -0
  12. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/setup.py +0 -0
  13. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/tests/algorithm_module.py +0 -0
  14. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/tests/test_deserialization.py +0 -0
  15. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/tests/test_docker_wrapper.py +0 -0
  16. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/tests/test_serialization.py +0 -0
  17. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/mock_client.py +0 -0
  18. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/preprocessing/__init__.py +0 -0
  19. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/preprocessing/functions.py +0 -0
  20. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/wrappers.py +0 -0
  21. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6_algorithm_tools.egg-info/SOURCES.txt +0 -0
  22. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6_algorithm_tools.egg-info/dependency_links.txt +0 -0
  23. {vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6_algorithm_tools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vantage6-algorithm-tools
3
- Version: 4.3.4
3
+ Version: 4.4.0
4
4
  Summary: Vantage6 algorithm tools
5
5
  Home-page: https://github.com/vantage6/vantage6
6
6
  Requires-Python: >=3.6
@@ -258,9 +258,13 @@ class AlgorithmClient(ClientBase):
258
258
  self.parent.log.info("--> Attempting to decode results!")
259
259
  result = None
260
260
  if response.get("result"):
261
- result = json_lib.loads(
262
- base64s_to_bytes(response.get("result")).decode()
263
- )
261
+ try:
262
+ result = json_lib.loads(
263
+ base64s_to_bytes(response.get("result")).decode()
264
+ )
265
+ except Exception as e:
266
+ self.parent.log.error("Unable to load results")
267
+ self.parent.log.debug(e)
264
268
  return result
265
269
 
266
270
  def from_task(self, task_id: int) -> list[Any]:
@@ -7,7 +7,7 @@ with open(os.path.join(here, "__build__")) as fp:
7
7
  __build__ = json.load(fp)
8
8
 
9
9
  # Module version
10
- version_info = (4, 3, 4, "final", __build__, 0)
10
+ version_info = (4, 4, 0, "final", __build__, 0)
11
11
 
12
12
  # Module version stage suffix map
13
13
  _specifier_ = {"alpha": "a", "beta": "b", "candidate": "rc", "final": ""}
@@ -10,7 +10,7 @@ import pandas as pd
10
10
 
11
11
  from vantage6.algorithm.client import AlgorithmClient
12
12
  from vantage6.algorithm.tools.mock_client import MockAlgorithmClient
13
- from vantage6.algorithm.tools.util import info, error, warn, get_env_var
13
+ from vantage6.algorithm.tools.util import info, error, warn
14
14
  from vantage6.algorithm.tools.wrappers import load_data
15
15
  from vantage6.algorithm.tools.preprocessing import preprocess_data
16
16
 
@@ -93,12 +93,12 @@ def _algorithm_client() -> callable:
93
93
  if mock_client is not None:
94
94
  return func(mock_client, *args, **kwargs)
95
95
  # read server address from the environment
96
- host = get_env_var("HOST")
97
- port = get_env_var("PORT")
98
- api_path = get_env_var("API_PATH")
96
+ host = os.environ["HOST"]
97
+ port = os.environ["PORT"]
98
+ api_path = os.environ["API_PATH"]
99
99
 
100
100
  # read token from the environment
101
- token_file = get_env_var("TOKEN_FILE")
101
+ token_file = os.environ["TOKEN_FILE"]
102
102
  info("Reading token")
103
103
  with open(token_file) as fp:
104
104
  token = fp.read().strip()
@@ -195,7 +195,7 @@ def data(number_of_databases: int = 1) -> callable:
195
195
 
196
196
  # do any data preprocessing here
197
197
  info(f"Applying preprocessing for database '{label}'")
198
- env_prepro = get_env_var(f"{label.upper()}_PREPROCESSING")
198
+ env_prepro = os.environ.get(f"{label.upper()}_PREPROCESSING")
199
199
  if env_prepro is not None:
200
200
  preprocess = json.loads(env_prepro)
201
201
  data_ = preprocess_data(data_, preprocess)
@@ -309,7 +309,7 @@ def metadata(func: callable) -> callable:
309
309
  >>> def my_algorithm(metadata: RunMetaData, <other arguments>):
310
310
  >>> pass
311
311
  """
312
- token_file = get_env_var("TOKEN_FILE")
312
+ token_file = os.environ["TOKEN_FILE"]
313
313
  info("Reading token")
314
314
  with open(token_file) as fp:
315
315
  token = fp.read().strip()
@@ -322,10 +322,10 @@ def metadata(func: callable) -> callable:
322
322
  node_id=payload["node_id"],
323
323
  collaboration_id=payload["collaboration_id"],
324
324
  organization_id=payload["organization_id"],
325
- temporary_directory=Path(get_env_var("TEMPORARY_FOLDER")),
326
- output_file=Path(get_env_var("OUTPUT_FILE")),
327
- input_file=Path(get_env_var("INPUT_FILE")),
328
- token_file=Path(get_env_var("TOKEN_FILE")),
325
+ temporary_directory=Path(os.environ["TEMPORARY_FOLDER"]),
326
+ output_file=Path(os.environ["OUTPUT_FILE"]),
327
+ input_file=Path(os.environ["INPUT_FILE"]),
328
+ token_file=Path(os.environ["TOKEN_FILE"]),
329
329
  )
330
330
  return func(metadata, *args, **kwargs)
331
331
 
@@ -355,11 +355,11 @@ def get_ohdsi_metadata(label: str) -> OHDSIMetaData:
355
355
  for var in expected_env_vars:
356
356
  _check_environment_var_exists_or_exit(f"{label_}_DB_PARAM_{var}")
357
357
 
358
- tmp = Path(get_env_var("TEMPORARY_FOLDER"))
358
+ tmp = Path(os.environ["TEMPORARY_FOLDER"])
359
359
  metadata = OHDSIMetaData(
360
- database=get_env_var(f"{label_}_DB_PARAM_CDM_DATABASE"),
361
- cdm_schema=get_env_var(f"{label_}_DB_PARAM_CDM_SCHEMA"),
362
- results_schema=get_env_var(f"{label_}_DB_PARAM_RESULTS_SCHEMA"),
360
+ database=os.environ[f"{label_}_DB_PARAM_CDM_DATABASE"],
361
+ cdm_schema=os.environ[f"{label_}_DB_PARAM_CDM_SCHEMA"],
362
+ results_schema=os.environ[f"{label_}_DB_PARAM_RESULTS_SCHEMA"],
363
363
  incremental_folder=tmp / "incremental",
364
364
  cohort_statistics_folder=tmp / "cohort_statistics",
365
365
  export_folder=tmp / "export",
@@ -417,10 +417,10 @@ def _create_omop_database_connection(label: str) -> callable:
417
417
  _check_environment_var_exists_or_exit(f"{label_}_DB_PARAM_{var}")
418
418
 
419
419
  info("Reading OHDSI environment variables")
420
- dbms = get_env_var(f"{label_}_DB_PARAM_DBMS")
421
- uri = get_env_var(f"{label_}_DATABASE_URI")
422
- user = get_env_var(f"{label_}_DB_PARAM_USER")
423
- password = get_env_var(f"{label_}_DB_PARAM_PASSWORD")
420
+ dbms = os.environ[f"{label_}_DB_PARAM_DBMS"]
421
+ uri = os.environ[f"{label_}_DATABASE_URI"]
422
+ user = os.environ[f"{label_}_DB_PARAM_USER"]
423
+ password = os.environ[f"{label_}_DB_PARAM_PASSWORD"]
424
424
  info(f" - dbms: {dbms}")
425
425
  info(f" - uri: {uri}")
426
426
  info(f" - user: {user}")
@@ -460,20 +460,20 @@ def _get_data_from_label(label: str) -> pd.DataFrame:
460
460
  Data from the database
461
461
  """
462
462
  # Load the input data from the input file - this may e.g. include the
463
- database_uri = get_env_var(f"{label.upper()}_DATABASE_URI")
463
+ database_uri = os.environ[f"{label.upper()}_DATABASE_URI"]
464
464
  info(f"Using '{database_uri}' with label '{label}' as database")
465
465
 
466
466
  # Get the database type from the environment variable, this variable is
467
467
  # set by the vantage6 node based on its configuration file.
468
- database_type = get_env_var(f"{label.upper()}_DATABASE_TYPE", "csv").lower()
468
+ database_type = os.environ.get(f"{label.upper()}_DATABASE_TYPE", "csv").lower()
469
469
 
470
470
  # Load the data based on the database type. Try to provide environment
471
471
  # variables that should be available for some data types.
472
472
  return load_data(
473
473
  database_uri,
474
474
  database_type,
475
- query=get_env_var(f"{label.upper()}_QUERY"),
476
- sheet_name=get_env_var(f"{label.upper()}_SHEET_NAME"),
475
+ query=os.environ.get(f"{label.upper()}_QUERY"),
476
+ sheet_name=os.environ.get(f"{label.upper()}_SHEET_NAME"),
477
477
  )
478
478
 
479
479
 
@@ -488,7 +488,7 @@ def _get_user_database_labels() -> list[str]:
488
488
  """
489
489
  # read the labels that the user requested, which is a comma
490
490
  # separated list of labels.
491
- labels = get_env_var("USER_REQUESTED_DATABASE_LABELS")
491
+ labels = os.environ["USER_REQUESTED_DATABASE_LABELS"]
492
492
  return labels.split(",")
493
493
 
494
494
 
@@ -0,0 +1,138 @@
1
+ class AlgorithmError(Exception):
2
+ """Generic exception raised when an algorithm fails."""
3
+
4
+
5
+ # ---------------- Privacy exceptions ----------------
6
+ class PrivacyViolation(AlgorithmError):
7
+ """Generic exception raised for data privacy concerns."""
8
+
9
+
10
+ class PrivacyThresholdViolation(PrivacyViolation):
11
+ """
12
+ Raised when privacy threshold is violated.
13
+
14
+ Example usage:
15
+ - The number of rows in the data is too low.
16
+ - Returning the results of the algorithm would violate privacy.
17
+ """
18
+
19
+
20
+ # ---------------- Data exceptions ----------------
21
+
22
+
23
+ class DataError(AlgorithmError):
24
+ """Generic error raised with data handling."""
25
+
26
+
27
+ class DataReadError(DataError):
28
+ """Raised when data reading fails.
29
+
30
+ Example usage:
31
+ - File not found.
32
+ - File is not in the right format.
33
+ - File is not readable.
34
+ - File is empty.
35
+ """
36
+
37
+
38
+ class DataTypeError(DataError):
39
+ """Raised when data type is invalid.
40
+
41
+ Example usage:
42
+ - String column is selected by user for numeric operation.
43
+ """
44
+
45
+
46
+ # ---------------- Runtime exceptions ----------------
47
+ class AlgorithmRuntimeError(AlgorithmError):
48
+ """Generic error raised when an algorithm fails at runtime."""
49
+
50
+
51
+ class AlgorithmExecutionError(AlgorithmRuntimeError):
52
+ """Raised when algorithm function fails.
53
+
54
+ Use when the algorithm function raises an exception.
55
+ """
56
+
57
+
58
+ class MaxIterationsReached(AlgorithmRuntimeError):
59
+ """Raised when the maximum number of iterations is reached."""
60
+
61
+
62
+ class ConvergenceError(AlgorithmRuntimeError):
63
+ """Raised when the algorithm fails to converge."""
64
+
65
+
66
+ # ---------------- Client exceptions ----------------
67
+
68
+
69
+ class ClientError(AlgorithmError):
70
+ """Generic error raised when call to the algorithm client fails."""
71
+
72
+
73
+ # the most common client errors are defined separately for clarity
74
+ class SubtakCreationError(ClientError):
75
+ """Raised when subtask creation fails."""
76
+
77
+
78
+ class CollectOrganizationError(ClientError):
79
+ """Raised when organization collection fails."""
80
+
81
+
82
+ class CollectResultsError(ClientError):
83
+ """Raised when result collection fails."""
84
+
85
+
86
+ # ---------------- Input exceptions ----------------
87
+
88
+
89
+ class InputError(AlgorithmError):
90
+ """Generic error raised with algorithm input handling.
91
+ Example usage:
92
+ - User input is invalid.
93
+ - Subtask fails due to invalid input received from the parent task.
94
+ """
95
+
96
+
97
+ class UserInputError(InputError):
98
+ """Raised when user input is invalid.
99
+
100
+ Example usage:
101
+ - User input is not in the expected format.
102
+ """
103
+
104
+
105
+ class DeserializationError(InputError):
106
+ """Raised when result deserialization fails."""
107
+
108
+
109
+ # TODO v5+ remove this alias, which is there for backwards compatibility
110
+ DeserializationException = DeserializationError
111
+
112
+
113
+ class EnvironmentVariableError(InputError):
114
+ """Raised when environment variable is not found."""
115
+
116
+
117
+ # ---------------- Initialization exceptions ----------------
118
+
119
+
120
+ class AlgorithmInitializationError(AlgorithmError):
121
+ """Generic error raised when algorithm initialization fails."""
122
+
123
+
124
+ class AlgorithmModuleNotFoundError(AlgorithmInitializationError):
125
+ """
126
+ Raised when the algorithm module is not found.
127
+
128
+ Note that if this error is raised, the algorithm image is not built correctly.
129
+ """
130
+
131
+
132
+ class MethodNotFoundError(AlgorithmInitializationError):
133
+ """
134
+ Raised when the algorithm method is not found.
135
+
136
+ This error may be raised if the user calls a non-existing method, or if the
137
+ algorithm image is not built correctly.
138
+ """
@@ -1,6 +1,8 @@
1
1
  import sys
2
2
  import os
3
3
  import base64
4
+ import binascii
5
+
4
6
  from vantage6.common.globals import STRING_ENCODING, ENV_VAR_EQUALS_REPLACEMENT
5
7
 
6
8
 
@@ -40,6 +42,9 @@ def error(msg: str) -> None:
40
42
  sys.stdout.write(f"error > {msg}\n")
41
43
 
42
44
 
45
+ # TODO v5+ move this function to wrap.py and no longer expose it to be used by
46
+ # algorithms but as part of _decode_env_vars. It is kept here for backwards
47
+ # compatibility with 4.2/4.3 algorithms
43
48
  def get_env_var(var_name: str, default: str | None = None) -> str:
44
49
  """
45
50
  Get the value of an environment variable. Environment variables are encoded
@@ -69,3 +74,6 @@ def get_env_var(var_name: str, default: str | None = None) -> str:
69
74
  return base64.b32decode(encoded_env_var_value).decode(STRING_ENCODING)
70
75
  except KeyError:
71
76
  return default
77
+ except binascii.Error:
78
+ # If the decoding fails, return the original value
79
+ return os.environ[var_name]
@@ -7,7 +7,7 @@ from typing import Any
7
7
  from vantage6.common.client import deserialization
8
8
  from vantage6.common import serialization
9
9
  from vantage6.algorithm.tools.util import info, error, get_env_var
10
- from vantage6.algorithm.tools.exceptions import DeserializationException
10
+ from vantage6.algorithm.tools.exceptions import DeserializationError
11
11
 
12
12
 
13
13
  def wrap_algorithm(log_traceback: bool = True) -> None:
@@ -52,8 +52,11 @@ def wrap_algorithm(log_traceback: bool = True) -> None:
52
52
  exit(1)
53
53
  info(f"wrapper for {module}")
54
54
 
55
+ # Decode environment variables that are encoded by the node.
56
+ _decode_env_vars()
57
+
55
58
  # read input from the mounted input file.
56
- input_file = get_env_var("INPUT_FILE")
59
+ input_file = os.environ["INPUT_FILE"]
57
60
  info(f"Reading input file {input_file}")
58
61
  input_data = load_input(input_file)
59
62
 
@@ -63,7 +66,7 @@ def wrap_algorithm(log_traceback: bool = True) -> None:
63
66
 
64
67
  # write output from the method to mounted output file. Which will be
65
68
  # transferred back to the server by the node-instance.
66
- output_file = get_env_var("OUTPUT_FILE")
69
+ output_file = os.environ["OUTPUT_FILE"]
67
70
  info(f"Writing output to {output_file}")
68
71
 
69
72
  _write_output(output, output_file)
@@ -145,14 +148,14 @@ def load_input(input_file: str) -> Any:
145
148
 
146
149
  Raises
147
150
  ------
148
- DeserializationException
151
+ DeserializationError
149
152
  Failed to deserialize input data
150
153
  """
151
154
  with open(input_file, "rb") as fp:
152
155
  try:
153
156
  input_data = deserialization.deserialize(fp)
154
- except DeserializationException:
155
- raise DeserializationException("Could not deserialize input")
157
+ except DeserializationError:
158
+ raise DeserializationError("Could not deserialize input")
156
159
  return input_data
157
160
 
158
161
 
@@ -170,3 +173,16 @@ def _write_output(output: Any, output_file: str) -> None:
170
173
  with open(output_file, "wb") as fp:
171
174
  serialized = serialization.serialize(output)
172
175
  fp.write(serialized)
176
+
177
+
178
+ def _decode_env_vars() -> None:
179
+ """
180
+ Decode environment variables that are encoded by the node
181
+
182
+ Note that environment variables may be present that are not specific to vantage6,
183
+ such as HOME, PATH, etc. These are not encoded by the node and should not be
184
+ decoded here. The `get_env_var` function handles these properly so that the
185
+ original value is returned if the environment variable is not encoded.
186
+ """
187
+ for env_var in os.environ:
188
+ os.environ[env_var] = get_env_var(env_var)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vantage6-algorithm-tools
3
- Version: 4.3.4
3
+ Version: 4.4.0
4
4
  Summary: Vantage6 algorithm tools
5
5
  Home-page: https://github.com/vantage6/vantage6
6
6
  Requires-Python: >=3.6
@@ -3,7 +3,7 @@ pandas>=1.5.3
3
3
  PyJWT==2.6.0
4
4
  pyfiglet==0.8.post1
5
5
  SPARQLWrapper>=2.0.0
6
- vantage6-common==4.3.4
6
+ vantage6-common==4.4.0
7
7
 
8
8
  [dev]
9
9
  black
@@ -1,4 +0,0 @@
1
- class DeserializationException(Exception):
2
- """Raised when result deserialization fails."""
3
-
4
- pass