PyPI - vantage6-algorithm-tools - Versions diffs - 4.3.4__tar.gz → 4.4.0__tar.gz - Mend

vantage6-algorithm-tools 4.3.4 (tar.gz) → 4.4.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vantage6-algorithm-tools
-Version: 4.3.4
+Version: 4.4.0
 Summary: Vantage6 algorithm tools
 Home-page: https://github.com/vantage6/vantage6
 Requires-Python: >=3.6

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/client/__init__.py RENAMED Viewed

@@ -258,9 +258,13 @@ class AlgorithmClient(ClientBase):
             self.parent.log.info("--> Attempting to decode results!")
             result = None
             if response.get("result"):
-                result = json_lib.loads(
-                    base64s_to_bytes(response.get("result")).decode()
-                )
+                try:
+                    result = json_lib.loads(
+                        base64s_to_bytes(response.get("result")).decode()
+                    )
+                except Exception as e:
+                    self.parent.log.error("Unable to load results")
+                    self.parent.log.debug(e)
             return result
         def from_task(self, task_id: int) -> list[Any]:

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/client/_version.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open(os.path.join(here, "__build__")) as fp:
     __build__ = json.load(fp)
 # Module version
-version_info = (4, 3, 4, "final", __build__, 0)
+version_info = (4, 4, 0, "final", __build__, 0)
 # Module version stage suffix map
 _specifier_ = {"alpha": "a", "beta": "b", "candidate": "rc", "final": ""}

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/decorators.py RENAMED Viewed

@@ -10,7 +10,7 @@ import pandas as pd
 from vantage6.algorithm.client import AlgorithmClient
 from vantage6.algorithm.tools.mock_client import MockAlgorithmClient
-from vantage6.algorithm.tools.util import info, error, warn, get_env_var
+from vantage6.algorithm.tools.util import info, error, warn
 from vantage6.algorithm.tools.wrappers import load_data
 from vantage6.algorithm.tools.preprocessing import preprocess_data
@@ -93,12 +93,12 @@ def _algorithm_client() -> callable:
             if mock_client is not None:
                 return func(mock_client, *args, **kwargs)
             # read server address from the environment
-            host = get_env_var("HOST")
-            port = get_env_var("PORT")
-            api_path = get_env_var("API_PATH")
+            host = os.environ["HOST"]
+            port = os.environ["PORT"]
+            api_path = os.environ["API_PATH"]
             # read token from the environment
-            token_file = get_env_var("TOKEN_FILE")
+            token_file = os.environ["TOKEN_FILE"]
             info("Reading token")
             with open(token_file) as fp:
                 token = fp.read().strip()
@@ -195,7 +195,7 @@ def data(number_of_databases: int = 1) -> callable:
                 # do any data preprocessing here
                 info(f"Applying preprocessing for database '{label}'")
-                env_prepro = get_env_var(f"{label.upper()}_PREPROCESSING")
+                env_prepro = os.environ.get(f"{label.upper()}_PREPROCESSING")
                 if env_prepro is not None:
                     preprocess = json.loads(env_prepro)
                     data_ = preprocess_data(data_, preprocess)
@@ -309,7 +309,7 @@ def metadata(func: callable) -> callable:
         >>> def my_algorithm(metadata: RunMetaData, <other arguments>):
         >>>     pass
         """
-        token_file = get_env_var("TOKEN_FILE")
+        token_file = os.environ["TOKEN_FILE"]
         info("Reading token")
         with open(token_file) as fp:
             token = fp.read().strip()
@@ -322,10 +322,10 @@ def metadata(func: callable) -> callable:
             node_id=payload["node_id"],
             collaboration_id=payload["collaboration_id"],
             organization_id=payload["organization_id"],
-            temporary_directory=Path(get_env_var("TEMPORARY_FOLDER")),
-            output_file=Path(get_env_var("OUTPUT_FILE")),
-            input_file=Path(get_env_var("INPUT_FILE")),
-            token_file=Path(get_env_var("TOKEN_FILE")),
+            temporary_directory=Path(os.environ["TEMPORARY_FOLDER"]),
+            output_file=Path(os.environ["OUTPUT_FILE"]),
+            input_file=Path(os.environ["INPUT_FILE"]),
+            token_file=Path(os.environ["TOKEN_FILE"]),
         )
         return func(metadata, *args, **kwargs)
@@ -355,11 +355,11 @@ def get_ohdsi_metadata(label: str) -> OHDSIMetaData:
     for var in expected_env_vars:
         _check_environment_var_exists_or_exit(f"{label_}_DB_PARAM_{var}")
-    tmp = Path(get_env_var("TEMPORARY_FOLDER"))
+    tmp = Path(os.environ["TEMPORARY_FOLDER"])
     metadata = OHDSIMetaData(
-        database=get_env_var(f"{label_}_DB_PARAM_CDM_DATABASE"),
-        cdm_schema=get_env_var(f"{label_}_DB_PARAM_CDM_SCHEMA"),
-        results_schema=get_env_var(f"{label_}_DB_PARAM_RESULTS_SCHEMA"),
+        database=os.environ[f"{label_}_DB_PARAM_CDM_DATABASE"],
+        cdm_schema=os.environ[f"{label_}_DB_PARAM_CDM_SCHEMA"],
+        results_schema=os.environ[f"{label_}_DB_PARAM_RESULTS_SCHEMA"],
         incremental_folder=tmp / "incremental",
         cohort_statistics_folder=tmp / "cohort_statistics",
         export_folder=tmp / "export",
@@ -417,10 +417,10 @@ def _create_omop_database_connection(label: str) -> callable:
         _check_environment_var_exists_or_exit(f"{label_}_DB_PARAM_{var}")
     info("Reading OHDSI environment variables")
-    dbms = get_env_var(f"{label_}_DB_PARAM_DBMS")
-    uri = get_env_var(f"{label_}_DATABASE_URI")
-    user = get_env_var(f"{label_}_DB_PARAM_USER")
-    password = get_env_var(f"{label_}_DB_PARAM_PASSWORD")
+    dbms = os.environ[f"{label_}_DB_PARAM_DBMS"]
+    uri = os.environ[f"{label_}_DATABASE_URI"]
+    user = os.environ[f"{label_}_DB_PARAM_USER"]
+    password = os.environ[f"{label_}_DB_PARAM_PASSWORD"]
     info(f" - dbms: {dbms}")
     info(f" - uri: {uri}")
     info(f" - user: {user}")
@@ -460,20 +460,20 @@ def _get_data_from_label(label: str) -> pd.DataFrame:
         Data from the database
     """
     # Load the input data from the input file - this may e.g. include the
-    database_uri = get_env_var(f"{label.upper()}_DATABASE_URI")
+    database_uri = os.environ[f"{label.upper()}_DATABASE_URI"]
     info(f"Using '{database_uri}' with label '{label}' as database")
     # Get the database type from the environment variable, this variable is
     # set by the vantage6 node based on its configuration file.
-    database_type = get_env_var(f"{label.upper()}_DATABASE_TYPE", "csv").lower()
+    database_type = os.environ.get(f"{label.upper()}_DATABASE_TYPE", "csv").lower()
     # Load the data based on the database type. Try to provide environment
     # variables that should be available for some data types.
     return load_data(
         database_uri,
         database_type,
-        query=get_env_var(f"{label.upper()}_QUERY"),
-        sheet_name=get_env_var(f"{label.upper()}_SHEET_NAME"),
+        query=os.environ.get(f"{label.upper()}_QUERY"),
+        sheet_name=os.environ.get(f"{label.upper()}_SHEET_NAME"),
     )
@@ -488,7 +488,7 @@ def _get_user_database_labels() -> list[str]:
     """
     # read the labels that the user requested, which is a comma
     # separated list of labels.
-    labels = get_env_var("USER_REQUESTED_DATABASE_LABELS")
+    labels = os.environ["USER_REQUESTED_DATABASE_LABELS"]
     return labels.split(",")

vantage6-algorithm-tools-4.4.0/vantage6/algorithm/tools/exceptions.py ADDED Viewed

@@ -0,0 +1,138 @@
+class AlgorithmError(Exception):
+    """Generic exception raised when an algorithm fails."""
+# ---------------- Privacy exceptions ----------------
+class PrivacyViolation(AlgorithmError):
+    """Generic exception raised for data privacy concerns."""
+class PrivacyThresholdViolation(PrivacyViolation):
+    """
+    Raised when privacy threshold is violated.
+    Example usage:
+    - The number of rows in the data is too low.
+    - Returning the results of the algorithm would violate privacy.
+    """
+# ---------------- Data exceptions ----------------
+class DataError(AlgorithmError):
+    """Generic error raised with data handling."""
+class DataReadError(DataError):
+    """Raised when data reading fails.
+    Example usage:
+    - File not found.
+    - File is not in the right format.
+    - File is not readable.
+    - File is empty.
+    """
+class DataTypeError(DataError):
+    """Raised when data type is invalid.
+    Example usage:
+    - String column is selected by user for numeric operation.
+    """
+# ---------------- Runtime exceptions ----------------
+class AlgorithmRuntimeError(AlgorithmError):
+    """Generic error raised when an algorithm fails at runtime."""
+class AlgorithmExecutionError(AlgorithmRuntimeError):
+    """Raised when algorithm function fails.
+    Use when the algorithm function raises an exception.
+    """
+class MaxIterationsReached(AlgorithmRuntimeError):
+    """Raised when the maximum number of iterations is reached."""
+class ConvergenceError(AlgorithmRuntimeError):
+    """Raised when the algorithm fails to converge."""
+# ---------------- Client exceptions ----------------
+class ClientError(AlgorithmError):
+    """Generic error raised when call to the algorithm client fails."""
+# the most common client errors are defined separately for clarity
+class SubtakCreationError(ClientError):
+    """Raised when subtask creation fails."""
+class CollectOrganizationError(ClientError):
+    """Raised when organization collection fails."""
+class CollectResultsError(ClientError):
+    """Raised when result collection fails."""
+# ---------------- Input exceptions ----------------
+class InputError(AlgorithmError):
+    """Generic error raised with algorithm input handling.
+    Example usage:
+    - User input is invalid.
+    - Subtask fails due to invalid input received from the parent task.
+    """
+class UserInputError(InputError):
+    """Raised when user input is invalid.
+    Example usage:
+    - User input is not in the expected format.
+    """
+class DeserializationError(InputError):
+    """Raised when result deserialization fails."""
+# TODO v5+ remove this alias, which is there for backwards compatibility
+DeserializationException = DeserializationError
+class EnvironmentVariableError(InputError):
+    """Raised when environment variable is not found."""
+# ---------------- Initialization exceptions ----------------
+class AlgorithmInitializationError(AlgorithmError):
+    """Generic error raised when algorithm initialization fails."""
+class AlgorithmModuleNotFoundError(AlgorithmInitializationError):
+    """
+    Raised when the algorithm module is not found.
+    Note that if this error is raised, the algorithm image is not built correctly.
+    """
+class MethodNotFoundError(AlgorithmInitializationError):
+    """
+    Raised when the algorithm method is not found.
+    This error may be raised if the user calls a non-existing method, or if the
+    algorithm image is not built correctly.
+    """

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/util.py RENAMED Viewed

@@ -1,6 +1,8 @@
 import sys
 import os
 import base64
+import binascii
 from vantage6.common.globals import STRING_ENCODING, ENV_VAR_EQUALS_REPLACEMENT
@@ -40,6 +42,9 @@ def error(msg: str) -> None:
     sys.stdout.write(f"error > {msg}\n")
+# TODO v5+ move this function to wrap.py and no longer expose it to be used by
+# algorithms but as part of _decode_env_vars. It is kept here for backwards
+# compatibility with 4.2/4.3 algorithms
 def get_env_var(var_name: str, default: str | None = None) -> str:
     """
     Get the value of an environment variable. Environment variables are encoded
@@ -69,3 +74,6 @@ def get_env_var(var_name: str, default: str | None = None) -> str:
         return base64.b32decode(encoded_env_var_value).decode(STRING_ENCODING)
     except KeyError:
         return default
+    except binascii.Error:
+        # If the decoding fails, return the original value
+        return os.environ[var_name]

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6/algorithm/tools/wrap.py RENAMED Viewed

@@ -7,7 +7,7 @@ from typing import Any
 from vantage6.common.client import deserialization
 from vantage6.common import serialization
 from vantage6.algorithm.tools.util import info, error, get_env_var
-from vantage6.algorithm.tools.exceptions import DeserializationException
+from vantage6.algorithm.tools.exceptions import DeserializationError
 def wrap_algorithm(log_traceback: bool = True) -> None:
@@ -52,8 +52,11 @@ def wrap_algorithm(log_traceback: bool = True) -> None:
         exit(1)
     info(f"wrapper for {module}")
+    # Decode environment variables that are encoded by the node.
+    _decode_env_vars()
     # read input from the mounted input file.
-    input_file = get_env_var("INPUT_FILE")
+    input_file = os.environ["INPUT_FILE"]
     info(f"Reading input file {input_file}")
     input_data = load_input(input_file)
@@ -63,7 +66,7 @@ def wrap_algorithm(log_traceback: bool = True) -> None:
     # write output from the method to mounted output file. Which will be
     # transferred back to the server by the node-instance.
-    output_file = get_env_var("OUTPUT_FILE")
+    output_file = os.environ["OUTPUT_FILE"]
     info(f"Writing output to {output_file}")
     _write_output(output, output_file)
@@ -145,14 +148,14 @@ def load_input(input_file: str) -> Any:
     Raises
     ------
-    DeserializationException
+    DeserializationError
         Failed to deserialize input data
     """
     with open(input_file, "rb") as fp:
         try:
             input_data = deserialization.deserialize(fp)
-        except DeserializationException:
-            raise DeserializationException("Could not deserialize input")
+        except DeserializationError:
+            raise DeserializationError("Could not deserialize input")
     return input_data
@@ -170,3 +173,16 @@ def _write_output(output: Any, output_file: str) -> None:
     with open(output_file, "wb") as fp:
         serialized = serialization.serialize(output)
         fp.write(serialized)
+def _decode_env_vars() -> None:
+    """
+    Decode environment variables that are encoded by the node
+    Note that environment variables may be present that are not specific to vantage6,
+    such as HOME, PATH, etc. These are not encoded by the node and should not be
+    decoded here. The `get_env_var` function handles these properly so that the
+    original value is returned if the environment variable is not encoded.
+    """
+    for env_var in os.environ:
+        os.environ[env_var] = get_env_var(env_var)

{vantage6-algorithm-tools-4.3.4 → vantage6-algorithm-tools-4.4.0}/vantage6_algorithm_tools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vantage6-algorithm-tools
-Version: 4.3.4
+Version: 4.4.0
 Summary: Vantage6 algorithm tools
 Home-page: https://github.com/vantage6/vantage6
 Requires-Python: >=3.6