triggerflow 0.1.12__py3-none-any.whl → 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trigger_dataset/__init__.py +0 -0
- trigger_dataset/core.py +88 -0
- trigger_loader/__init__.py +0 -0
- trigger_loader/cluster_manager.py +107 -0
- trigger_loader/loader.py +95 -0
- trigger_loader/processor.py +211 -0
- triggerflow/cli.py +122 -0
- triggerflow/core.py +118 -114
- triggerflow/mlflow_wrapper.py +54 -49
- triggerflow/starter/.gitignore +143 -0
- triggerflow/starter/README.md +0 -0
- triggerflow/starter/cookiecutter.json +5 -0
- triggerflow/starter/prompts.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.dvcignore +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore +143 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml +56 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/README.md +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/logging.yml +43 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples.json +15 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples_dummy.json +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/02_loaded/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/03_preprocessed/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/04_models/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/05_validation/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/06_compile/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/07_reporting/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/dvc.yaml +7 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/environment.yml +21 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/pyproject.toml +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py +25 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/any_object.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/base_dataset.py +137 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/meta_dataset.py +88 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/{{ cookiecutter.python_package }}_dataset.py +35 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/base_model.py +155 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/{{ cookiecutter.python_package }}_model.py +16 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py +17 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/nodes.py +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/pipeline.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +40 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +28 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/nodes.py +12 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/pipeline.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/nodes.py +31 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/nodes.py +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +46 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/metric.py +4 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/plotting.py +598 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/test_run.py +27 -0
- triggerflow-0.2.dist-info/METADATA +97 -0
- triggerflow-0.2.dist-info/RECORD +97 -0
- triggerflow-0.2.dist-info/entry_points.txt +2 -0
- triggerflow-0.2.dist-info/top_level.txt +3 -0
- triggerflow-0.1.12.dist-info/METADATA +0 -61
- triggerflow-0.1.12.dist-info/RECORD +0 -11
- triggerflow-0.1.12.dist-info/top_level.txt +0 -1
- {triggerflow-0.1.12.dist-info → triggerflow-0.2.dist-info}/WHEEL +0 -0
triggerflow/mlflow_wrapper.py CHANGED

@@ -1,34 +1,40 @@
 # trigger_mlflow.py
-import
+import datetime
+import logging
 import os
-import mlflow.pyfunc
 import tempfile
 from pathlib import Path
-from typing import
+from typing import Any
+
+import mlflow
+import mlflow.pyfunc
 from mlflow.tracking import MlflowClient
+
 from .core import TriggerModel
 
+logger = logging.getLogger(__name__)
 
-
-
-
-
-
-
-
-
-
-
-
+
+def setup_mlflow(
+    mlflow_uri: str = None,
+    web_eos_url: str = None,
+    web_eos_path: str = None,
+    model_name: str = None,
+    experiment_name: str = None,
+    run_name: str = None,
+    experiment_id: str = None,
+    run_id: str = None,
+    creat_web_eos_dir: bool = False,
+    save_env_file: bool = False,
+    auto_configure: bool = False
+):
 
     # Set the MLflow tracking URI
     if mlflow_uri is None:
         mlflow_uri = os.getenv('MLFLOW_URI', 'https://ngt.cern.ch/models')
     mlflow.set_tracking_uri(mlflow_uri)
     os.environ["MLFLOW_URI"] = mlflow_uri
-
-
+    logger.info(f"Using MLflow tracking URI: {mlflow_uri}")
 
     # Set the model name
     if model_name is None:
@@ -37,7 +43,7 @@ def setup_mlflow(mlflow_uri: str = None,
     else:
         model_name = os.getenv('CI_COMMIT_BRANCH', 'Test-Model')
     os.environ["MLFLOW_MODEL_NAME"] = model_name
-
+    logger.info(f"Using model name: {model_name}")
 
 
     # Set the experiment name
@@ -47,7 +53,7 @@ def setup_mlflow(mlflow_uri: str = None,
     else:
         experiment_name = os.getenv('CI_COMMIT_BRANCH', 'Test-Training-Torso')
     os.environ["MLFLOW_EXPERIMENT_NAME"] = experiment_name
-
+    logger.info(f"Using experiment name: {experiment_name}")
 
 
     # Set the run name
@@ -58,10 +64,9 @@ def setup_mlflow(mlflow_uri: str = None,
         else:
             run_name = f"{os.getenv('CI_PIPELINE_ID')}"
     else:
-        import datetime
         run_name = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
     os.environ["MLFLOW_RUN_NAME"] = run_name
-
+    logger.info(f"Using run name: {run_name}")
 
 
     # Create a new experiment or get the existing one
@@ -73,7 +78,7 @@ def setup_mlflow(mlflow_uri: str = None,
             experiment_id = mlflow.create_experiment(experiment_name)
         except mlflow.exceptions.MlflowException:
             experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
-
+
     check_experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
     if str(check_experiment_id) != str(experiment_id):
         raise ValueError(f"Provided experiment_id {experiment_id} does not match the ID of experiment_name {experiment_name} ({check_experiment_id})")
@@ -85,7 +90,7 @@ def setup_mlflow(mlflow_uri: str = None,
 
     mlflow.set_experiment(experiment_id=experiment_id)
     os.environ["MLFLOW_EXPERIMENT_ID"] = experiment_id
-
+    logger.info(f"Using experiment ID: {experiment_id}")
 
 
     # Start a new MLflow run
@@ -99,9 +104,9 @@ def setup_mlflow(mlflow_uri: str = None,
         check_run_info = mlflow.get_run(run_id)
         if str(check_run_info.info.experiment_id) != str(experiment_id):
             raise ValueError(f"Provided run_id {run_id} does not belong to experiment_id {experiment_id} (found {check_run_info.info.experiment_id})")
-
+
     os.environ["MLFLOW_RUN_ID"] = run_id
-
+    logger.info(f"Started run with ID: {run_id}")
 
 
     if creat_web_eos_dir:
@@ -109,21 +114,21 @@ def setup_mlflow(mlflow_uri: str = None,
         if web_eos_url is None:
             web_eos_url = os.getenv('WEB_EOS_URL', 'https://ngt-modeltraining.web.cern.ch/')
         os.environ["WEB_EOS_URL"] = web_eos_url
-
+        logger.info(f"Using WEB_EOS_URL: {web_eos_url}")
 
         # Set the web_eos_path
         if web_eos_path is None:
            web_eos_path = os.getenv('WEB_EOS_PATH', '/eos/user/m/mlflowngt/backend/www')
         os.environ["WEB_EOS_PATH"] = web_eos_path
-
+        logger.info(f"Using WEB_EOS_PATH: {web_eos_path}")
 
         # Create WebEOS experiment dir
         web_eos_experiment_dir = os.path.join(web_eos_path, experiment_name, run_name)
         web_eos_experiment_url = os.path.join(web_eos_url, experiment_name, run_name)
         os.makedirs(web_eos_experiment_dir, exist_ok=True)
-
-
-
+        logger.info(f"Created WebEOS experiment directory: {web_eos_experiment_dir}")
+        logger.info(f"Using WebEOS experiment URL: {web_eos_experiment_url}")
+
     else:
         web_eos_url=None
         web_eos_path=None
@@ -133,7 +138,7 @@ def setup_mlflow(mlflow_uri: str = None,
 
     # Save environment variables to a file for later steps in CI/CD pipelines
     if save_env_file and os.getenv("CI") == "true":
-
+        logger.info(f"Saving MLflow environment variables to {os.getenv('CI_ENV_FILE', 'mlflow.env')}")
         with open(os.getenv('CI_ENV_FILE', 'mlflow.env'), 'a') as f:
             f.write(f"MLFLOW_URI={mlflow_uri}\n")
             f.write(f"MLFLOW_MODEL_NAME={model_name}\n")
@@ -149,8 +154,8 @@ def setup_mlflow(mlflow_uri: str = None,
             f.write(f"WEB_EOS_EXPERIMENT_URL={web_eos_experiment_url}\n")
 
         if auto_configure:
-
-            f.write(
+            logger.info("Auto_configure is set to true. Exporting AUTO_CONFIGURE=true")
+            f.write("AUTO_CONFIGURE=true\n")
 
     return {
         "experiment_name": experiment_name,
@@ -166,17 +171,17 @@ def setup_mlflow(mlflow_uri: str = None,
     }
 
 if os.getenv("AUTO_CONFIGURE") == "true":
-
+    logger.info("AUTO_CONFIGURE is true and running in CI environment. Setting up mlflow...")
    setup_mlflow()
 else:
-
+    logger.info("AUTO_CONFIGURE is not set. Skipping mlflow run setup")
 
 class MLflowWrapper(mlflow.pyfunc.PythonModel):
     """PyFunc wrapper for TriggerModel; backend can be set at runtime."""
     def load_context(self, context):
-        archive_path = Path(context.artifacts["
+        archive_path = Path(context.artifacts["triggerflow"])
         self.model = TriggerModel.load(archive_path)
-        self.backend = "software"
+        self.backend = "software"
 
     def predict(self, context, model_input):
         if self.backend == "software":
@@ -198,22 +203,22 @@ class MLflowWrapper(mlflow.pyfunc.PythonModel):
             return {"error": "Model info not available"}
 
 
-def _get_pip_requirements(
+def _get_pip_requirements(triggerflow: TriggerModel) -> list:
     requirements = ["numpy"]
-    if
+    if triggerflow.ml_backend == "keras":
         requirements.extend(["tensorflow", "keras"])
-    elif
+    elif triggerflow.ml_backend == "xgboost":
         requirements.append("xgboost")
-    if
+    if triggerflow.compiler == "hls4ml":
         requirements.append("hls4ml")
-    elif
+    elif triggerflow.compiler == "conifer":
         requirements.append("conifer")
-    if hasattr(
+    if hasattr(triggerflow, "model_qonnx") and triggerflow.model_qonnx is not None:
         requirements.append("qonnx")
     return requirements
 
 
-def log_model(
+def log_model(triggerflow: TriggerModel, registered_model_name: str, artifact_path: str = "TriggerModel"):
     """Log a TriggerModel as a PyFunc model and register it in the Model Registry."""
     if not registered_model_name:
         if not os.getenv("MLFLOW_MODEL_NAME"):
@@ -227,13 +232,13 @@ def log_model(trigger_model: TriggerModel, registered_model_name: str = None, ar
     run = mlflow.active_run()
     with tempfile.TemporaryDirectory() as tmpdir:
         archive_path = Path(tmpdir) / "triggermodel.tar.xz"
-
+        triggerflow.save(archive_path)
 
         mlflow.pyfunc.log_model(
             artifact_path=artifact_path,
             python_model=MLflowWrapper(),
-            artifacts={"
-            pip_requirements=_get_pip_requirements(
+            artifacts={"triggerflow": str(archive_path)},
+            pip_requirements=_get_pip_requirements(triggerflow)
         )
 
     # register model (always required)
@@ -255,11 +260,11 @@ def load_model(model_uri: str) -> mlflow.pyfunc.PyFuncModel:
 
 def load_full_model(model_uri: str) -> TriggerModel:
     local_path = mlflow.artifacts.download_artifacts(model_uri)
-    archive_path = Path(local_path) / "
+    archive_path = Path(local_path) / "triggerflow" / "triggermodel.tar.xz"
     return TriggerModel.load(archive_path)
 
 
-def get_model_info(model_uri: str) ->
+def get_model_info(model_uri: str) -> dict[str, Any]:
     model = mlflow.pyfunc.load_model(model_uri)
     if hasattr(model._model_impl, "get_model_info"):
         return model._model_impl.get_model_info()
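Read together, the rewritten module exposes a small workflow: setup_mlflow() resolves the tracking URI, experiment, and run from arguments or CI environment variables, log_model() archives and registers a TriggerModel, and load_full_model()/get_model_info() retrieve it again. A minimal usage sketch, assuming a reachable tracking server and an existing TriggerModel archive (the URI, file name, and model version below are illustrative):

```python
import triggerflow.mlflow_wrapper as tfm
from triggerflow.core import TriggerModel

# Resolve tracking URI, experiment, and run, and start an MLflow run;
# returns a dict of the resolved settings (e.g. "experiment_name").
info = tfm.setup_mlflow(
    mlflow_uri="https://ngt.cern.ch/models",   # default seen in the diff
    experiment_name="Test-Training-Torso",
    model_name="Test-Model",
)
print(info["experiment_name"])

# Archive the model to triggermodel.tar.xz and log/register it; pip
# requirements come from ml_backend/compiler via _get_pip_requirements.
model = TriggerModel.load("triggermodel.tar.xz")   # assumed existing archive
tfm.log_model(model, registered_model_name="Test-Model")

# Later, pull back the full TriggerModel rather than the PyFunc wrapper.
restored = tfm.load_full_model("models:/Test-Model/1")   # version 1 assumed
print(tfm.get_model_info("models:/Test-Model/1"))
```

Note that log_model() relies on mlflow.active_run(), so setup_mlflow() (or an explicit mlflow.start_run()) has to come first.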
triggerflow/starter/.gitignore ADDED

@@ -0,0 +1,143 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.vscode/
+info.log
+
+# IntelliJ
+.idea/
+*.iml
+out/
+.idea_modules/

triggerflow/starter/README.md ADDED

File without changes
triggerflow/starter/prompts.yml ADDED

@@ -0,0 +1,9 @@
+project_name:
+  title: "Project Name"
+  text: |
+    Please enter a human readable name for your new project.
+    Spaces, hyphens, and underscores are allowed.
+  regex_validator: "^[\\w -]{2,}$"
+  error_message: |
+    It must contain only alphanumeric symbols, spaces, underscores and hyphens and
+    be at least 2 characters long.
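The regex_validator above is a plain Python regular expression, so its effect can be checked directly. A small sketch (the validate_project_name helper is illustrative, not part of the starter):

```python
import re

# Pattern from prompts.yml above: word characters, spaces, and hyphens,
# at least two characters long ("\w" already includes underscores).
PROJECT_NAME_PATTERN = re.compile(r"^[\w -]{2,}$")

def validate_project_name(name: str) -> str:
    """Illustrative helper mirroring how the prompt's validator behaves."""
    if not PROJECT_NAME_PATTERN.match(name):
        raise ValueError(
            "It must contain only alphanumeric symbols, spaces, "
            "underscores and hyphens and be at least 2 characters long."
        )
    return name

print(validate_project_name("Trigger Flow"))  # accepted
# validate_project_name("x") would raise ValueError (too short)
```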
triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore ADDED

@@ -0,0 +1,143 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.vscode/
+info.log
+
+# IntelliJ
+.idea/
+*.iml
+out/
+.idea_modules/
triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml ADDED

@@ -0,0 +1,56 @@
+image: continuumio/miniconda3
+
+stages:
+  - load
+  - preprocess
+  - train
+  - validate
+  - compile
+
+variables:
+  KEDRO_ENV: "base" # TODO: add production env
+
+before_script:
+  - eval "$(conda shell.bash hook)"
+  - conda env create --file=environment.yml
+  - conda activate triggerflow
+
+load_data:
+  stage: load
+  script:
+    - kedro run --pipeline=load_data
+  artifacts:
+    paths:
+      - data/02_loaded/
+
+preprocess_data:
+  stage: preprocess
+  script:
+    - kedro run --pipeline=data_processing
+  artifacts:
+    paths:
+      - data/03_preprocessed/
+
+train_model:
+  stage: train
+  script:
+    - kedro run --pipeline=model_training
+  artifacts:
+    paths:
+      - data/04_models/
+
+validate_model:
+  stage: validate
+  script:
+    - kedro run --pipeline=model_validation
+  artifacts:
+    paths:
+      - data/05_validation/
+
+compile:
+  stage: compile
+  script:
+    - kedro run --pipeline=compile
+  artifacts:
+    paths:
+      - data/06_compile/
triggerflow/starter/{{ cookiecutter.repo_name }}/README.md ADDED

@@ -0,0 +1,29 @@
+# {{ cookiecutter.project_name }}
+
+- conda env create --file=environment.yml
+- conda activate {{ cookiecutter.project_name }}
+
+## Data versioning
+When a dataset changes one can do (TODO: add this to pipeline to compare the hash):
+- dvc add data/01_raw/companies.csv
+- git add data/01_raw/companies.csv.dvc
+- git commit -m "Track dataset changes with DVC"
+
+## Run CI local
+- brew install gitlab-ci-local
+- gitlab-ci-local --list
+- gitlab-ci-local
+
+## ToDos:
+- move functionality of uhh_mlatl1 to pipeline
+- if case in base dataloader for classification or not
+- add model evaluation steps
+- automation of dvc in CI pipeline
+- move {{ cookiecutter.project_name }} meta data json to dvc
+- add linting and type checking
+- write tests
+- write out reporting / logging / plots etc.
+- track plots with dvc?
+- cross check pipeline afterwards with {{ cookiecutter.project_name }} team
+- make starter pipeline as template
+- add {{ cookiecutter.project_name }} model
triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md ADDED

@@ -0,0 +1,26 @@
+# What is this for?
+
+This folder should be used to store configuration files used by Kedro or by separate tools.
+
+This file can be used to provide users with instructions for how to reproduce local configuration with their own credentials. You can edit the file however you like, but you may wish to retain the information below and add your own section in the [Instructions](#Instructions) section.
+
+## Local configuration
+
+The `local` folder should be used for configuration that is either user-specific (e.g. IDE configuration) or protected (e.g. security keys).
+
+> *Note:* Please do not check in any local configuration to version control.
+
+## Base configuration
+
+The `base` folder is for shared configuration, such as non-sensitive and project-related configuration that may be shared across team members.
+
+WARNING: Please do not put access credentials in the base configuration folder.
+
+## Instructions
+
+
+
+
+## Need help?
+
+[Find out more about configuration from the Kedro documentation](https://docs.kedro.org/en/stable/kedro_project_setup/configuration.html).