triggerflow 0.1.4__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trigger_dataset/__init__.py +0 -0
- trigger_dataset/core.py +88 -0
- trigger_loader/__init__.py +0 -0
- trigger_loader/cluster_manager.py +107 -0
- trigger_loader/loader.py +147 -0
- trigger_loader/processor.py +211 -0
- triggerflow/cli.py +122 -0
- triggerflow/core.py +127 -69
- triggerflow/interfaces/__init__.py +0 -0
- triggerflow/interfaces/uGT.py +127 -0
- triggerflow/mlflow_wrapper.py +190 -19
- triggerflow/starter/.gitignore +143 -0
- triggerflow/starter/README.md +0 -0
- triggerflow/starter/cookiecutter.json +5 -0
- triggerflow/starter/prompts.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.dvcignore +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore +143 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml +56 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/README.md +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/base/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/catalog.yml +84 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters.yml +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_compile.yml +14 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_data_processing.yml +8 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_load_data.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_training.yml +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/local/parameters_model_validation.yml +5 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/conf/logging.yml +43 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples.json +15 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/01_raw/samples_dummy.json +26 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/02_loaded/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/03_preprocessed/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/04_models/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/05_validation/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/06_compile/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/data/07_reporting/.gitkeep +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/dvc.yaml +7 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/environment.yml +21 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/pyproject.toml +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__init__.py +3 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/__main__.py +25 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/any_object.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/base_dataset.py +137 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/meta_dataset.py +88 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/datasets/{{ cookiecutter.python_package }}_dataset.py +35 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/base_model.py +155 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/models/{{ cookiecutter.python_package }}_model.py +16 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipeline_registry.py +17 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/nodes.py +50 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/compile/pipeline.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +40 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +28 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/nodes.py +12 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/load_data/pipeline.py +20 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/nodes.py +31 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_training/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/__init__.py +10 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/nodes.py +29 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/model_validation/pipeline.py +24 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +46 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/metric.py +4 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/utils/plotting.py +598 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/compile/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/data_processing/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/load_data/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_training/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/__init__.py +0 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/pipelines/model_validation/test_pipeline.py +9 -0
- triggerflow/starter/{{ cookiecutter.repo_name }}/tests/test_run.py +27 -0
- triggerflow/templates/build_ugt.tcl +46 -0
- triggerflow/templates/data_types.h +524 -0
- triggerflow/templates/makefile +3 -3
- triggerflow/templates/makefile_version +2 -2
- triggerflow/templates/model-gt.cpp +104 -0
- triggerflow/templates/model_template.cpp +19 -18
- triggerflow/templates/scales.h +1 -1
- triggerflow-0.2.4.dist-info/METADATA +192 -0
- triggerflow-0.2.4.dist-info/RECORD +102 -0
- triggerflow-0.2.4.dist-info/entry_points.txt +2 -0
- triggerflow-0.2.4.dist-info/top_level.txt +3 -0
- triggerflow-0.1.4.dist-info/METADATA +0 -61
- triggerflow-0.1.4.dist-info/RECORD +0 -11
- triggerflow-0.1.4.dist-info/top_level.txt +0 -1
- {triggerflow-0.1.4.dist-info → triggerflow-0.2.4.dist-info}/WHEEL +0 -0
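For orientation: per top_level.txt (+3), the 0.2.4 wheel now ships three top-level packages instead of one — trigger_dataset, trigger_loader, and triggerflow. A minimal smoke test of the new layout (hypothetical usage; only the package names come from this diff, the module contents are not shown here):

# Hypothetical check that the new 0.2.4 layout is importable;
# package names are taken from top_level.txt in this diff.
# pip install triggerflow==0.2.4

import trigger_dataset   # new dataset helpers (trigger_dataset/core.py)
import trigger_loader    # new loading/cluster utilities (trigger_loader/*.py)
import triggerflow       # main package, now with cli.py and interfaces/

print(triggerflow.__file__)  # confirm which installation is being imported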
triggerflow/mlflow_wrapper.py
CHANGED
@@ -1,19 +1,187 @@
 # trigger_mlflow.py
-import
-import
+import datetime
+import logging
+import os
 import tempfile
 from pathlib import Path
-from typing import
+from typing import Any
+
+import mlflow
+import mlflow.pyfunc
 from mlflow.tracking import MlflowClient
-from core import TriggerModel
 
+from .core import TriggerModel
+
+logger = logging.getLogger(__name__)
+
+
+def setup_mlflow(
+    mlflow_uri: str = None,
+    web_eos_url: str = None,
+    web_eos_path: str = None,
+    model_name: str = None,
+    experiment_name: str = None,
+    run_name: str = None,
+    experiment_id: str = None,
+    run_id: str = None,
+    creat_web_eos_dir: bool = False,
+    save_env_file: bool = False,
+    auto_configure: bool = False
+):
+
+    # Set the MLflow tracking URI
+    if mlflow_uri is None:
+        mlflow_uri = os.getenv('MLFLOW_URI', 'https://ngt.cern.ch/models')
+    mlflow.set_tracking_uri(mlflow_uri)
+    os.environ["MLFLOW_URI"] = mlflow_uri
+    logger.info(f"Using MLflow tracking URI: {mlflow_uri}")
+
+    # Set the model name
+    if model_name is None:
+        if os.getenv('MLFLOW_MODEL_NAME'):
+            model_name = os.getenv('MLFLOW_MODEL_NAME')
+        else:
+            model_name = os.getenv('CI_COMMIT_BRANCH', 'Test-Model')
+    os.environ["MLFLOW_MODEL_NAME"] = model_name
+    logger.info(f"Using model name: {model_name}")
+
+
+    # Set the experiment name
+    if experiment_name is None:
+        if os.getenv('MLFLOW_EXPERIMENT_NAME'):
+            experiment_name = os.getenv('MLFLOW_EXPERIMENT_NAME')
+        else:
+            experiment_name = os.getenv('CI_COMMIT_BRANCH', 'Test-Training-Torso')
+    os.environ["MLFLOW_EXPERIMENT_NAME"] = experiment_name
+    logger.info(f"Using experiment name: {experiment_name}")
+
+
+    # Set the run name
+    if run_name is None:
+        if os.getenv('CI') == 'true':
+            if os.getenv('CI_PARENT_PIPELINE_ID'):
+                run_name = f"{os.getenv('CI_PARENT_PIPELINE_ID')}-{os.getenv('CI_PIPELINE_ID')}"
+            else:
+                run_name = f"{os.getenv('CI_PIPELINE_ID')}"
+        else:
+            run_name = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+    os.environ["MLFLOW_RUN_NAME"] = run_name
+    logger.info(f"Using run name: {run_name}")
+
+
+    # Create a new experiment or get the existing one
+    if experiment_id is None:
+        if os.getenv("MLFLOW_EXPERIMENT_ID"):
+            experiment_id = os.getenv("MLFLOW_EXPERIMENT_ID")
+        else:
+            try:
+                experiment_id = mlflow.create_experiment(experiment_name)
+            except mlflow.exceptions.MlflowException:
+                experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
+
+    check_experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
+    if str(check_experiment_id) != str(experiment_id):
+        raise ValueError(f"Provided experiment_id {experiment_id} does not match the ID of experiment_name {experiment_name} ({check_experiment_id})")
+
+    # if mlflow.get_experiment_by_name(experiment_name).experiment_id is None:
+    #     experiment_id = mlflow.create_experiment(experiment_name)
+    # else:
+    #     experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
+
+    mlflow.set_experiment(experiment_id=experiment_id)
+    os.environ["MLFLOW_EXPERIMENT_ID"] = experiment_id
+    logger.info(f"Using experiment ID: {experiment_id}")
+
+
+    # Start a new MLflow run
+    if run_id is None:
+        if os.getenv("MLFLOW_RUN_ID"):
+            run_id = os.getenv("MLFLOW_RUN_ID")
+        else:
+            with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
+                run_id = run.info.run_id
+
+    check_run_info = mlflow.get_run(run_id)
+    if str(check_run_info.info.experiment_id) != str(experiment_id):
+        raise ValueError(f"Provided run_id {run_id} does not belong to experiment_id {experiment_id} (found {check_run_info.info.experiment_id})")
+
+    os.environ["MLFLOW_RUN_ID"] = run_id
+    logger.info(f"Started run with ID: {run_id}")
+
+
+    if creat_web_eos_dir:
+        # Set the web_eos_url
+        if web_eos_url is None:
+            web_eos_url = os.getenv('WEB_EOS_URL', 'https://ngt-modeltraining.web.cern.ch/')
+        os.environ["WEB_EOS_URL"] = web_eos_url
+        logger.info(f"Using WEB_EOS_URL: {web_eos_url}")
+
+        # Set the web_eos_path
+        if web_eos_path is None:
+            web_eos_path = os.getenv('WEB_EOS_PATH', '/eos/user/m/mlflowngt/backend/www')
+        os.environ["WEB_EOS_PATH"] = web_eos_path
+        logger.info(f"Using WEB_EOS_PATH: {web_eos_path}")
+
+        # Create WebEOS experiment dir
+        web_eos_experiment_dir = os.path.join(web_eos_path, experiment_name, run_name)
+        web_eos_experiment_url = os.path.join(web_eos_url, experiment_name, run_name)
+        os.makedirs(web_eos_experiment_dir, exist_ok=True)
+        logger.info(f"Created WebEOS experiment directory: {web_eos_experiment_dir}")
+        logger.info(f"Using WebEOS experiment URL: {web_eos_experiment_url}")
+
+    else:
+        web_eos_url=None
+        web_eos_path=None
+        web_eos_experiment_dir=None
+        web_eos_experiment_url=None
+
+
+    # Save environment variables to a file for later steps in CI/CD pipelines
+    if save_env_file and os.getenv("CI") == "true":
+        logger.info(f"Saving MLflow environment variables to {os.getenv('CI_ENV_FILE', 'mlflow.env')}")
+        with open(os.getenv('CI_ENV_FILE', 'mlflow.env'), 'a') as f:
+            f.write(f"MLFLOW_URI={mlflow_uri}\n")
+            f.write(f"MLFLOW_MODEL_NAME={model_name}\n")
+            f.write(f"MLFLOW_EXPERIMENT_NAME={experiment_name}\n")
+            f.write(f"MLFLOW_RUN_NAME={run_name}\n")
+            f.write(f"MLFLOW_EXPERIMENT_ID={experiment_id}\n")
+            f.write(f"MLFLOW_RUN_ID={run_id}\n")
+
+            if creat_web_eos_dir:
+                f.write(f"WEB_EOS_URL={web_eos_url}\n")
+                f.write(f"WEB_EOS_PATH={web_eos_path}\n")
+                f.write(f"WEB_EOS_EXPERIMENT_DIR={web_eos_experiment_dir}\n")
+                f.write(f"WEB_EOS_EXPERIMENT_URL={web_eos_experiment_url}\n")
+
+            if auto_configure:
+                logger.info("Auto_configure is set to true. Exporting AUTO_CONFIGURE=true")
+                f.write("AUTO_CONFIGURE=true\n")
+
+    return {
+        "experiment_name": experiment_name,
+        "run_name": run_name,
+        "experiment_id": experiment_id,
+        "run_id": run_id,
+        "mlflow_uri": mlflow_uri,
+        "model_name": model_name,
+        "web_eos_url": web_eos_url,
+        "web_eos_path": web_eos_path,
+        "web_eos_experiment_dir": web_eos_experiment_dir,
+        "web_eos_experiment_url": web_eos_experiment_url,
+    }
+
+if os.getenv("AUTO_CONFIGURE") == "true":
+    logger.info("AUTO_CONFIGURE is true and running in CI environment. Setting up mlflow...")
+    setup_mlflow()
+else:
+    logger.info("AUTO_CONFIGURE is not set. Skipping mlflow run setup")
 
 class MLflowWrapper(mlflow.pyfunc.PythonModel):
     """PyFunc wrapper for TriggerModel; backend can be set at runtime."""
     def load_context(self, context):
-        archive_path = Path(context.artifacts["
+        archive_path = Path(context.artifacts["triggerflow"])
         self.model = TriggerModel.load(archive_path)
-        self.backend = "software"
+        self.backend = "software"
 
     def predict(self, context, model_input):
         if self.backend == "software":
@@ -35,25 +203,28 @@ class MLflowWrapper(mlflow.pyfunc.PythonModel):
         return {"error": "Model info not available"}
 
 
-def _get_pip_requirements(
+def _get_pip_requirements(triggerflow: TriggerModel) -> list:
     requirements = ["numpy"]
-    if
+    if triggerflow.ml_backend == "keras":
         requirements.extend(["tensorflow", "keras"])
-    elif
+    elif triggerflow.ml_backend == "xgboost":
         requirements.append("xgboost")
-    if
+    if triggerflow.compiler == "hls4ml":
         requirements.append("hls4ml")
-    elif
+    elif triggerflow.compiler == "conifer":
         requirements.append("conifer")
-    if hasattr(
+    if hasattr(triggerflow, "model_qonnx") and triggerflow.model_qonnx is not None:
         requirements.append("qonnx")
     return requirements
 
 
-def log_model(
+def log_model(triggerflow: TriggerModel, registered_model_name: str, artifact_path: str = "TriggerModel"):
    """Log a TriggerModel as a PyFunc model and register it in the Model Registry."""
    if not registered_model_name:
-
+        if not os.getenv("MLFLOW_MODEL_NAME"):
+            raise ValueError("registered_model_name must be provided and non-empty")
+        else:
+            registered_model_name = os.getenv("MLFLOW_MODEL_NAME")
 
    if mlflow.active_run() is None:
        raise RuntimeError("No active MLflow run. Start a run before logging.")
@@ -61,13 +232,13 @@ def log_model(trigger_model: TriggerModel, registered_model_name: str, artifact_
    run = mlflow.active_run()
    with tempfile.TemporaryDirectory() as tmpdir:
        archive_path = Path(tmpdir) / "triggermodel.tar.xz"
-
+        triggerflow.save(archive_path)
 
        mlflow.pyfunc.log_model(
            artifact_path=artifact_path,
            python_model=MLflowWrapper(),
-            artifacts={"
-            pip_requirements=_get_pip_requirements(
+            artifacts={"triggerflow": str(archive_path)},
+            pip_requirements=_get_pip_requirements(triggerflow)
        )
 
    # register model (always required)
@@ -89,11 +260,11 @@ def load_model(model_uri: str) -> mlflow.pyfunc.PyFuncModel:
 
 def load_full_model(model_uri: str) -> TriggerModel:
    local_path = mlflow.artifacts.download_artifacts(model_uri)
-    archive_path = Path(local_path) / "
+    archive_path = Path(local_path) / "triggerflow" / "triggermodel.tar.xz"
    return TriggerModel.load(archive_path)
 
 
-def get_model_info(model_uri: str) ->
+def get_model_info(model_uri: str) -> dict[str, Any]:
    model = mlflow.pyfunc.load_model(model_uri)
    if hasattr(model._model_impl, "get_model_info"):
        return model._model_impl.get_model_info()
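Taken together, the new module gives a CI-oriented setup path (setup_mlflow) plus log/load helpers. A sketch of how they might be combined, using only the signatures visible in the diff above — the import paths follow the file layout, and the TriggerModel construction itself is elided since triggerflow/core.py is not shown here:

# Hypothetical end-to-end usage of the new mlflow_wrapper API; names are
# taken from the diff above, everything else is an assumption.
import mlflow

from triggerflow.core import TriggerModel
from triggerflow.mlflow_wrapper import setup_mlflow, log_model, load_full_model

# Resolve tracking URI, experiment and run (falls back to MLFLOW_* / CI_* env vars)
ctx = setup_mlflow(experiment_name="my-experiment", model_name="my-model")

model = ...  # a TriggerModel instance; its construction is not shown in this diff

# log_model raises RuntimeError without an active run, so resume the run
# that setup_mlflow created and exported as MLFLOW_RUN_ID
with mlflow.start_run(run_id=ctx["run_id"]):
    log_model(model, registered_model_name=ctx["model_name"])

# Later: pull the raw TriggerModel back out of the registry
restored = load_full_model(f"models:/{ctx['model_name']}/latest")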
triggerflow/starter/.gitignore
ADDED
@@ -0,0 +1,143 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.vscode/
+info.log
+
+# IntelliJ
+.idea/
+*.iml
+out/
+.idea_modules/
triggerflow/starter/README.md
File without changes
triggerflow/starter/prompts.yml
ADDED
@@ -0,0 +1,9 @@
+project_name:
+  title: "Project Name"
+  text: |
+    Please enter a human readable name for your new project.
+    Spaces, hyphens, and underscores are allowed.
+  regex_validator: "^[\\w -]{2,}$"
+  error_message: |
+    It must contain only alphanumeric symbols, spaces, underscores and hyphens and
+    be at least 2 characters long.
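The regex_validator above is the pattern applied to the project_name answer. A quick way to sanity-check what it accepts (a standalone snippet, not part of the package):

import re

# Same pattern as regex_validator in prompts.yml: two or more characters,
# each a word character, space, or hyphen (underscores are covered by \w).
pattern = re.compile(r"^[\w -]{2,}$")

for name in ["My Project", "my-project_2", "x", "bad/name"]:
    print(name, "->", bool(pattern.match(name)))
# My Project -> True, my-project_2 -> True, x -> False, bad/name -> False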
triggerflow/starter/{{ cookiecutter.repo_name }}/.gitignore
ADDED
@@ -0,0 +1,143 @@
(143 added lines, identical to triggerflow/starter/.gitignore above)
triggerflow/starter/{{ cookiecutter.repo_name }}/.gitlab-ci.yml
ADDED
@@ -0,0 +1,56 @@
+image: continuumio/miniconda3
+
+stages:
+  - load
+  - preprocess
+  - train
+  - validate
+  - compile
+
+variables:
+  KEDRO_ENV: "base" # TODO: add production env
+
+before_script:
+  - eval "$(conda shell.bash hook)"
+  - conda env create --file=environment.yml
+  - conda activate triggerflow
+
+load_data:
+  stage: load
+  script:
+    - kedro run --pipeline=load_data
+  artifacts:
+    paths:
+      - data/02_loaded/
+
+preprocess_data:
+  stage: preprocess
+  script:
+    - kedro run --pipeline=data_processing
+  artifacts:
+    paths:
+      - data/03_preprocessed/
+
+train_model:
+  stage: train
+  script:
+    - kedro run --pipeline=model_training
+  artifacts:
+    paths:
+      - data/04_models/
+
+validate_model:
+  stage: validate
+  script:
+    - kedro run --pipeline=model_validation
+  artifacts:
+    paths:
+      - data/05_validation/
+
+compile:
+  stage: compile
+  script:
+    - kedro run --pipeline=compile
+  artifacts:
+    paths:
+      - data/06_compile/
triggerflow/starter/{{ cookiecutter.repo_name }}/README.md
ADDED
@@ -0,0 +1,29 @@
+# {{ cookiecutter.project_name }}
+
+- conda env create --file=environment.yml
+- conda activate {{ cookiecutter.project_name }}
+
+## Data versioning
+When a dataset changes one can do (TODO: add this to pipeline to compare the hash):
+- dvc add data/01_raw/companies.csv
+- git add data/01_raw/companies.csv.dvc
+- git commit -m "Track dataset changes with DVC"
+
+## Run CI local
+- brew install gitlab-ci-local
+- gitlab-ci-local --list
+- gitlab-ci-local
+
+## ToDos:
+- move functionality of uhh_mlatl1 to pipeline
+- if case in base dataloader for classification or not
+- add model evaluation steps
+- automation of dvc in CI pipeline
+- move {{ cookiecutter.project_name }} meta data json to dvc
+- add linting and type checking
+- write tests
+- write out reporting / logging / plots etc.
+- track plots with dvc?
+- cross check pipeline afterwards with {{ cookiecutter.project_name }} team
+- make starter pipeline as template
+- add {{ cookiecutter.project_name }} model
triggerflow/starter/{{ cookiecutter.repo_name }}/conf/README.md
ADDED
@@ -0,0 +1,26 @@
+# What is this for?
+
+This folder should be used to store configuration files used by Kedro or by separate tools.
+
+This file can be used to provide users with instructions for how to reproduce local configuration with their own credentials. You can edit the file however you like, but you may wish to retain the information below and add your own section in the [Instructions](#Instructions) section.
+
+## Local configuration
+
+The `local` folder should be used for configuration that is either user-specific (e.g. IDE configuration) or protected (e.g. security keys).
+
+> *Note:* Please do not check in any local configuration to version control.
+
+## Base configuration
+
+The `base` folder is for shared configuration, such as non-sensitive and project-related configuration that may be shared across team members.
+
+WARNING: Please do not put access credentials in the base configuration folder.
+
+## Instructions
+
+
+
+
+## Need help?
+
+[Find out more about configuration from the Kedro documentation](https://docs.kedro.org/en/stable/kedro_project_setup/configuration.html).