PyPI - truefoundry - Versions diffs - 0.11.3rc2__py3-none-any.whl → 0.11.4__py3-none-any.whl - Mend

truefoundry 0.11.3rc2__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of truefoundry might be problematic. Click here for more details.

Files changed (8)

truefoundry/ml/artifact/truefoundry_artifact_repo.py CHANGED Viewed

@@ -654,7 +654,7 @@ class MlFoundryArtifactsRepository:
                 artifact_identifier=self.artifact_identifier, paths=[remote_file_path]
             )[0]
-        if progress_bar is None or not progress_bar.disable:
+        if progress_bar is None or progress_bar.disable:
             logger.info("Downloading %s to %s", remote_file_path, local_path)
         if progress_bar is not None:

truefoundry/ml/integrations/__init__.py ADDED Viewed

File without changes

truefoundry/ml/integrations/huggingface/__init__.py ADDED Viewed

File without changes

truefoundry/ml/integrations/huggingface/trainer_callback.py ADDED Viewed

@@ -0,0 +1,198 @@
+import logging
+import math
+import os
+from typing import TYPE_CHECKING, Any, Dict, Optional
+import numpy as np
+from truefoundry import ml
+try:
+    from transformers.integrations.integration_utils import rewrite_logs
+    from transformers.trainer_callback import TrainerCallback
+except ImportError as e:
+    raise ImportError(
+        "Importing this module requires `transformers` to be installed"
+    ) from e
+if TYPE_CHECKING:
+    from transformers.trainer_callback import TrainerControl, TrainerState
+    from transformers.training_args import TrainingArguments
+    from truefoundry.ml import MlFoundryRun
+logger = logging.getLogger(__name__)
+class TrueFoundryMLCallback(TrainerCallback):
+    def __init__(
+        self,
+        run: "MlFoundryRun",
+        log_checkpoints: bool = True,
+        checkpoint_artifact_name: Optional[str] = None,
+        auto_end_run_on_train_end: bool = False,
+    ):
+        """
+        Args:
+            run: The run entity to log metrics to.
+            log_checkpoints: Whether to log checkpoints or not, defaults to True.
+            checkpoint_artifact_name: The name of the artifact to log checkpoints to, required if log_checkpoints is True.
+            auto_end_run_on_train_end: Whether to end the run automatically when training ends, defaults to False.
+            Usage:
+                from transformers import Trainer
+                from truefoundry.ml.integrations.huggingface.trainer_callback import TrueFoundryMLCallback
+                from truefoundry.ml import get_client
+                client = get_client()
+                run = client.create_run(ml_repo="my-ml-repo", run_name="my-run", auto_end=False)
+                callback = TrueFoundryMLCallback(
+                    run=run,
+                    log_checkpoints=True,
+                    checkpoint_artifact_name="my-checkpoint",
+                    auto_end_run_on_train_end=True,
+                )
+                trainer = Trainer(
+                    ...,
+                    callbacks=[callback]
+                )
+        """
+        self._run = run
+        self._log_checkpoints = log_checkpoints
+        if self._log_checkpoints and not checkpoint_artifact_name:
+            raise ValueError(
+                "`checkpoint_artifact_name` is required when `log_checkpoints` is True"
+            )
+        self._checkpoint_artifact_name = checkpoint_artifact_name
+        self._auto_end_run_on_train_end = auto_end_run_on_train_end
+    @classmethod
+    def with_managed_run(
+        cls,
+        ml_repo: str,
+        run_name: Optional[str] = None,
+        log_checkpoints: bool = True,
+        checkpoint_artifact_name: Optional[str] = None,
+        auto_end_run_on_train_end: bool = True,
+    ) -> "TrueFoundryMLCallback":
+        """
+        Args:
+            ml_repo: The name of the ML Repository to log metrics and data to.
+            run_name: The name of the run, if not provided, a random name will be generated.
+            log_checkpoints: Whether to log checkpoints or not, defaults to True.
+            checkpoint_artifact_name: The name of the artifact to log checkpoints to, required if log_checkpoints is True.
+            auto_end_run_on_train_end: Whether to end the run automatically when training ends, defaults to True.
+        Usage:
+            from transformers import Trainer
+            from truefoundry.ml.integrations.huggingface.trainer_callback import TrueFoundryMLCallback
+            callback = TrueFoundryMLCallback.with_managed_run(
+                ml_repo="my-ml-repo",
+                run_name="my-run",
+                log_checkpoints=True,
+                checkpoint_artifact_name="my-checkpoint",
+                auto_end_run_on_train_end=True,
+            )
+            trainer = Trainer(
+                ...,
+                callbacks=[callback]
+            )
+        """
+        run = ml.get_client().create_run(
+            ml_repo=ml_repo, run_name=run_name, auto_end=False
+        )
+        return cls(
+            run=run,
+            log_checkpoints=log_checkpoints,
+            checkpoint_artifact_name=checkpoint_artifact_name,
+            auto_end_run_on_train_end=auto_end_run_on_train_end,
+        )
+    def _drop_non_finite_values(self, dct: Dict[str, Any]) -> Dict[str, Any]:
+        sanitized = {}
+        for k, v in dct.items():
+            if isinstance(v, (int, float, np.integer, np.floating)) and math.isfinite(
+                v
+            ):
+                sanitized[k] = v
+            else:
+                logger.warning(
+                    f'Trainer is attempting to log a value of "{v}" of'
+                    f' type {type(v)} for key "{k}" as a metric.'
+                    " Mlfoundry's log_metric() only accepts finite float and"
+                    " int types so we dropped this attribute."
+                )
+        return sanitized
+    @property
+    def run(self) -> "MlFoundryRun":
+        return self._run
+    # noinspection PyMethodOverriding
+    def on_log(
+        self,
+        args: "TrainingArguments",
+        state: "TrainerState",
+        control: "TrainerControl",
+        logs: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ):
+        logs = logs or {}
+        if not state.is_world_process_zero:
+            return
+        metrics = self._drop_non_finite_values(logs)
+        self._run.log_metrics(rewrite_logs(metrics), step=state.global_step)
+    def on_save(
+        self,
+        args: "TrainingArguments",
+        state: "TrainerState",
+        control: "TrainerControl",
+        **kwargs,
+    ):
+        if not state.is_world_process_zero:
+            return
+        if not self._log_checkpoints:
+            return
+        if not self._checkpoint_artifact_name:
+            return
+        ckpt_dir = f"checkpoint-{state.global_step}"
+        artifact_path = os.path.join(args.output_dir, ckpt_dir)
+        description = None
+        _job_name = os.getenv("TFY_INTERNAL_COMPONENT_NAME")
+        _job_run_name = os.getenv("TFY_INTERNAL_JOB_RUN_NAME")
+        if _job_name:
+            description = f"Checkpoint from job={_job_name} run={_job_run_name}"
+        logger.info(f"Uploading checkpoint {ckpt_dir} ...")
+        metadata = {}
+        for log in state.log_history:
+            if isinstance(log, dict) and log.get("step") == state.global_step:
+                metadata = log.copy()
+        metadata = self._drop_non_finite_values(metadata)
+        self._run.log_artifact(
+            name=self._checkpoint_artifact_name,
+            artifact_paths=[(artifact_path, None)],
+            metadata=metadata,
+            step=state.global_step,
+            description=description,
+        )
+    def on_train_end(
+        self,
+        args: "TrainingArguments",
+        state: "TrainerState",
+        control: "TrainerControl",
+        **kwargs,
+    ):
+        """
+        Event called at the end of training.
+        """
+        if self._auto_end_run_on_train_end:
+            self._run.end()

{truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: truefoundry
-Version: 0.11.3rc2
+Version: 0.11.4
 Summary: TrueFoundry CLI
 Author-email: TrueFoundry Team <abhishek@truefoundry.com>
 Requires-Python: <3.14,>=3.8.1

{truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.4.dist-info}/RECORD RENAMED Viewed

@@ -349,7 +349,7 @@ truefoundry/ml/_autogen/models/schema.py,sha256=a_bp42MMPUbwO3407m0UW2W8EOhnxZXf
 truefoundry/ml/_autogen/models/signature.py,sha256=rBjpxUIsEeWM0sIyYG5uCJB18DKHR4k5yZw8TzuoP48,4987
 truefoundry/ml/_autogen/models/utils.py,sha256=c7RtSLXhOLcP8rjuUtfnMdaKVTZvvbsmw98gPAkAFrs,24371
 truefoundry/ml/artifact/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=hbgLxSoihkLVuICzRueuh8iAIc-yruCW5TuMXYQ-aCU,35692
+truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=8BFKaXDxutw8bPJLnDI0bO0oNS_xJKo2ijubc2PLFsU,35688
 truefoundry/ml/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 truefoundry/ml/cli/cli.py,sha256=MwpY7z_NEeJE_XIP7XbZELjNeu2vpMmohttHCKDRk54,335
 truefoundry/ml/cli/utils.py,sha256=j6_mZ4Spn114mz3P4QQ8jx0tmorXIuyQnHXVUSDvZi4,1035
@@ -357,6 +357,9 @@ truefoundry/ml/cli/commands/__init__.py,sha256=diDUiRUX4l6TtNLI4iF-ZblczkELM7FRV
 truefoundry/ml/cli/commands/download.py,sha256=N9MhsEQ3U24v_OmnMZT8Q4SoAi38Sm7a21unrACOSDw,2573
 truefoundry/ml/cli/commands/model_init.py,sha256=INyUAU6hiFClI8cZqX5hgnrtNbeKxlZxrjFrjzStU18,2664
 truefoundry/ml/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+truefoundry/ml/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+truefoundry/ml/integrations/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+truefoundry/ml/integrations/huggingface/trainer_callback.py,sha256=Zu5AUbH_ct8I1dHyNYJQZBj9Y__hKo0sc2OxpPXJARE,6952
 truefoundry/ml/log_types/__init__.py,sha256=g4u4D4Jaj0aBK5GtrLV88-qThKZR9pSZ17vFEkN-LmM,125
 truefoundry/ml/log_types/plot.py,sha256=LDh4uy6z2P_a2oPM2lc85c0lt8utVvunohzeMawFjZw,7572
 truefoundry/ml/log_types/pydantic_base.py,sha256=eBlw_AEyAz4iJKDP4zgJOCFWcldwQqpf7FADW1jzIQY,272
@@ -383,7 +386,7 @@ truefoundry/workflow/remote_filesystem/__init__.py,sha256=LQ95ViEjJ7Ts4JcCGOxMPs
 truefoundry/workflow/remote_filesystem/logger.py,sha256=em2l7D6sw7xTLDP0kQSLpgfRRCLpN14Qw85TN7ujQcE,1022
 truefoundry/workflow/remote_filesystem/tfy_signed_url_client.py,sha256=xcT0wQmQlgzcj0nP3tJopyFSVWT1uv3nhiTIuwfXYeg,12342
 truefoundry/workflow/remote_filesystem/tfy_signed_url_fs.py,sha256=nSGPZu0Gyd_jz0KsEE-7w_BmnTD8CVF1S8cUJoxaCbc,13305
-truefoundry-0.11.3rc2.dist-info/METADATA,sha256=JnuUsg_bJq6c07XAzjN-khBwx5sDL9nVkdA7NEGGvlk,2762
-truefoundry-0.11.3rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-truefoundry-0.11.3rc2.dist-info/entry_points.txt,sha256=xVjn7RMN-MW2-9f7YU-bBdlZSvvrwzhpX1zmmRmsNPU,98
-truefoundry-0.11.3rc2.dist-info/RECORD,,
+truefoundry-0.11.4.dist-info/METADATA,sha256=RD0XhZ5hvcV7BAguapQ9yYssfoEDIXwNUi11w5riKtc,2759
+truefoundry-0.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+truefoundry-0.11.4.dist-info/entry_points.txt,sha256=xVjn7RMN-MW2-9f7YU-bBdlZSvvrwzhpX1zmmRmsNPU,98
+truefoundry-0.11.4.dist-info/RECORD,,

{truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

truefoundry 0.11.3rc2__py3-none-any.whl → 0.11.4__py3-none-any.whl

Potentially problematic release.

truefoundry 0.11.3rc2__py3-none-any.whl → 0.11.4__py3-none-any.whl