truefoundry 0.11.3rc2__py3-none-any.whl → 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truefoundry might be problematic. Click here for more details.
- truefoundry/deploy/_autogen/models.py +10 -9
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +1 -1
- truefoundry/ml/integrations/__init__.py +0 -0
- truefoundry/ml/integrations/huggingface/__init__.py +0 -0
- truefoundry/ml/integrations/huggingface/trainer_callback.py +198 -0
- {truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.5.dist-info}/METADATA +2 -2
- {truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.5.dist-info}/RECORD +9 -6
- {truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.5.dist-info}/WHEEL +0 -0
- {truefoundry-0.11.3rc2.dist-info → truefoundry-0.11.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
2
|
# filename: application.json
|
|
3
|
-
# timestamp: 2025-
|
|
3
|
+
# timestamp: 2025-08-06T19:58:42+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
@@ -157,8 +157,8 @@ class CanaryStep(BaseModel):
|
|
|
157
157
|
...,
|
|
158
158
|
description="Percentage of total traffic to be shifted to the canary release.\nThe rest will continue to go to the existing deployment",
|
|
159
159
|
)
|
|
160
|
-
pause_duration: conint(ge=0) = Field(
|
|
161
|
-
|
|
160
|
+
pause_duration: Optional[conint(ge=0)] = Field(
|
|
161
|
+
None,
|
|
162
162
|
description="Duration for which to pause the release. The release process will wait for these seconds before proceeding to the next step.\nIf this is not set, the step will pause indefinitely on this step",
|
|
163
163
|
)
|
|
164
164
|
|
|
@@ -1088,11 +1088,11 @@ class TrueFoundryInteractiveLogin(BaseModel):
|
|
|
1088
1088
|
|
|
1089
1089
|
|
|
1090
1090
|
class VolumeBrowser(BaseModel):
|
|
1091
|
-
username: constr(regex=r"^[a-z][a-z0-9]{1,8}[a-z0-9]$") = Field(
|
|
1092
|
-
|
|
1091
|
+
username: Optional[constr(regex=r"^[a-z][a-z0-9]{1,8}[a-z0-9]$")] = Field(
|
|
1092
|
+
None, description="Username for logging in the volume browser."
|
|
1093
1093
|
)
|
|
1094
|
-
password_secret_fqn: constr(regex=r"^tfy-secret:\/\/.+:.+:.+$") = Field(
|
|
1095
|
-
|
|
1094
|
+
password_secret_fqn: Optional[constr(regex=r"^tfy-secret:\/\/.+:.+:.+$")] = Field(
|
|
1095
|
+
None,
|
|
1096
1096
|
description="TFY Secret containing the password for logging in the volume browser.",
|
|
1097
1097
|
)
|
|
1098
1098
|
endpoint: Endpoint
|
|
@@ -1282,11 +1282,11 @@ class HealthProbe(BaseModel):
|
|
|
1282
1282
|
1,
|
|
1283
1283
|
description="Time to wait for a response from the endpoint before considering it down",
|
|
1284
1284
|
)
|
|
1285
|
-
success_threshold: conint(ge=1, le=
|
|
1285
|
+
success_threshold: conint(ge=1, le=5000) = Field(
|
|
1286
1286
|
1,
|
|
1287
1287
|
description="Number of successful responses from the endpoint before container is considered healthy",
|
|
1288
1288
|
)
|
|
1289
|
-
failure_threshold: conint(ge=1, le=
|
|
1289
|
+
failure_threshold: conint(ge=1, le=5000) = Field(
|
|
1290
1290
|
3,
|
|
1291
1291
|
description="Number of consecutive failures before the container is considered down",
|
|
1292
1292
|
)
|
|
@@ -1593,6 +1593,7 @@ class BaseService(BaseModel):
|
|
|
1593
1593
|
kustomize: Optional[Kustomize] = None
|
|
1594
1594
|
liveness_probe: Optional[HealthProbe] = None
|
|
1595
1595
|
readiness_probe: Optional[HealthProbe] = None
|
|
1596
|
+
startup_probe: Optional[HealthProbe] = None
|
|
1596
1597
|
workspace_fqn: Optional[str] = Field(
|
|
1597
1598
|
None, description="Fully qualified name of the workspace"
|
|
1598
1599
|
)
|
|
@@ -654,7 +654,7 @@ class MlFoundryArtifactsRepository:
|
|
|
654
654
|
artifact_identifier=self.artifact_identifier, paths=[remote_file_path]
|
|
655
655
|
)[0]
|
|
656
656
|
|
|
657
|
-
if progress_bar is None or
|
|
657
|
+
if progress_bar is None or progress_bar.disable:
|
|
658
658
|
logger.info("Downloading %s to %s", remote_file_path, local_path)
|
|
659
659
|
|
|
660
660
|
if progress_bar is not None:
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import math
|
|
3
|
+
import os
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from truefoundry import ml
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from transformers.integrations.integration_utils import rewrite_logs
|
|
12
|
+
from transformers.trainer_callback import TrainerCallback
|
|
13
|
+
except ImportError as e:
|
|
14
|
+
raise ImportError(
|
|
15
|
+
"Importing this module requires `transformers` to be installed"
|
|
16
|
+
) from e
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from transformers.trainer_callback import TrainerControl, TrainerState
|
|
20
|
+
from transformers.training_args import TrainingArguments
|
|
21
|
+
|
|
22
|
+
from truefoundry.ml import MlFoundryRun
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TrueFoundryMLCallback(TrainerCallback):
    """Trainer callback that reports a training job to a TrueFoundry ML run.

    * ``on_log`` forwards the finite numeric values of the trainer logs to
      ``run.log_metrics``.
    * ``on_save`` logs the checkpoint directory of the current step through
      ``run.log_artifact`` (only when ``log_checkpoints`` is enabled).
    * ``on_train_end`` ends the run when ``auto_end_run_on_train_end`` is set.
    """

    def __init__(
        self,
        run: "MlFoundryRun",
        log_checkpoints: bool = True,
        checkpoint_artifact_name: Optional[str] = None,
        auto_end_run_on_train_end: bool = False,
    ):
        """
        Args:
            run: The run entity to log metrics to.
            log_checkpoints: Whether to log checkpoints or not, defaults to True.
            checkpoint_artifact_name: The name of the artifact to log checkpoints to, required if log_checkpoints is True.
            auto_end_run_on_train_end: Whether to end the run automatically when training ends, defaults to False.

        Raises:
            ValueError: If `log_checkpoints` is True and `checkpoint_artifact_name` is empty.

        Usage:
            from transformers import Trainer
            from truefoundry.ml.integrations.huggingface.trainer_callback import TrueFoundryMLCallback
            from truefoundry.ml import get_client

            client = get_client()
            run = client.create_run(ml_repo="my-ml-repo", run_name="my-run", auto_end=False)

            callback = TrueFoundryMLCallback(
                run=run,
                log_checkpoints=True,
                checkpoint_artifact_name="my-checkpoint",
                auto_end_run_on_train_end=True,
            )

            trainer = Trainer(
                ...,
                callbacks=[callback]
            )
        """
        self._validate_checkpoint_options(log_checkpoints, checkpoint_artifact_name)
        self._run = run
        self._log_checkpoints = log_checkpoints
        self._checkpoint_artifact_name = checkpoint_artifact_name
        self._auto_end_run_on_train_end = auto_end_run_on_train_end

    @staticmethod
    def _validate_checkpoint_options(
        log_checkpoints: bool, checkpoint_artifact_name: Optional[str]
    ) -> None:
        """Raise ValueError when checkpoint logging is enabled without an artifact name."""
        if log_checkpoints and not checkpoint_artifact_name:
            raise ValueError(
                "`checkpoint_artifact_name` is required when `log_checkpoints` is True"
            )

    @classmethod
    def with_managed_run(
        cls,
        ml_repo: str,
        run_name: Optional[str] = None,
        log_checkpoints: bool = True,
        checkpoint_artifact_name: Optional[str] = None,
        auto_end_run_on_train_end: bool = True,
    ) -> "TrueFoundryMLCallback":
        """
        Create a run in `ml_repo` and return a callback bound to it.

        Args:
            ml_repo: The name of the ML Repository to log metrics and data to.
            run_name: The name of the run, if not provided, a random name will be generated.
            log_checkpoints: Whether to log checkpoints or not, defaults to True.
            checkpoint_artifact_name: The name of the artifact to log checkpoints to, required if log_checkpoints is True.
            auto_end_run_on_train_end: Whether to end the run automatically when training ends, defaults to True.

        Raises:
            ValueError: If `log_checkpoints` is True and `checkpoint_artifact_name` is empty.
                The check happens before any run is created.

        Usage:
            from transformers import Trainer
            from truefoundry.ml.integrations.huggingface.trainer_callback import TrueFoundryMLCallback

            callback = TrueFoundryMLCallback.with_managed_run(
                ml_repo="my-ml-repo",
                run_name="my-run",
                log_checkpoints=True,
                checkpoint_artifact_name="my-checkpoint",
                auto_end_run_on_train_end=True,
            )
            trainer = Trainer(
                ...,
                callbacks=[callback]
            )
        """
        # Validate first: the run is created with auto_end=False, so failing in
        # __init__ *after* creating it would leave a run that nothing ever ends.
        cls._validate_checkpoint_options(log_checkpoints, checkpoint_artifact_name)
        run = ml.get_client().create_run(
            ml_repo=ml_repo, run_name=run_name, auto_end=False
        )
        return cls(
            run=run,
            log_checkpoints=log_checkpoints,
            checkpoint_artifact_name=checkpoint_artifact_name,
            auto_end_run_on_train_end=auto_end_run_on_train_end,
        )

    def _drop_non_finite_values(self, dct: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of `dct` keeping only finite int/float (incl. numpy) values.

        Every dropped entry is reported with a warning.
        """
        sanitized = {}
        for k, v in dct.items():
            if isinstance(v, (int, float, np.integer, np.floating)) and math.isfinite(
                v
            ):
                sanitized[k] = v
            else:
                # Lazy %-formatting: the message is only rendered when the
                # warning is actually emitted.
                logger.warning(
                    'Trainer is attempting to log a value of "%s" of'
                    ' type %s for key "%s" as a metric.'
                    " Mlfoundry's log_metric() only accepts finite float and"
                    " int types so we dropped this attribute.",
                    v,
                    type(v),
                    k,
                )
        return sanitized

    @property
    def run(self) -> "MlFoundryRun":
        """The run this callback logs to."""
        return self._run

    # noinspection PyMethodOverriding
    def on_log(
        self,
        args: "TrainingArguments",
        state: "TrainerState",
        control: "TrainerControl",
        logs: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Log the finite numeric values of `logs` as metrics at `state.global_step`.

        Does nothing unless `state.is_world_process_zero` is true.
        """
        if not state.is_world_process_zero:
            return

        metrics = self._drop_non_finite_values(logs or {})
        if not metrics:
            # Every value was dropped (or there were no logs): nothing to send.
            return
        self._run.log_metrics(rewrite_logs(metrics), step=state.global_step)

    def on_save(
        self,
        args: "TrainingArguments",
        state: "TrainerState",
        control: "TrainerControl",
        **kwargs,
    ):
        """
        Log `<args.output_dir>/checkpoint-<global_step>` as an artifact of the run.

        Does nothing unless `state.is_world_process_zero` is true and checkpoint
        logging is enabled with an artifact name. The finite numeric values that
        `state.log_history` recorded for the current step are attached as metadata.
        """
        if not state.is_world_process_zero:
            return

        if not self._log_checkpoints:
            return

        if not self._checkpoint_artifact_name:
            return

        ckpt_dir = f"checkpoint-{state.global_step}"
        artifact_path = os.path.join(args.output_dir, ckpt_dir)
        description = None
        _job_name = os.getenv("TFY_INTERNAL_COMPONENT_NAME")
        _job_run_name = os.getenv("TFY_INTERNAL_JOB_RUN_NAME")
        if _job_name:
            description = f"Checkpoint from job={_job_name} run={_job_run_name}"
        logger.info("Uploading checkpoint %s ...", ckpt_dir)

        # Merge every history entry recorded for this step instead of keeping
        # only the last one, so no entry for the step is discarded; on a key
        # conflict the later entry wins.
        # NOTE(review): presumably a step can carry separate training and
        # evaluation entries - confirm against the Trainer in use.
        metadata: Dict[str, Any] = {}
        for entry in state.log_history:
            if isinstance(entry, dict) and entry.get("step") == state.global_step:
                metadata.update(entry)
        metadata = self._drop_non_finite_values(metadata)

        self._run.log_artifact(
            name=self._checkpoint_artifact_name,
            artifact_paths=[(artifact_path, None)],
            metadata=metadata,
            step=state.global_step,
            description=description,
        )

    def on_train_end(
        self,
        args: "TrainingArguments",
        state: "TrainerState",
        control: "TrainerControl",
        **kwargs,
    ):
        """
        Event called at the end of training.

        Ends the run when `auto_end_run_on_train_end` was set.
        """
        if self._auto_end_run_on_train_end:
            self._run.end()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: truefoundry
|
|
3
|
-
Version: 0.11.
|
|
3
|
+
Version: 0.11.5
|
|
4
4
|
Summary: TrueFoundry CLI
|
|
5
5
|
Author-email: TrueFoundry Team <abhishek@truefoundry.com>
|
|
6
6
|
Requires-Python: <3.14,>=3.8.1
|
|
@@ -30,7 +30,7 @@ Requires-Dist: requirements-parser<0.12.0,>=0.11.0
|
|
|
30
30
|
Requires-Dist: rich-click<2.0.0,>=1.2.1
|
|
31
31
|
Requires-Dist: rich<14.0.0,>=13.7.1
|
|
32
32
|
Requires-Dist: tqdm<5.0.0,>=4.0.0
|
|
33
|
-
Requires-Dist: truefoundry-sdk<0.2.0,>=0.1.
|
|
33
|
+
Requires-Dist: truefoundry-sdk<0.2.0,>=0.1.10
|
|
34
34
|
Requires-Dist: typing-extensions>=4.0
|
|
35
35
|
Requires-Dist: urllib3<3,>=1.26.18
|
|
36
36
|
Requires-Dist: yq<4.0.0,>=3.1.0
|
|
@@ -54,7 +54,7 @@ truefoundry/common/utils.py,sha256=P0FuAadoJGdpieUORLSN-PiFnkyoGO-K2cS4OPITBWg,6
|
|
|
54
54
|
truefoundry/common/warnings.py,sha256=xDMhR_-ZGC40Ycaj6nlFb5MYPexn8WbKCHd4FlflTXQ,705
|
|
55
55
|
truefoundry/deploy/__init__.py,sha256=sP-6Nv-_uV2o3knWcNSGV07j_Hkq0lfUkfZffBg-Hfo,2874
|
|
56
56
|
truefoundry/deploy/python_deploy_codegen.py,sha256=k19_m5DGsUyjOUCSKwIVP8vDna2sq01tHABsUfoVpW4,8019
|
|
57
|
-
truefoundry/deploy/_autogen/models.py,sha256=
|
|
57
|
+
truefoundry/deploy/_autogen/models.py,sha256=e75fSAlUJhPW3IN9Lg3ogSnCR9crIuHAsZaDSCNvkS0,75977
|
|
58
58
|
truefoundry/deploy/builder/__init__.py,sha256=VR07ZB7ziONEBbVgg1JdRTWY7t4qJjJTMhc2VodXYdA,5036
|
|
59
59
|
truefoundry/deploy/builder/constants.py,sha256=amUkHoHvVKzGv0v_knfiioRuKiJM0V0xW0diERgWiI0,508
|
|
60
60
|
truefoundry/deploy/builder/docker_service.py,sha256=sm7GWeIqyrKaZpxskdLejZlsxcZnM3BTDJr6orvPN4E,3948
|
|
@@ -349,7 +349,7 @@ truefoundry/ml/_autogen/models/schema.py,sha256=a_bp42MMPUbwO3407m0UW2W8EOhnxZXf
|
|
|
349
349
|
truefoundry/ml/_autogen/models/signature.py,sha256=rBjpxUIsEeWM0sIyYG5uCJB18DKHR4k5yZw8TzuoP48,4987
|
|
350
350
|
truefoundry/ml/_autogen/models/utils.py,sha256=c7RtSLXhOLcP8rjuUtfnMdaKVTZvvbsmw98gPAkAFrs,24371
|
|
351
351
|
truefoundry/ml/artifact/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
352
|
-
truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=
|
|
352
|
+
truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=8BFKaXDxutw8bPJLnDI0bO0oNS_xJKo2ijubc2PLFsU,35688
|
|
353
353
|
truefoundry/ml/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
354
354
|
truefoundry/ml/cli/cli.py,sha256=MwpY7z_NEeJE_XIP7XbZELjNeu2vpMmohttHCKDRk54,335
|
|
355
355
|
truefoundry/ml/cli/utils.py,sha256=j6_mZ4Spn114mz3P4QQ8jx0tmorXIuyQnHXVUSDvZi4,1035
|
|
@@ -357,6 +357,9 @@ truefoundry/ml/cli/commands/__init__.py,sha256=diDUiRUX4l6TtNLI4iF-ZblczkELM7FRV
|
|
|
357
357
|
truefoundry/ml/cli/commands/download.py,sha256=N9MhsEQ3U24v_OmnMZT8Q4SoAi38Sm7a21unrACOSDw,2573
|
|
358
358
|
truefoundry/ml/cli/commands/model_init.py,sha256=INyUAU6hiFClI8cZqX5hgnrtNbeKxlZxrjFrjzStU18,2664
|
|
359
359
|
truefoundry/ml/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
360
|
+
truefoundry/ml/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
361
|
+
truefoundry/ml/integrations/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
362
|
+
truefoundry/ml/integrations/huggingface/trainer_callback.py,sha256=Zu5AUbH_ct8I1dHyNYJQZBj9Y__hKo0sc2OxpPXJARE,6952
|
|
360
363
|
truefoundry/ml/log_types/__init__.py,sha256=g4u4D4Jaj0aBK5GtrLV88-qThKZR9pSZ17vFEkN-LmM,125
|
|
361
364
|
truefoundry/ml/log_types/plot.py,sha256=LDh4uy6z2P_a2oPM2lc85c0lt8utVvunohzeMawFjZw,7572
|
|
362
365
|
truefoundry/ml/log_types/pydantic_base.py,sha256=eBlw_AEyAz4iJKDP4zgJOCFWcldwQqpf7FADW1jzIQY,272
|
|
@@ -383,7 +386,7 @@ truefoundry/workflow/remote_filesystem/__init__.py,sha256=LQ95ViEjJ7Ts4JcCGOxMPs
|
|
|
383
386
|
truefoundry/workflow/remote_filesystem/logger.py,sha256=em2l7D6sw7xTLDP0kQSLpgfRRCLpN14Qw85TN7ujQcE,1022
|
|
384
387
|
truefoundry/workflow/remote_filesystem/tfy_signed_url_client.py,sha256=xcT0wQmQlgzcj0nP3tJopyFSVWT1uv3nhiTIuwfXYeg,12342
|
|
385
388
|
truefoundry/workflow/remote_filesystem/tfy_signed_url_fs.py,sha256=nSGPZu0Gyd_jz0KsEE-7w_BmnTD8CVF1S8cUJoxaCbc,13305
|
|
386
|
-
truefoundry-0.11.
|
|
387
|
-
truefoundry-0.11.
|
|
388
|
-
truefoundry-0.11.
|
|
389
|
-
truefoundry-0.11.
|
|
389
|
+
truefoundry-0.11.5.dist-info/METADATA,sha256=BUNv9InD1VRtqlp8rb4eQkqKpCUgKQfFsrq9OjTgm9Q,2760
|
|
390
|
+
truefoundry-0.11.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
391
|
+
truefoundry-0.11.5.dist-info/entry_points.txt,sha256=xVjn7RMN-MW2-9f7YU-bBdlZSvvrwzhpX1zmmRmsNPU,98
|
|
392
|
+
truefoundry-0.11.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|