truefoundry 0.11.3rc1__py3-none-any.whl → 0.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truefoundry might be problematic. Click here for more details.
- truefoundry/common/constants.py +2 -0
- truefoundry/deploy/__init__.py +1 -0
- truefoundry/deploy/_autogen/models.py +11 -2
- truefoundry/deploy/builder/__init__.py +2 -0
- truefoundry/deploy/builder/builders/__init__.py +2 -0
- truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py +6 -83
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py +20 -72
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py +14 -1
- truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/__init__.py +52 -0
- truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/dockerfile_template.py +121 -0
- truefoundry/deploy/builder/utils.py +83 -0
- truefoundry/deploy/v2/lib/patched_models.py +6 -0
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +1 -1
- truefoundry/ml/integrations/__init__.py +0 -0
- truefoundry/ml/integrations/huggingface/__init__.py +0 -0
- truefoundry/ml/integrations/huggingface/trainer_callback.py +198 -0
- {truefoundry-0.11.3rc1.dist-info → truefoundry-0.11.4.dist-info}/METADATA +2 -2
- {truefoundry-0.11.3rc1.dist-info → truefoundry-0.11.4.dist-info}/RECORD +20 -15
- {truefoundry-0.11.3rc1.dist-info → truefoundry-0.11.4.dist-info}/WHEEL +0 -0
- {truefoundry-0.11.3rc1.dist-info → truefoundry-0.11.4.dist-info}/entry_points.txt +0 -0
truefoundry/common/constants.py
CHANGED
|
@@ -68,6 +68,8 @@ class TrueFoundrySdkEnv(BaseSettings):
|
|
|
68
68
|
# TODO(gw): Use another image with more linient rate limits
|
|
69
69
|
TFY_SPARK_BUILD_SPARK_IMAGE_REPO: str = "public.ecr.aws/bitnami/spark"
|
|
70
70
|
|
|
71
|
+
TFY_TASK_PYSPARK_BUILD_SPARK_IMAGE_REPO: str = "public.ecr.aws/bitnami/spark"
|
|
72
|
+
|
|
71
73
|
# For local development, this enables futher configuration via _TFYServersConfig
|
|
72
74
|
TFY_CLI_LOCAL_DEV_MODE: bool = False
|
|
73
75
|
|
truefoundry/deploy/__init__.py
CHANGED
|
@@ -120,6 +120,7 @@ from truefoundry.deploy.v2.lib.patched_models import (
|
|
|
120
120
|
SparkJobPythonEntrypoint,
|
|
121
121
|
SparkJobPythonNotebookEntrypoint,
|
|
122
122
|
SparkJobScalaEntrypoint,
|
|
123
|
+
SparkJobScalaNotebookEntrypoint,
|
|
123
124
|
SQSInputConfig,
|
|
124
125
|
SQSOutputConfig,
|
|
125
126
|
SQSQueueMetricConfig,
|
|
truefoundry/deploy/_autogen/models.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
2
|
# filename: application.json
|
|
3
|
-
# timestamp: 2025-07-
|
|
3
|
+
# timestamp: 2025-07-28T14:11:36+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
@@ -103,7 +103,7 @@ class AsyncProcessorSidecar(BaseModel):
|
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
class Autoshutdown(BaseModel):
|
|
106
|
-
wait_time: conint(ge=0) = Field(
|
|
106
|
+
wait_time: conint(ge=0, le=604800) = Field(
|
|
107
107
|
900,
|
|
108
108
|
description="The period to wait after the last received request before scaling the replicas to 0. This value should be high enough to allow for the replicas of the service to come up to avoid premature scaling down.",
|
|
109
109
|
)
|
|
@@ -957,6 +957,14 @@ class SparkJobScalaEntrypoint(BaseModel):
|
|
|
957
957
|
)
|
|
958
958
|
|
|
959
959
|
|
|
960
|
+
class SparkJobScalaNotebookEntrypoint(BaseModel):
|
|
961
|
+
type: Literal["scala-notebook"] = Field(..., description="")
|
|
962
|
+
main_application_file: str = Field(
|
|
963
|
+
...,
|
|
964
|
+
description="The main application file to be executed by the spark job. Relative path in case of git repository.",
|
|
965
|
+
)
|
|
966
|
+
|
|
967
|
+
|
|
960
968
|
class StaticVolumeConfig(BaseModel):
|
|
961
969
|
type: Literal["static"] = Field(..., description="Volume Type for the volume.")
|
|
962
970
|
persistent_volume_name: str = Field(
|
|
@@ -1489,6 +1497,7 @@ class SparkJob(BaseModel):
|
|
|
1489
1497
|
SparkJobScalaEntrypoint,
|
|
1490
1498
|
SparkJobJavaEntrypoint,
|
|
1491
1499
|
SparkJobPythonNotebookEntrypoint,
|
|
1500
|
+
SparkJobScalaNotebookEntrypoint,
|
|
1492
1501
|
] = Field(..., description="")
|
|
1493
1502
|
driver_config: SparkDriverConfig
|
|
1494
1503
|
executor_config: SparkExecutorConfig
|
|
truefoundry/deploy/builder/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ from truefoundry.deploy._autogen.models import (
|
|
|
5
5
|
PythonBuild,
|
|
6
6
|
SparkBuild,
|
|
7
7
|
TaskDockerFileBuild,
|
|
8
|
+
TaskPySparkBuild,
|
|
8
9
|
TaskPythonBuild,
|
|
9
10
|
)
|
|
10
11
|
from truefoundry.deploy.builder.builders import get_builder
|
|
@@ -22,6 +23,7 @@ class _BuildConfig(BaseModel):
|
|
|
22
23
|
TaskPythonBuild,
|
|
23
24
|
TaskDockerFileBuild,
|
|
24
25
|
SparkBuild,
|
|
26
|
+
TaskPySparkBuild,
|
|
25
27
|
] = Field(discriminator="type")
|
|
26
28
|
|
|
27
29
|
|
|
truefoundry/deploy/builder/builders/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ from truefoundry.deploy.builder.builders import (
|
|
|
5
5
|
tfy_notebook_buildpack,
|
|
6
6
|
tfy_python_buildpack,
|
|
7
7
|
tfy_spark_buildpack,
|
|
8
|
+
tfy_task_pyspark_buildpack,
|
|
8
9
|
)
|
|
9
10
|
|
|
10
11
|
BUILD_REGISTRY: Dict[str, Callable] = {
|
|
@@ -12,6 +13,7 @@ BUILD_REGISTRY: Dict[str, Callable] = {
|
|
|
12
13
|
"tfy-python-buildpack": tfy_python_buildpack.build,
|
|
13
14
|
"tfy-notebook-buildpack": tfy_notebook_buildpack.build,
|
|
14
15
|
"tfy-spark-buildpack": tfy_spark_buildpack.build,
|
|
16
|
+
"task-pyspark-build": tfy_task_pyspark_buildpack.build,
|
|
15
17
|
}
|
|
16
18
|
|
|
17
19
|
__all__ = ["get_builder"]
|
|
truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import shlex
|
|
2
|
-
from typing import Dict, List, Optional
|
|
1
|
+
from typing import Dict
|
|
3
2
|
|
|
4
3
|
from mako.template import Template
|
|
5
4
|
|
|
@@ -7,9 +6,12 @@ from truefoundry.common.constants import ENV_VARS, PythonPackageManager
|
|
|
7
6
|
from truefoundry.deploy._autogen.models import PythonBuild
|
|
8
7
|
from truefoundry.deploy.builder.constants import (
|
|
9
8
|
PIP_CONF_BUILDKIT_SECRET_MOUNT,
|
|
10
|
-
PIP_CONF_SECRET_MOUNT_AS_ENV,
|
|
11
9
|
UV_CONF_BUILDKIT_SECRET_MOUNT,
|
|
12
|
-
UV_CONF_SECRET_MOUNT_AS_ENV,
|
10
|
+
)
|
|
11
|
+
from truefoundry.deploy.builder.utils import (
|
|
12
|
+
generate_apt_install_command,
|
|
13
|
+
generate_pip_install_command,
|
|
14
|
+
generate_uv_pip_install_command,
|
|
13
15
|
)
|
|
14
16
|
from truefoundry.deploy.v2.lib.patched_models import (
|
|
15
17
|
CUDAVersion,
|
|
@@ -82,85 +84,6 @@ CUDA_VERSION_TO_IMAGE_TAG: Dict[str, str] = {
|
|
|
82
84
|
}
|
|
83
85
|
|
|
84
86
|
|
|
85
|
-
def generate_apt_install_command(apt_packages: Optional[List[str]]) -> Optional[str]:
|
|
86
|
-
packages_list = None
|
|
87
|
-
if apt_packages:
|
|
88
|
-
packages_list = " ".join(p.strip() for p in apt_packages if p.strip())
|
|
89
|
-
if not packages_list:
|
|
90
|
-
return None
|
|
91
|
-
apt_update_command = "apt update"
|
|
92
|
-
apt_install_command = f"DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends {packages_list}"
|
|
93
|
-
clear_apt_lists_command = "rm -rf /var/lib/apt/lists/*"
|
|
94
|
-
return " && ".join(
|
|
95
|
-
[apt_update_command, apt_install_command, clear_apt_lists_command]
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def generate_pip_install_command(
|
|
100
|
-
requirements_path: Optional[str],
|
|
101
|
-
pip_packages: Optional[List[str]],
|
|
102
|
-
mount_pip_conf_secret: bool = False,
|
|
103
|
-
) -> Optional[str]:
|
|
104
|
-
upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
|
|
105
|
-
envs = []
|
|
106
|
-
if mount_pip_conf_secret:
|
|
107
|
-
envs.append(PIP_CONF_SECRET_MOUNT_AS_ENV)
|
|
108
|
-
|
|
109
|
-
command = ["python", "-m", "pip", "install", "--use-pep517", "--no-cache-dir"]
|
|
110
|
-
args = []
|
|
111
|
-
if requirements_path:
|
|
112
|
-
args.append("-r")
|
|
113
|
-
args.append(requirements_path)
|
|
114
|
-
|
|
115
|
-
if pip_packages:
|
|
116
|
-
args.extend(pip_packages)
|
|
117
|
-
|
|
118
|
-
if not args:
|
|
119
|
-
return None
|
|
120
|
-
|
|
121
|
-
final_pip_install_command = shlex.join(envs + command + args)
|
|
122
|
-
final_docker_run_command = " && ".join(
|
|
123
|
-
[upgrade_pip_command, final_pip_install_command]
|
|
124
|
-
)
|
|
125
|
-
return final_docker_run_command
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def generate_uv_pip_install_command(
|
|
129
|
-
requirements_path: Optional[str],
|
|
130
|
-
pip_packages: Optional[List[str]],
|
|
131
|
-
mount_uv_conf_secret: bool = False,
|
|
132
|
-
) -> Optional[str]:
|
|
133
|
-
upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
|
|
134
|
-
uv_mount = f"--mount=from={ENV_VARS.TFY_PYTHON_BUILD_UV_IMAGE_URI},source=/uv,target=/usr/local/bin/uv"
|
|
135
|
-
envs = [
|
|
136
|
-
"UV_LINK_MODE=copy",
|
|
137
|
-
"UV_PYTHON_DOWNLOADS=never",
|
|
138
|
-
"UV_INDEX_STRATEGY=unsafe-best-match",
|
|
139
|
-
]
|
|
140
|
-
if mount_uv_conf_secret:
|
|
141
|
-
envs.append(UV_CONF_SECRET_MOUNT_AS_ENV)
|
|
142
|
-
|
|
143
|
-
command = ["uv", "pip", "install", "--no-cache-dir"]
|
|
144
|
-
|
|
145
|
-
args = []
|
|
146
|
-
|
|
147
|
-
if requirements_path:
|
|
148
|
-
args.append("-r")
|
|
149
|
-
args.append(requirements_path)
|
|
150
|
-
|
|
151
|
-
if pip_packages:
|
|
152
|
-
args.extend(pip_packages)
|
|
153
|
-
|
|
154
|
-
if not args:
|
|
155
|
-
return None
|
|
156
|
-
|
|
157
|
-
uv_pip_install_command = shlex.join(envs + command + args)
|
|
158
|
-
shell_commands = " && ".join([upgrade_pip_command, uv_pip_install_command])
|
|
159
|
-
final_docker_run_command = " ".join([uv_mount, shell_commands])
|
|
160
|
-
|
|
161
|
-
return final_docker_run_command
|
|
162
|
-
|
|
163
|
-
|
|
164
87
|
def generate_dockerfile_content(
|
|
165
88
|
build_configuration: PythonBuild,
|
|
166
89
|
package_manager: str = ENV_VARS.TFY_PYTHON_BUILD_PACKAGE_MANAGER,
|
|
truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
|
-
import shlex
|
|
2
|
-
from typing import List, Optional
|
|
3
|
-
|
|
4
1
|
from mako.template import Template
|
|
5
2
|
|
|
6
3
|
from truefoundry.common.constants import ENV_VARS, PythonPackageManager
|
|
7
4
|
from truefoundry.deploy._autogen.models import SparkBuild
|
|
8
5
|
from truefoundry.deploy.builder.constants import (
|
|
9
6
|
PIP_CONF_BUILDKIT_SECRET_MOUNT,
|
|
10
|
-
PIP_CONF_SECRET_MOUNT_AS_ENV,
|
|
11
7
|
UV_CONF_BUILDKIT_SECRET_MOUNT,
|
|
12
|
-
UV_CONF_SECRET_MOUNT_AS_ENV,
|
8
|
+
)
|
|
9
|
+
from truefoundry.deploy.builder.utils import (
|
|
10
|
+
generate_pip_install_command,
|
|
11
|
+
generate_uv_pip_install_command,
|
|
13
12
|
)
|
|
14
13
|
from truefoundry.deploy.v2.lib.patched_models import (
|
|
15
14
|
_resolve_requirements_path,
|
|
@@ -26,8 +25,7 @@ RUN ${package_manager_config_secret_mount} ${python_packages_install_command}
|
|
|
26
25
|
% endif
|
|
27
26
|
ENV PYTHONDONTWRITEBYTECODE=1
|
|
28
27
|
ENV IPYTHONDIR=/tmp/.ipython
|
|
29
|
-
|
|
30
|
-
USER spark
|
|
28
|
+
USER 1001
|
|
31
29
|
COPY . /app
|
|
32
30
|
"""
|
|
33
31
|
|
|
@@ -35,6 +33,20 @@ _POST_USER_TEMPLATE = """
|
|
|
35
33
|
COPY tfy_execute_notebook.py /app/tfy_execute_notebook.py
|
|
36
34
|
"""
|
|
37
35
|
|
|
36
|
+
_ALMOND_INSTALL_TEMPLATE = """
|
|
37
|
+
ENV COURSIER_CACHE=/opt/coursier-cache
|
|
38
|
+
RUN install_packages curl
|
|
39
|
+
RUN curl -Lo coursier https://git.io/coursier-cli && \
|
|
40
|
+
chmod +x coursier && \
|
|
41
|
+
./coursier launch almond:0.14.1 -- --install --global && \
|
|
42
|
+
chown -R 1001:0 /usr/local/share/jupyter && \
|
|
43
|
+
chown -R 1001:0 /opt/coursier-cache && \
|
|
44
|
+
rm -f coursier
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
# Docker image size with almond - 1.26GB
|
|
48
|
+
# Docker image size without almond - 1.1GB
|
|
49
|
+
# Not much harm in packaging almond by default
|
|
38
50
|
DOCKERFILE_TEMPLATE = Template(
|
|
39
51
|
"""
|
|
40
52
|
FROM ${spark_image_repo}:${spark_version}
|
|
@@ -43,6 +55,7 @@ RUN apt update && \
|
|
|
43
55
|
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends git && \
|
|
44
56
|
rm -rf /var/lib/apt/lists/*
|
|
45
57
|
"""
|
|
58
|
+
+ _ALMOND_INSTALL_TEMPLATE
|
|
46
59
|
+ _POST_PYTHON_INSTALL_TEMPLATE
|
|
47
60
|
+ _POST_USER_TEMPLATE
|
|
48
61
|
)
|
|
@@ -55,71 +68,6 @@ ADDITIONAL_PIP_PACKAGES = [
|
|
|
55
68
|
]
|
|
56
69
|
|
|
57
70
|
|
|
58
|
-
def generate_pip_install_command(
|
|
59
|
-
requirements_path: Optional[str],
|
|
60
|
-
pip_packages: Optional[List[str]],
|
|
61
|
-
mount_pip_conf_secret: bool = False,
|
|
62
|
-
) -> Optional[str]:
|
|
63
|
-
upgrade_pip_command = "python3 -m pip install -U pip setuptools wheel"
|
|
64
|
-
envs = []
|
|
65
|
-
if mount_pip_conf_secret:
|
|
66
|
-
envs.append(PIP_CONF_SECRET_MOUNT_AS_ENV)
|
|
67
|
-
|
|
68
|
-
command = ["python3", "-m", "pip", "install", "--use-pep517", "--no-cache-dir"]
|
|
69
|
-
args = []
|
|
70
|
-
if requirements_path:
|
|
71
|
-
args.append("-r")
|
|
72
|
-
args.append(requirements_path)
|
|
73
|
-
|
|
74
|
-
if pip_packages:
|
|
75
|
-
args.extend(pip_packages)
|
|
76
|
-
|
|
77
|
-
if not args:
|
|
78
|
-
return None
|
|
79
|
-
|
|
80
|
-
final_pip_install_command = shlex.join(envs + command + args)
|
|
81
|
-
final_docker_run_command = " && ".join(
|
|
82
|
-
[upgrade_pip_command, final_pip_install_command]
|
|
83
|
-
)
|
|
84
|
-
return final_docker_run_command
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def generate_uv_pip_install_command(
|
|
88
|
-
requirements_path: Optional[str],
|
|
89
|
-
pip_packages: Optional[List[str]],
|
|
90
|
-
mount_uv_conf_secret: bool = False,
|
|
91
|
-
) -> Optional[str]:
|
|
92
|
-
upgrade_pip_command = "python3 -m pip install -U pip setuptools wheel"
|
|
93
|
-
uv_mount = f"--mount=from={ENV_VARS.TFY_PYTHON_BUILD_UV_IMAGE_URI},source=/uv,target=/usr/local/bin/uv"
|
|
94
|
-
envs = [
|
|
95
|
-
"UV_LINK_MODE=copy",
|
|
96
|
-
"UV_PYTHON_DOWNLOADS=never",
|
|
97
|
-
"UV_INDEX_STRATEGY=unsafe-best-match",
|
|
98
|
-
]
|
|
99
|
-
if mount_uv_conf_secret:
|
|
100
|
-
envs.append(UV_CONF_SECRET_MOUNT_AS_ENV)
|
|
101
|
-
|
|
102
|
-
command = ["uv", "pip", "install", "--no-cache-dir"]
|
|
103
|
-
|
|
104
|
-
args = []
|
|
105
|
-
|
|
106
|
-
if requirements_path:
|
|
107
|
-
args.append("-r")
|
|
108
|
-
args.append(requirements_path)
|
|
109
|
-
|
|
110
|
-
if pip_packages:
|
|
111
|
-
args.extend(pip_packages)
|
|
112
|
-
|
|
113
|
-
if not args:
|
|
114
|
-
return None
|
|
115
|
-
|
|
116
|
-
uv_pip_install_command = shlex.join(envs + command + args)
|
|
117
|
-
shell_commands = " && ".join([upgrade_pip_command, uv_pip_install_command])
|
|
118
|
-
final_docker_run_command = " ".join([uv_mount, shell_commands])
|
|
119
|
-
|
|
120
|
-
return final_docker_run_command
|
|
121
|
-
|
|
122
|
-
|
|
123
71
|
def generate_dockerfile_content(
|
|
124
72
|
build_configuration: SparkBuild,
|
|
125
73
|
package_manager: str = ENV_VARS.TFY_PYTHON_BUILD_PACKAGE_MANAGER,
|
|
truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py
CHANGED
|
@@ -86,12 +86,24 @@ def execute_notebook(notebook_path, output_path="/tmp/output.ipynb", parameters=
|
|
|
86
86
|
parameters = {}
|
|
87
87
|
|
|
88
88
|
print(f"Starting execution of notebook: {notebook_path}")
|
|
89
|
+
notebook_type = os.environ.get("TFY_NOTEBOOK_TYPE", "").lower()
|
|
90
|
+
kernel_mapping = {"python": "python3", "scala": "scala"}
|
|
91
|
+
|
|
92
|
+
if notebook_type not in kernel_mapping:
|
|
93
|
+
supported_types = ", ".join(kernel_mapping.keys())
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f"Unsupported notebook type: '{notebook_type}'. "
|
|
96
|
+
f"Supported types: [{supported_types}]"
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
kernel_name = kernel_mapping[notebook_type]
|
|
100
|
+
|
|
89
101
|
pm.execute_notebook(
|
|
90
102
|
input_path=notebook_path,
|
|
91
103
|
output_path=output_path,
|
|
92
104
|
parameters=parameters,
|
|
93
105
|
# TODO(gw): Replace with kernel name for venv
|
|
94
|
-
kernel_name=
|
|
106
|
+
kernel_name=kernel_name,
|
|
95
107
|
# Log cell by cell execution output
|
|
96
108
|
# TODO(gw): Output logs to a file instead, so that they aren't merged with the container's logs
|
|
97
109
|
log_output=True,
|
|
@@ -107,6 +119,7 @@ def validate_env_vars():
|
|
|
107
119
|
"TFY_NOTEBOOK_OUTPUT_S3_BUCKET",
|
|
108
120
|
"SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN",
|
|
109
121
|
"TFY_NOTEBOOK_OUTPUT_S3_SECRET_KEY",
|
|
122
|
+
"TFY_NOTEBOOK_TYPE",
|
|
110
123
|
]
|
|
111
124
|
unset_keys = [key for key in keys if not os.environ.get(key)]
|
|
112
125
|
if unset_keys:
|
|
truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/__init__.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from tempfile import TemporaryDirectory
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from truefoundry.deploy._autogen.models import DockerFileBuild, TaskPySparkBuild
|
|
6
|
+
from truefoundry.deploy.builder.builders import dockerfile
|
|
7
|
+
from truefoundry.deploy.builder.builders.tfy_task_pyspark_buildpack.dockerfile_template import (
|
|
8
|
+
generate_dockerfile_content,
|
|
9
|
+
)
|
|
10
|
+
from truefoundry.deploy.builder.utils import has_python_package_manager_conf_secret
|
|
11
|
+
|
|
12
|
+
__all__ = ["generate_dockerfile_content", "build"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _convert_to_dockerfile_build_config(
|
|
16
|
+
build_configuration: TaskPySparkBuild,
|
|
17
|
+
dockerfile_path: str,
|
|
18
|
+
mount_python_package_manager_conf_secret: bool = False,
|
|
19
|
+
) -> DockerFileBuild:
|
|
20
|
+
dockerfile_content = generate_dockerfile_content(
|
|
21
|
+
build_configuration=build_configuration,
|
|
22
|
+
mount_python_package_manager_conf_secret=mount_python_package_manager_conf_secret,
|
|
23
|
+
)
|
|
24
|
+
with open(dockerfile_path, "w", encoding="utf8") as fp:
|
|
25
|
+
fp.write(dockerfile_content)
|
|
26
|
+
|
|
27
|
+
return DockerFileBuild(
|
|
28
|
+
type="dockerfile",
|
|
29
|
+
dockerfile_path=dockerfile_path,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def build(
|
|
34
|
+
tag: str,
|
|
35
|
+
build_configuration: TaskPySparkBuild,
|
|
36
|
+
extra_opts: Optional[List[str]] = None,
|
|
37
|
+
):
|
|
38
|
+
mount_python_package_manager_conf_secret = (
|
|
39
|
+
has_python_package_manager_conf_secret(extra_opts) if extra_opts else False
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
with TemporaryDirectory() as local_dir:
|
|
43
|
+
docker_build_configuration = _convert_to_dockerfile_build_config(
|
|
44
|
+
build_configuration,
|
|
45
|
+
dockerfile_path=os.path.join(local_dir, "Dockerfile"),
|
|
46
|
+
mount_python_package_manager_conf_secret=mount_python_package_manager_conf_secret,
|
|
47
|
+
)
|
|
48
|
+
dockerfile.build(
|
|
49
|
+
tag=tag,
|
|
50
|
+
build_configuration=docker_build_configuration,
|
|
51
|
+
extra_opts=extra_opts,
|
|
52
|
+
)
|
|
truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/dockerfile_template.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from mako.template import Template
|
|
2
|
+
|
|
3
|
+
from truefoundry.common.constants import ENV_VARS, PythonPackageManager
|
|
4
|
+
from truefoundry.deploy._autogen.models import TaskPySparkBuild
|
|
5
|
+
from truefoundry.deploy.builder.constants import (
|
|
6
|
+
PIP_CONF_BUILDKIT_SECRET_MOUNT,
|
|
7
|
+
UV_CONF_BUILDKIT_SECRET_MOUNT,
|
|
8
|
+
)
|
|
9
|
+
from truefoundry.deploy.builder.utils import (
|
|
10
|
+
generate_apt_install_command,
|
|
11
|
+
generate_pip_install_command,
|
|
12
|
+
generate_uv_pip_install_command,
|
|
13
|
+
)
|
|
14
|
+
from truefoundry.deploy.v2.lib.patched_models import (
|
|
15
|
+
_resolve_requirements_path,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# TODO[GW]: Switch to a non-root user inside the container
|
|
19
|
+
_POST_PYTHON_INSTALL_TEMPLATE = """
|
|
20
|
+
% if apt_install_command is not None:
|
|
21
|
+
RUN ${apt_install_command}
|
|
22
|
+
% endif
|
|
23
|
+
% if requirements_path is not None:
|
|
24
|
+
COPY ${requirements_path} ${requirements_destination_path}
|
|
25
|
+
% endif
|
|
26
|
+
% if python_packages_install_command is not None:
|
|
27
|
+
RUN ${package_manager_config_secret_mount} ${python_packages_install_command}
|
|
28
|
+
% endif
|
|
29
|
+
COPY . /app
|
|
30
|
+
WORKDIR /app
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
# TODO[GW]: Check if the entrypoint for the image needs to change
|
|
34
|
+
# Using /opt/venv/ because flyte seems to be using it and this doesn't look configurable
|
|
35
|
+
# TODO[GW]: Double check this^
|
|
36
|
+
DOCKERFILE_TEMPLATE = Template(
|
|
37
|
+
"""
|
|
38
|
+
FROM ${spark_image_repo}:${spark_version}
|
|
39
|
+
ENV PATH=/opt/venv/bin:$PATH
|
|
40
|
+
USER root
|
|
41
|
+
RUN mkdir -p /var/lib/apt/lists/partial && \
|
|
42
|
+
apt update && \
|
|
43
|
+
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends git && \
|
|
44
|
+
python -m venv /opt/venv/ && \
|
|
45
|
+
rm -rf /var/lib/apt/lists/*
|
|
46
|
+
"""
|
|
47
|
+
+ _POST_PYTHON_INSTALL_TEMPLATE
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_additional_pip_packages(build_configuration: TaskPySparkBuild):
|
|
52
|
+
return [
|
|
53
|
+
f"pyspark=={build_configuration.spark_version}",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def generate_dockerfile_content(
|
|
58
|
+
build_configuration: TaskPySparkBuild,
|
|
59
|
+
package_manager: str = ENV_VARS.TFY_PYTHON_BUILD_PACKAGE_MANAGER,
|
|
60
|
+
mount_python_package_manager_conf_secret: bool = False,
|
|
61
|
+
) -> str:
|
|
62
|
+
# TODO (chiragjn): Handle recursive references to other requirements files e.g. `-r requirements-gpu.txt`
|
|
63
|
+
requirements_path = _resolve_requirements_path(
|
|
64
|
+
build_context_path="",
|
|
65
|
+
requirements_path=build_configuration.requirements_path,
|
|
66
|
+
)
|
|
67
|
+
requirements_destination_path = (
|
|
68
|
+
"/tmp/requirements.txt" if requirements_path else None
|
|
69
|
+
)
|
|
70
|
+
# if not build_configuration.python_version:
|
|
71
|
+
# raise ValueError(
|
|
72
|
+
# "`python_version` is required for `tfy-python-buildpack` builder"
|
|
73
|
+
# )
|
|
74
|
+
pip_packages = get_additional_pip_packages(build_configuration) + (
|
|
75
|
+
build_configuration.pip_packages or []
|
|
76
|
+
)
|
|
77
|
+
if package_manager == PythonPackageManager.PIP.value:
|
|
78
|
+
python_packages_install_command = generate_pip_install_command(
|
|
79
|
+
requirements_path=requirements_destination_path,
|
|
80
|
+
pip_packages=pip_packages,
|
|
81
|
+
mount_pip_conf_secret=mount_python_package_manager_conf_secret,
|
|
82
|
+
)
|
|
83
|
+
elif package_manager == PythonPackageManager.UV.value:
|
|
84
|
+
python_packages_install_command = generate_uv_pip_install_command(
|
|
85
|
+
requirements_path=requirements_destination_path,
|
|
86
|
+
pip_packages=pip_packages,
|
|
87
|
+
mount_uv_conf_secret=mount_python_package_manager_conf_secret,
|
|
88
|
+
)
|
|
89
|
+
else:
|
|
90
|
+
raise ValueError(f"Unsupported package manager: {package_manager}")
|
|
91
|
+
|
|
92
|
+
apt_install_command = generate_apt_install_command(
|
|
93
|
+
apt_packages=build_configuration.apt_packages
|
|
94
|
+
)
|
|
95
|
+
template_args = {
|
|
96
|
+
"spark_image_repo": ENV_VARS.TFY_TASK_PYSPARK_BUILD_SPARK_IMAGE_REPO,
|
|
97
|
+
"spark_version": build_configuration.spark_version,
|
|
98
|
+
"apt_install_command": apt_install_command,
|
|
99
|
+
"requirements_path": requirements_path,
|
|
100
|
+
"requirements_destination_path": requirements_destination_path,
|
|
101
|
+
"python_packages_install_command": python_packages_install_command,
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
if mount_python_package_manager_conf_secret:
|
|
105
|
+
if package_manager == PythonPackageManager.PIP.value:
|
|
106
|
+
template_args["package_manager_config_secret_mount"] = (
|
|
107
|
+
PIP_CONF_BUILDKIT_SECRET_MOUNT
|
|
108
|
+
)
|
|
109
|
+
elif package_manager == PythonPackageManager.UV.value:
|
|
110
|
+
template_args["package_manager_config_secret_mount"] = (
|
|
111
|
+
UV_CONF_BUILDKIT_SECRET_MOUNT
|
|
112
|
+
)
|
|
113
|
+
else:
|
|
114
|
+
raise ValueError(f"Unsupported package manager: {package_manager}")
|
|
115
|
+
else:
|
|
116
|
+
template_args["package_manager_config_secret_mount"] = ""
|
|
117
|
+
|
|
118
|
+
template = DOCKERFILE_TEMPLATE
|
|
119
|
+
|
|
120
|
+
dockerfile_content = template.render(**template_args)
|
|
121
|
+
return dockerfile_content
|
|
truefoundry/deploy/builder/utils.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
import shlex
|
|
1
2
|
from typing import List, Optional
|
|
2
3
|
|
|
4
|
+
from truefoundry.common.constants import ENV_VARS
|
|
3
5
|
from truefoundry.deploy.builder.constants import (
|
|
4
6
|
BUILDKIT_SECRET_MOUNT_PIP_CONF_ID,
|
|
5
7
|
BUILDKIT_SECRET_MOUNT_UV_CONF_ID,
|
|
8
|
+
PIP_CONF_SECRET_MOUNT_AS_ENV,
|
|
9
|
+
UV_CONF_SECRET_MOUNT_AS_ENV,
|
|
6
10
|
)
|
|
7
11
|
|
|
8
12
|
|
|
@@ -35,3 +39,82 @@ def has_python_package_manager_conf_secret(docker_build_extra_args: List[str]) -
|
|
|
35
39
|
):
|
|
36
40
|
return True
|
|
37
41
|
return False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def generate_pip_install_command(
|
|
45
|
+
requirements_path: Optional[str],
|
|
46
|
+
pip_packages: Optional[List[str]],
|
|
47
|
+
mount_pip_conf_secret: bool = False,
|
|
48
|
+
) -> Optional[str]:
|
|
49
|
+
upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
|
|
50
|
+
envs = []
|
|
51
|
+
if mount_pip_conf_secret:
|
|
52
|
+
envs.append(PIP_CONF_SECRET_MOUNT_AS_ENV)
|
|
53
|
+
|
|
54
|
+
command = ["python", "-m", "pip", "install", "--use-pep517", "--no-cache-dir"]
|
|
55
|
+
args = []
|
|
56
|
+
if requirements_path:
|
|
57
|
+
args.append("-r")
|
|
58
|
+
args.append(requirements_path)
|
|
59
|
+
|
|
60
|
+
if pip_packages:
|
|
61
|
+
args.extend(pip_packages)
|
|
62
|
+
|
|
63
|
+
if not args:
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
final_pip_install_command = shlex.join(envs + command + args)
|
|
67
|
+
final_docker_run_command = " && ".join(
|
|
68
|
+
[upgrade_pip_command, final_pip_install_command]
|
|
69
|
+
)
|
|
70
|
+
return final_docker_run_command
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def generate_uv_pip_install_command(
|
|
74
|
+
requirements_path: Optional[str],
|
|
75
|
+
pip_packages: Optional[List[str]],
|
|
76
|
+
mount_uv_conf_secret: bool = False,
|
|
77
|
+
) -> Optional[str]:
|
|
78
|
+
upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
|
|
79
|
+
uv_mount = f"--mount=from={ENV_VARS.TFY_PYTHON_BUILD_UV_IMAGE_URI},source=/uv,target=/usr/local/bin/uv"
|
|
80
|
+
envs = [
|
|
81
|
+
"UV_LINK_MODE=copy",
|
|
82
|
+
"UV_PYTHON_DOWNLOADS=never",
|
|
83
|
+
"UV_INDEX_STRATEGY=unsafe-best-match",
|
|
84
|
+
]
|
|
85
|
+
if mount_uv_conf_secret:
|
|
86
|
+
envs.append(UV_CONF_SECRET_MOUNT_AS_ENV)
|
|
87
|
+
|
|
88
|
+
command = ["uv", "pip", "install", "--no-cache-dir"]
|
|
89
|
+
|
|
90
|
+
args = []
|
|
91
|
+
|
|
92
|
+
if requirements_path:
|
|
93
|
+
args.append("-r")
|
|
94
|
+
args.append(requirements_path)
|
|
95
|
+
|
|
96
|
+
if pip_packages:
|
|
97
|
+
args.extend(pip_packages)
|
|
98
|
+
|
|
99
|
+
if not args:
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
uv_pip_install_command = shlex.join(envs + command + args)
|
|
103
|
+
shell_commands = " && ".join([upgrade_pip_command, uv_pip_install_command])
|
|
104
|
+
final_docker_run_command = " ".join([uv_mount, shell_commands])
|
|
105
|
+
|
|
106
|
+
return final_docker_run_command
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def generate_apt_install_command(apt_packages: Optional[List[str]]) -> Optional[str]:
|
|
110
|
+
packages_list = None
|
|
111
|
+
if apt_packages:
|
|
112
|
+
packages_list = " ".join(p.strip() for p in apt_packages if p.strip())
|
|
113
|
+
if not packages_list:
|
|
114
|
+
return None
|
|
115
|
+
apt_update_command = "apt update"
|
|
116
|
+
apt_install_command = f"DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends {packages_list}"
|
|
117
|
+
clear_apt_lists_command = "rm -rf /var/lib/apt/lists/*"
|
|
118
|
+
return " && ".join(
|
|
119
|
+
[apt_update_command, apt_install_command, clear_apt_lists_command]
|
|
120
|
+
)
|
|
truefoundry/deploy/v2/lib/patched_models.py
CHANGED
|
@@ -528,6 +528,12 @@ class SparkJobPythonNotebookEntrypoint(
|
|
|
528
528
|
type: Literal["python-notebook"] = "python-notebook"
|
|
529
529
|
|
|
530
530
|
|
|
531
|
+
class SparkJobScalaNotebookEntrypoint(
|
|
532
|
+
models.SparkJobScalaNotebookEntrypoint, PatchedModelBase
|
|
533
|
+
):
|
|
534
|
+
type: Literal["scala-notebook"] = "scala-notebook"
|
|
535
|
+
|
|
536
|
+
|
|
531
537
|
class PySparkTaskConfig(models.PySparkTaskConfig, PatchedModelBase):
|
|
532
538
|
type: Literal["pyspark-task-config"] = "pyspark-task-config"
|
|
533
539
|
|
|
truefoundry/ml/artifact/truefoundry_artifact_repo.py
CHANGED
|
@@ -654,7 +654,7 @@ class MlFoundryArtifactsRepository:
|
|
|
654
654
|
artifact_identifier=self.artifact_identifier, paths=[remote_file_path]
|
|
655
655
|
)[0]
|
|
656
656
|
|
|
657
|
-
if progress_bar is None or
|
|
657
|
+
if progress_bar is None or progress_bar.disable:
|
|
658
658
|
logger.info("Downloading %s to %s", remote_file_path, local_path)
|
|
659
659
|
|
|
660
660
|
if progress_bar is not None:
|
|
truefoundry/ml/integrations/__init__.py
File without changes
|
|
truefoundry/ml/integrations/huggingface/__init__.py
File without changes
|
|
truefoundry/ml/integrations/huggingface/trainer_callback.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import math
|
|
3
|
+
import os
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from truefoundry import ml
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from transformers.integrations.integration_utils import rewrite_logs
|
|
12
|
+
from transformers.trainer_callback import TrainerCallback
|
|
13
|
+
except ImportError as e:
|
|
14
|
+
raise ImportError(
|
|
15
|
+
"Importing this module requires `transformers` to be installed"
|
|
16
|
+
) from e
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from transformers.trainer_callback import TrainerControl, TrainerState
|
|
20
|
+
from transformers.training_args import TrainingArguments
|
|
21
|
+
|
|
22
|
+
from truefoundry.ml import MlFoundryRun
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TrueFoundryMLCallback(TrainerCallback):
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
run: "MlFoundryRun",
|
|
31
|
+
log_checkpoints: bool = True,
|
|
32
|
+
checkpoint_artifact_name: Optional[str] = None,
|
|
33
|
+
auto_end_run_on_train_end: bool = False,
|
|
34
|
+
):
|
|
35
|
+
"""
|
|
36
|
+
Args:
|
|
37
|
+
run: The run entity to log metrics to.
|
|
38
|
+
log_checkpoints: Whether to log checkpoints or not, defaults to True.
|
|
39
|
+
checkpoint_artifact_name: The name of the artifact to log checkpoints to, required if log_checkpoints is True.
|
|
40
|
+
auto_end_run_on_train_end: Whether to end the run automatically when training ends, defaults to False.
|
|
41
|
+
|
|
42
|
+
Usage:
|
|
43
|
+
from transformers import Trainer
|
|
44
|
+
from truefoundry.ml.integrations.huggingface.trainer_callback import TrueFoundryMLCallback
|
|
45
|
+
from truefoundry.ml import get_client
|
|
46
|
+
|
|
47
|
+
client = get_client()
|
|
48
|
+
run = client.create_run(ml_repo="my-ml-repo", run_name="my-run", auto_end=False)
|
|
49
|
+
|
|
50
|
+
callback = TrueFoundryMLCallback(
|
|
51
|
+
run=run,
|
|
52
|
+
log_checkpoints=True,
|
|
53
|
+
checkpoint_artifact_name="my-checkpoint",
|
|
54
|
+
auto_end_run_on_train_end=True,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
trainer = Trainer(
|
|
58
|
+
...,
|
|
59
|
+
callbacks=[callback]
|
|
60
|
+
)
|
|
61
|
+
"""
|
|
62
|
+
self._run = run
|
|
63
|
+
self._log_checkpoints = log_checkpoints
|
|
64
|
+
if self._log_checkpoints and not checkpoint_artifact_name:
|
|
65
|
+
raise ValueError(
|
|
66
|
+
"`checkpoint_artifact_name` is required when `log_checkpoints` is True"
|
|
67
|
+
)
|
|
68
|
+
self._checkpoint_artifact_name = checkpoint_artifact_name
|
|
69
|
+
self._auto_end_run_on_train_end = auto_end_run_on_train_end
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def with_managed_run(
    cls,
    ml_repo: str,
    run_name: Optional[str] = None,
    log_checkpoints: bool = True,
    checkpoint_artifact_name: Optional[str] = None,
    auto_end_run_on_train_end: bool = True,
) -> "TrueFoundryMLCallback":
    """
    Create a new run in `ml_repo` and return a callback bound to it.

    Args:
        ml_repo: The name of the ML Repository to log metrics and data to.
        run_name: The name of the run, if not provided, a random name will be generated.
        log_checkpoints: Whether to log checkpoints or not, defaults to True.
        checkpoint_artifact_name: The name of the artifact to log checkpoints to, required if log_checkpoints is True.
        auto_end_run_on_train_end: Whether to end the run automatically when training ends, defaults to True.

    Raises:
        ValueError: If `log_checkpoints` is True and `checkpoint_artifact_name`
            is missing or empty. This is raised before any run is created.

    Usage:
        from transformers import Trainer
        from truefoundry.ml.integrations.huggingface.trainer_callback import TrueFoundryMLCallback

        callback = TrueFoundryMLCallback.with_managed_run(
            ml_repo="my-ml-repo",
            run_name="my-run",
            log_checkpoints=True,
            checkpoint_artifact_name="my-checkpoint",
            auto_end_run_on_train_end=True,
        )
        trainer = Trainer(
            ...,
            callbacks=[callback]
        )
    """
    # Validate before touching the backend: the constructor rejects this
    # combination, and failing only after `create_run` would leave behind a
    # run (created with auto_end=False) that this code never ends.
    if log_checkpoints and not checkpoint_artifact_name:
        raise ValueError(
            "`checkpoint_artifact_name` is required when `log_checkpoints` is True"
        )

    run = ml.get_client().create_run(
        ml_repo=ml_repo, run_name=run_name, auto_end=False
    )
    return cls(
        run=run,
        log_checkpoints=log_checkpoints,
        checkpoint_artifact_name=checkpoint_artifact_name,
        auto_end_run_on_train_end=auto_end_run_on_train_end,
    )
|
|
113
|
+
|
|
114
|
+
def _drop_non_finite_values(self, dct: Dict[str, Any]) -> Dict[str, Any]:
|
|
115
|
+
sanitized = {}
|
|
116
|
+
for k, v in dct.items():
|
|
117
|
+
if isinstance(v, (int, float, np.integer, np.floating)) and math.isfinite(
|
|
118
|
+
v
|
|
119
|
+
):
|
|
120
|
+
sanitized[k] = v
|
|
121
|
+
else:
|
|
122
|
+
logger.warning(
|
|
123
|
+
f'Trainer is attempting to log a value of "{v}" of'
|
|
124
|
+
f' type {type(v)} for key "{k}" as a metric.'
|
|
125
|
+
" Mlfoundry's log_metric() only accepts finite float and"
|
|
126
|
+
" int types so we dropped this attribute."
|
|
127
|
+
)
|
|
128
|
+
return sanitized
|
|
129
|
+
|
|
130
|
+
@property
|
|
131
|
+
def run(self) -> "MlFoundryRun":
    """The run object this callback was constructed with."""
    tracked_run = self._run
    return tracked_run
|
|
133
|
+
|
|
134
|
+
# noinspection PyMethodOverriding
|
|
135
|
+
def on_log(
    self,
    args: "TrainingArguments",
    state: "TrainerState",
    control: "TrainerControl",
    logs: Optional[Dict[str, Any]] = None,
    **kwargs,
):
    """
    Log the trainer's `logs` to the run as metrics at `state.global_step`.

    Does nothing unless `state.is_world_process_zero` is true. Non-finite and
    non-numeric values are dropped before the remaining entries are passed
    through `rewrite_logs` and sent to `log_metrics`.
    """
    if not state.is_world_process_zero:
        return

    finite_metrics = self._drop_non_finite_values(logs if logs else {})
    self._run.log_metrics(rewrite_logs(finite_metrics), step=state.global_step)
|
|
149
|
+
|
|
150
|
+
def on_save(
    self,
    args: "TrainingArguments",
    state: "TrainerState",
    control: "TrainerControl",
    **kwargs,
):
    """
    Upload the checkpoint directory for the current step as an artifact.

    Skipped unless this is the world-zero process, checkpoint logging is
    enabled and an artifact name is configured. The artifact metadata is the
    last `state.log_history` entry recorded for `state.global_step`, reduced
    to its finite numeric values.
    """
    should_upload = (
        state.is_world_process_zero
        and self._log_checkpoints
        and self._checkpoint_artifact_name
    )
    if not should_upload:
        return

    step = state.global_step
    checkpoint_name = f"checkpoint-{step}"
    checkpoint_path = os.path.join(args.output_dir, checkpoint_name)

    # A description is attached only when the component-name variable is set.
    job_name = os.getenv("TFY_INTERNAL_COMPONENT_NAME")
    job_run_name = os.getenv("TFY_INTERNAL_JOB_RUN_NAME")
    description = (
        f"Checkpoint from job={job_name} run={job_run_name}" if job_name else None
    )

    logger.info(f"Uploading checkpoint {checkpoint_name} ...")

    # When several history entries share this step, the latest one wins.
    entries_for_step = [
        entry
        for entry in state.log_history
        if isinstance(entry, dict) and entry.get("step") == step
    ]
    raw_metadata = entries_for_step[-1].copy() if entries_for_step else {}

    self._run.log_artifact(
        name=self._checkpoint_artifact_name,
        artifact_paths=[(checkpoint_path, None)],
        metadata=self._drop_non_finite_values(raw_metadata),
        step=step,
        description=description,
    )
|
|
186
|
+
|
|
187
|
+
def on_train_end(
    self,
    args: "TrainingArguments",
    state: "TrainerState",
    control: "TrainerControl",
    **kwargs,
):
    """
    Event called at the end of training.

    Ends the tracked run when the callback was configured with
    `auto_end_run_on_train_end`; otherwise leaves the run open.
    """
    if not self._auto_end_run_on_train_end:
        return
    self._run.end()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: truefoundry
|
|
3
|
-
Version: 0.11.3rc1
|
|
3
|
+
Version: 0.11.4
|
|
4
4
|
Summary: TrueFoundry CLI
|
|
5
5
|
Author-email: TrueFoundry Team <abhishek@truefoundry.com>
|
|
6
6
|
Requires-Python: <3.14,>=3.8.1
|
|
@@ -30,7 +30,7 @@ Requires-Dist: requirements-parser<0.12.0,>=0.11.0
|
|
|
30
30
|
Requires-Dist: rich-click<2.0.0,>=1.2.1
|
|
31
31
|
Requires-Dist: rich<14.0.0,>=13.7.1
|
|
32
32
|
Requires-Dist: tqdm<5.0.0,>=4.0.0
|
|
33
|
-
Requires-Dist: truefoundry-sdk<0.2.0,>=0.1.
|
|
33
|
+
Requires-Dist: truefoundry-sdk<0.2.0,>=0.1.9
|
|
34
34
|
Requires-Dist: typing-extensions>=4.0
|
|
35
35
|
Requires-Dist: urllib3<3,>=1.26.18
|
|
36
36
|
Requires-Dist: yq<4.0.0,>=3.1.0
|
|
@@ -40,7 +40,7 @@ truefoundry/cli/display_util.py,sha256=9vzN3mbQqU6OhS7qRUiMRana4PTHa4sDTA0Hn7OVj
|
|
|
40
40
|
truefoundry/cli/util.py,sha256=kEjC20-n_jwxZV9jq-78CxDk4xAySxAoYIXTxZfJzLM,5423
|
|
41
41
|
truefoundry/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
42
|
truefoundry/common/auth_service_client.py,sha256=N3YxKlx63r6cPZqbgb2lqBOPI69ShB7D7RCIq4FSCjc,7949
|
|
43
|
-
truefoundry/common/constants.py,sha256=
|
|
43
|
+
truefoundry/common/constants.py,sha256=3GH-KgEuBp-Gmaq_lreudKCKKPVPdEsGME7xdvIOpBA,4710
|
|
44
44
|
truefoundry/common/credential_file_manager.py,sha256=1yEk1Zm2xS4G0VDFwKSZ4w0VUrcPWQ1nJnoBaz9xyKA,4251
|
|
45
45
|
truefoundry/common/credential_provider.py,sha256=_OhJ2XFlDaVsrUO-FyywxctcGGqDdC2pgcvwEKqQD0Q,4071
|
|
46
46
|
truefoundry/common/entities.py,sha256=b4R6ss06-ygDS3C4Tqa_GOq5LFKDYbt7x4Mghnfz6yo,4007
|
|
@@ -52,22 +52,24 @@ truefoundry/common/storage_provider_utils.py,sha256=yURhMw8k0FLFvaviRHDiifhvc6Gn
|
|
|
52
52
|
truefoundry/common/types.py,sha256=BMJFCsR1lPJAw66IQBSvLyV4I6o_x5oj78gVsUa9si8,188
|
|
53
53
|
truefoundry/common/utils.py,sha256=P0FuAadoJGdpieUORLSN-PiFnkyoGO-K2cS4OPITBWg,6714
|
|
54
54
|
truefoundry/common/warnings.py,sha256=xDMhR_-ZGC40Ycaj6nlFb5MYPexn8WbKCHd4FlflTXQ,705
|
|
55
|
-
truefoundry/deploy/__init__.py,sha256=
|
|
55
|
+
truefoundry/deploy/__init__.py,sha256=sP-6Nv-_uV2o3knWcNSGV07j_Hkq0lfUkfZffBg-Hfo,2874
|
|
56
56
|
truefoundry/deploy/python_deploy_codegen.py,sha256=k19_m5DGsUyjOUCSKwIVP8vDna2sq01tHABsUfoVpW4,8019
|
|
57
|
-
truefoundry/deploy/_autogen/models.py,sha256=
|
|
58
|
-
truefoundry/deploy/builder/__init__.py,sha256=
|
|
57
|
+
truefoundry/deploy/_autogen/models.py,sha256=oOBwFb7qumBz1XXllns8wL02-NVvYq5iOOGZlEP3BzU,75893
|
|
58
|
+
truefoundry/deploy/builder/__init__.py,sha256=VR07ZB7ziONEBbVgg1JdRTWY7t4qJjJTMhc2VodXYdA,5036
|
|
59
59
|
truefoundry/deploy/builder/constants.py,sha256=amUkHoHvVKzGv0v_knfiioRuKiJM0V0xW0diERgWiI0,508
|
|
60
60
|
truefoundry/deploy/builder/docker_service.py,sha256=sm7GWeIqyrKaZpxskdLejZlsxcZnM3BTDJr6orvPN4E,3948
|
|
61
|
-
truefoundry/deploy/builder/utils.py,sha256=
|
|
62
|
-
truefoundry/deploy/builder/builders/__init__.py,sha256=
|
|
61
|
+
truefoundry/deploy/builder/utils.py,sha256=4TO0f3qMFGfFoBK0two1P59jgxlNjUoZYHYRgStcovM,3694
|
|
62
|
+
truefoundry/deploy/builder/builders/__init__.py,sha256=Gp9NODR1E7mUjadhzIe3zzO43bBfHPeNcEDryYF2uo0,807
|
|
63
63
|
truefoundry/deploy/builder/builders/dockerfile.py,sha256=XMbMlPUTMPCyaHl7jJQY1ODtlRkpI61PcvgG6Ck5jNc,1522
|
|
64
64
|
truefoundry/deploy/builder/builders/tfy_notebook_buildpack/__init__.py,sha256=RGWGqY8xOF7vycUPJd10N7ZzahWv24lO0anrOPtLuDU,1796
|
|
65
65
|
truefoundry/deploy/builder/builders/tfy_notebook_buildpack/dockerfile_template.py,sha256=rQgdvKmAT9HArVW4TAG5yd2QTKRs3S5LJ9RQbc_EkHE,2518
|
|
66
66
|
truefoundry/deploy/builder/builders/tfy_python_buildpack/__init__.py,sha256=_fjqHKn80qKi68SAMMALge7_A6e1sTsQWichw8uoGIw,2025
|
|
67
|
-
truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py,sha256=
|
|
67
|
+
truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py,sha256=Kj-ICGFTpDj86v6Juohz7q2TNYpcGIeKBW5HADG7SGE,6704
|
|
68
68
|
truefoundry/deploy/builder/builders/tfy_spark_buildpack/__init__.py,sha256=NEPlM6_vTVxp4ITa18B8DBbgYCn1q5d8be21lbgu5oY,2888
|
|
69
|
-
truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py,sha256=
|
|
70
|
-
truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py,sha256
|
|
69
|
+
truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py,sha256=nMJJfxjy8R7BZK89KicerQQwKLspUSJ3kerWZI3hFxk,4571
|
|
70
|
+
truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py,sha256=-D37Zjy2SBt3RHxonPEpR1_LR0W7vTSM1kQ1S-fdK-I,6363
|
|
71
|
+
truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/__init__.py,sha256=ynnjrFDg1__REd_x-npxxj-5zmFo46z_Ntz7GZ9-DHI,1819
|
|
72
|
+
truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/dockerfile_template.py,sha256=cjyPCLXJ8x5blaKbTK4XQ-4tO7DJqe5I9Fw2EJoLmtk,4555
|
|
71
73
|
truefoundry/deploy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
72
74
|
truefoundry/deploy/cli/commands/__init__.py,sha256=qv818jxqSAygJ3h-6Ul8t-5VOgR_UrSgsVtNCl3e5G0,1408
|
|
73
75
|
truefoundry/deploy/cli/commands/apply_command.py,sha256=DmXmKVokkauyKIiJDtErTwbJ5_LvQeJbTQsG5BjyKpo,2427
|
|
@@ -117,7 +119,7 @@ truefoundry/deploy/v2/lib/deploy.py,sha256=Ltm7cpIW14IbmEsR3EAIeWQUch2Z6HLej7heu
|
|
|
117
119
|
truefoundry/deploy/v2/lib/deploy_workflow.py,sha256=G5BzMIbap8pgDX1eY-TITruUxQdkKhYtBmRwLL6lDeY,14342
|
|
118
120
|
truefoundry/deploy/v2/lib/deployable_patched_models.py,sha256=mUi-OjPf7bc8rzfrPLdFb79LKuDq7F36RxL4V-AXebs,6830
|
|
119
121
|
truefoundry/deploy/v2/lib/models.py,sha256=ogc1UYs1Z2nBdGSKCrde9sk8d0GxFKMkem99uqO5CmM,1148
|
|
120
|
-
truefoundry/deploy/v2/lib/patched_models.py,sha256=
|
|
122
|
+
truefoundry/deploy/v2/lib/patched_models.py,sha256=bsznDLcUH5GcW8SUEvHETJqoFGlYYJ0j-tyGIqnRraw,16911
|
|
121
123
|
truefoundry/deploy/v2/lib/source.py,sha256=d6-8_6Zn5koBglqrBrY6ZLG_7yyPuLdyEmK4iZTw6xY,9405
|
|
122
124
|
truefoundry/ml/__init__.py,sha256=EEEHV7w58Krpo_W9Chd8Y3TdItfFO3LI6j6Izqc4-P8,2219
|
|
123
125
|
truefoundry/ml/constants.py,sha256=vDq72d4C9FSWqr9MMdjgTF4TuyNFApvo_6RVsSeAjB4,2837
|
|
@@ -347,7 +349,7 @@ truefoundry/ml/_autogen/models/schema.py,sha256=a_bp42MMPUbwO3407m0UW2W8EOhnxZXf
|
|
|
347
349
|
truefoundry/ml/_autogen/models/signature.py,sha256=rBjpxUIsEeWM0sIyYG5uCJB18DKHR4k5yZw8TzuoP48,4987
|
|
348
350
|
truefoundry/ml/_autogen/models/utils.py,sha256=c7RtSLXhOLcP8rjuUtfnMdaKVTZvvbsmw98gPAkAFrs,24371
|
|
349
351
|
truefoundry/ml/artifact/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
350
|
-
truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=
|
|
352
|
+
truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=8BFKaXDxutw8bPJLnDI0bO0oNS_xJKo2ijubc2PLFsU,35688
|
|
351
353
|
truefoundry/ml/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
352
354
|
truefoundry/ml/cli/cli.py,sha256=MwpY7z_NEeJE_XIP7XbZELjNeu2vpMmohttHCKDRk54,335
|
|
353
355
|
truefoundry/ml/cli/utils.py,sha256=j6_mZ4Spn114mz3P4QQ8jx0tmorXIuyQnHXVUSDvZi4,1035
|
|
@@ -355,6 +357,9 @@ truefoundry/ml/cli/commands/__init__.py,sha256=diDUiRUX4l6TtNLI4iF-ZblczkELM7FRV
|
|
|
355
357
|
truefoundry/ml/cli/commands/download.py,sha256=N9MhsEQ3U24v_OmnMZT8Q4SoAi38Sm7a21unrACOSDw,2573
|
|
356
358
|
truefoundry/ml/cli/commands/model_init.py,sha256=INyUAU6hiFClI8cZqX5hgnrtNbeKxlZxrjFrjzStU18,2664
|
|
357
359
|
truefoundry/ml/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
360
|
+
truefoundry/ml/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
361
|
+
truefoundry/ml/integrations/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
362
|
+
truefoundry/ml/integrations/huggingface/trainer_callback.py,sha256=Zu5AUbH_ct8I1dHyNYJQZBj9Y__hKo0sc2OxpPXJARE,6952
|
|
358
363
|
truefoundry/ml/log_types/__init__.py,sha256=g4u4D4Jaj0aBK5GtrLV88-qThKZR9pSZ17vFEkN-LmM,125
|
|
359
364
|
truefoundry/ml/log_types/plot.py,sha256=LDh4uy6z2P_a2oPM2lc85c0lt8utVvunohzeMawFjZw,7572
|
|
360
365
|
truefoundry/ml/log_types/pydantic_base.py,sha256=eBlw_AEyAz4iJKDP4zgJOCFWcldwQqpf7FADW1jzIQY,272
|
|
@@ -381,7 +386,7 @@ truefoundry/workflow/remote_filesystem/__init__.py,sha256=LQ95ViEjJ7Ts4JcCGOxMPs
|
|
|
381
386
|
truefoundry/workflow/remote_filesystem/logger.py,sha256=em2l7D6sw7xTLDP0kQSLpgfRRCLpN14Qw85TN7ujQcE,1022
|
|
382
387
|
truefoundry/workflow/remote_filesystem/tfy_signed_url_client.py,sha256=xcT0wQmQlgzcj0nP3tJopyFSVWT1uv3nhiTIuwfXYeg,12342
|
|
383
388
|
truefoundry/workflow/remote_filesystem/tfy_signed_url_fs.py,sha256=nSGPZu0Gyd_jz0KsEE-7w_BmnTD8CVF1S8cUJoxaCbc,13305
|
|
384
|
-
truefoundry-0.11.
|
|
385
|
-
truefoundry-0.11.
|
|
386
|
-
truefoundry-0.11.
|
|
387
|
-
truefoundry-0.11.
|
|
389
|
+
truefoundry-0.11.4.dist-info/METADATA,sha256=RD0XhZ5hvcV7BAguapQ9yYssfoEDIXwNUi11w5riKtc,2759
|
|
390
|
+
truefoundry-0.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
391
|
+
truefoundry-0.11.4.dist-info/entry_points.txt,sha256=xVjn7RMN-MW2-9f7YU-bBdlZSvvrwzhpX1zmmRmsNPU,98
|
|
392
|
+
truefoundry-0.11.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|