truefoundry 0.11.1rc1__py3-none-any.whl → 0.11.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release has been flagged as potentially problematic.
- truefoundry/__init__.py +2 -0
- truefoundry/common/constants.py +2 -0
- truefoundry/common/utils.py +9 -3
- truefoundry/deploy/__init__.py +1 -0
- truefoundry/deploy/_autogen/models.py +92 -36
- truefoundry/deploy/builder/__init__.py +2 -0
- truefoundry/deploy/builder/builders/__init__.py +2 -0
- truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py +6 -83
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py +20 -72
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py +46 -29
- truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/__init__.py +52 -0
- truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/dockerfile_template.py +121 -0
- truefoundry/deploy/builder/utils.py +83 -0
- truefoundry/deploy/cli/commands/trigger_command.py +21 -2
- truefoundry/deploy/lib/clients/servicefoundry_client.py +4 -0
- truefoundry/deploy/lib/dao/application.py +2 -0
- truefoundry/deploy/v2/lib/patched_models.py +39 -0
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +15 -50
- truefoundry/workflow/__init__.py +2 -0
- truefoundry/workflow/spark_task.py +91 -0
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/METADATA +6 -3
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/RECORD +24 -21
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/WHEEL +0 -0
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/entry_points.txt +0 -0
truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/__init__.py
ADDED

@@ -0,0 +1,52 @@
+import os
+from tempfile import TemporaryDirectory
+from typing import List, Optional
+
+from truefoundry.deploy._autogen.models import DockerFileBuild, TaskPySparkBuild
+from truefoundry.deploy.builder.builders import dockerfile
+from truefoundry.deploy.builder.builders.tfy_task_pyspark_buildpack.dockerfile_template import (
+    generate_dockerfile_content,
+)
+from truefoundry.deploy.builder.utils import has_python_package_manager_conf_secret
+
+__all__ = ["generate_dockerfile_content", "build"]
+
+
+def _convert_to_dockerfile_build_config(
+    build_configuration: TaskPySparkBuild,
+    dockerfile_path: str,
+    mount_python_package_manager_conf_secret: bool = False,
+) -> DockerFileBuild:
+    dockerfile_content = generate_dockerfile_content(
+        build_configuration=build_configuration,
+        mount_python_package_manager_conf_secret=mount_python_package_manager_conf_secret,
+    )
+    with open(dockerfile_path, "w", encoding="utf8") as fp:
+        fp.write(dockerfile_content)
+
+    return DockerFileBuild(
+        type="dockerfile",
+        dockerfile_path=dockerfile_path,
+    )
+
+
+def build(
+    tag: str,
+    build_configuration: TaskPySparkBuild,
+    extra_opts: Optional[List[str]] = None,
+):
+    mount_python_package_manager_conf_secret = (
+        has_python_package_manager_conf_secret(extra_opts) if extra_opts else False
+    )
+
+    with TemporaryDirectory() as local_dir:
+        docker_build_configuration = _convert_to_dockerfile_build_config(
+            build_configuration,
+            dockerfile_path=os.path.join(local_dir, "Dockerfile"),
+            mount_python_package_manager_conf_secret=mount_python_package_manager_conf_secret,
+        )
+        dockerfile.build(
+            tag=tag,
+            build_configuration=docker_build_configuration,
+            extra_opts=extra_opts,
+        )
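The new entry point can be driven directly; below is a minimal usage sketch. The `TaskPySparkBuild` field names (`spark_version`, `pip_packages`) are inferred from how `dockerfile_template.py` reads them later in this diff, so treat them as assumptions rather than a documented constructor.

# Usage sketch (not part of the diff): field names inferred from
# dockerfile_template.py below; other required fields may exist.
from truefoundry.deploy._autogen.models import TaskPySparkBuild
from truefoundry.deploy.builder.builders import tfy_task_pyspark_buildpack

build_config = TaskPySparkBuild(
    type="task-pyspark-build",
    spark_version="3.5.1",           # pyspark==3.5.1 gets pinned into the image
    pip_packages=["pandas==2.2.2"],  # installed on top of the injected pyspark pin
)

# Writes the rendered Dockerfile into a TemporaryDirectory and delegates
# to the generic dockerfile builder.
tfy_task_pyspark_buildpack.build(
    tag="my-registry/my-task:latest",
    build_configuration=build_config,
)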
truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/dockerfile_template.py
ADDED

@@ -0,0 +1,121 @@
+from mako.template import Template
+
+from truefoundry.common.constants import ENV_VARS, PythonPackageManager
+from truefoundry.deploy._autogen.models import TaskPySparkBuild
+from truefoundry.deploy.builder.constants import (
+    PIP_CONF_BUILDKIT_SECRET_MOUNT,
+    UV_CONF_BUILDKIT_SECRET_MOUNT,
+)
+from truefoundry.deploy.builder.utils import (
+    generate_apt_install_command,
+    generate_pip_install_command,
+    generate_uv_pip_install_command,
+)
+from truefoundry.deploy.v2.lib.patched_models import (
+    _resolve_requirements_path,
+)
+
+# TODO[GW]: Switch to a non-root user inside the container
+_POST_PYTHON_INSTALL_TEMPLATE = """
+% if apt_install_command is not None:
+RUN ${apt_install_command}
+% endif
+% if requirements_path is not None:
+COPY ${requirements_path} ${requirements_destination_path}
+% endif
+% if python_packages_install_command is not None:
+RUN ${package_manager_config_secret_mount} ${python_packages_install_command}
+% endif
+COPY . /app
+WORKDIR /app
+"""
+
+# TODO[GW]: Check if the entrypoint for the image needs to change
+# Using /opt/venv/ because flyte seems to be using it and this doesn't look configurable
+# TODO[GW]: Double check this^
+DOCKERFILE_TEMPLATE = Template(
+    """
+FROM ${spark_image_repo}:${spark_version}
+ENV PATH=/opt/venv/bin:$PATH
+USER root
+RUN mkdir -p /var/lib/apt/lists/partial && \
+    apt update && \
+    DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends git && \
+    python -m venv /opt/venv/ && \
+    rm -rf /var/lib/apt/lists/*
+"""
+    + _POST_PYTHON_INSTALL_TEMPLATE
+)
+
+
+def get_additional_pip_packages(build_configuration: TaskPySparkBuild):
+    return [
+        f"pyspark=={build_configuration.spark_version}",
+    ]
+
+
+def generate_dockerfile_content(
+    build_configuration: TaskPySparkBuild,
+    package_manager: str = ENV_VARS.TFY_PYTHON_BUILD_PACKAGE_MANAGER,
+    mount_python_package_manager_conf_secret: bool = False,
+) -> str:
+    # TODO (chiragjn): Handle recursive references to other requirements files e.g. `-r requirements-gpu.txt`
+    requirements_path = _resolve_requirements_path(
+        build_context_path="",
+        requirements_path=build_configuration.requirements_path,
+    )
+    requirements_destination_path = (
+        "/tmp/requirements.txt" if requirements_path else None
+    )
+    # if not build_configuration.python_version:
+    #     raise ValueError(
+    #         "`python_version` is required for `tfy-python-buildpack` builder"
+    #     )
+    pip_packages = get_additional_pip_packages(build_configuration) + (
+        build_configuration.pip_packages or []
+    )
+    if package_manager == PythonPackageManager.PIP.value:
+        python_packages_install_command = generate_pip_install_command(
+            requirements_path=requirements_destination_path,
+            pip_packages=pip_packages,
+            mount_pip_conf_secret=mount_python_package_manager_conf_secret,
+        )
+    elif package_manager == PythonPackageManager.UV.value:
+        python_packages_install_command = generate_uv_pip_install_command(
+            requirements_path=requirements_destination_path,
+            pip_packages=pip_packages,
+            mount_uv_conf_secret=mount_python_package_manager_conf_secret,
+        )
+    else:
+        raise ValueError(f"Unsupported package manager: {package_manager}")
+
+    apt_install_command = generate_apt_install_command(
+        apt_packages=build_configuration.apt_packages
+    )
+    template_args = {
+        "spark_image_repo": ENV_VARS.TFY_TASK_PYSPARK_BUILD_SPARK_IMAGE_REPO,
+        "spark_version": build_configuration.spark_version,
+        "apt_install_command": apt_install_command,
+        "requirements_path": requirements_path,
+        "requirements_destination_path": requirements_destination_path,
+        "python_packages_install_command": python_packages_install_command,
+    }
+
+    if mount_python_package_manager_conf_secret:
+        if package_manager == PythonPackageManager.PIP.value:
+            template_args["package_manager_config_secret_mount"] = (
+                PIP_CONF_BUILDKIT_SECRET_MOUNT
+            )
+        elif package_manager == PythonPackageManager.UV.value:
+            template_args["package_manager_config_secret_mount"] = (
+                UV_CONF_BUILDKIT_SECRET_MOUNT
+            )
+        else:
+            raise ValueError(f"Unsupported package manager: {package_manager}")
+    else:
+        template_args["package_manager_config_secret_mount"] = ""
+
+    template = DOCKERFILE_TEMPLATE
+
+    dockerfile_content = template.render(**template_args)
+    return dockerfile_content
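For orientation, here is a sketch of what `generate_dockerfile_content` produces for a simple configuration. The base image repo comes from `ENV_VARS.TFY_TASK_PYSPARK_BUILD_SPARK_IMAGE_REPO` at build time and is shown as a placeholder; the rendered text is traced by hand from the template above, not captured from a real run.

# Illustrative render (not part of the diff); assumes spark_version "3.5.1",
# no apt packages, no requirements file, and the default pip package manager.
from truefoundry.deploy._autogen.models import TaskPySparkBuild
from truefoundry.deploy.builder.builders.tfy_task_pyspark_buildpack.dockerfile_template import (
    generate_dockerfile_content,
)

build_config = TaskPySparkBuild(type="task-pyspark-build", spark_version="3.5.1")
content = generate_dockerfile_content(build_configuration=build_config)
# `content` renders roughly to:
#   FROM <spark_image_repo>:3.5.1
#   ENV PATH=/opt/venv/bin:$PATH
#   USER root
#   RUN mkdir -p /var/lib/apt/lists/partial && \
#       apt update && \
#       DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends git && \
#       python -m venv /opt/venv/ && \
#       rm -rf /var/lib/apt/lists/*
#   RUN python -m pip install -U pip setuptools wheel && python -m pip install --use-pep517 --no-cache-dir pyspark==3.5.1
#   COPY . /app
#   WORKDIR /app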
truefoundry/deploy/builder/utils.py
CHANGED

@@ -1,8 +1,12 @@
+import shlex
 from typing import List, Optional
 
+from truefoundry.common.constants import ENV_VARS
 from truefoundry.deploy.builder.constants import (
     BUILDKIT_SECRET_MOUNT_PIP_CONF_ID,
     BUILDKIT_SECRET_MOUNT_UV_CONF_ID,
+    PIP_CONF_SECRET_MOUNT_AS_ENV,
+    UV_CONF_SECRET_MOUNT_AS_ENV,
 )

@@ -35,3 +39,82 @@ def has_python_package_manager_conf_secret(docker_build_extra_args: List[str]) -
     ):
         return True
     return False
+
+
+def generate_pip_install_command(
+    requirements_path: Optional[str],
+    pip_packages: Optional[List[str]],
+    mount_pip_conf_secret: bool = False,
+) -> Optional[str]:
+    upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
+    envs = []
+    if mount_pip_conf_secret:
+        envs.append(PIP_CONF_SECRET_MOUNT_AS_ENV)
+
+    command = ["python", "-m", "pip", "install", "--use-pep517", "--no-cache-dir"]
+    args = []
+    if requirements_path:
+        args.append("-r")
+        args.append(requirements_path)
+
+    if pip_packages:
+        args.extend(pip_packages)
+
+    if not args:
+        return None
+
+    final_pip_install_command = shlex.join(envs + command + args)
+    final_docker_run_command = " && ".join(
+        [upgrade_pip_command, final_pip_install_command]
+    )
+    return final_docker_run_command
+
+
+def generate_uv_pip_install_command(
+    requirements_path: Optional[str],
+    pip_packages: Optional[List[str]],
+    mount_uv_conf_secret: bool = False,
+) -> Optional[str]:
+    upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
+    uv_mount = f"--mount=from={ENV_VARS.TFY_PYTHON_BUILD_UV_IMAGE_URI},source=/uv,target=/usr/local/bin/uv"
+    envs = [
+        "UV_LINK_MODE=copy",
+        "UV_PYTHON_DOWNLOADS=never",
+        "UV_INDEX_STRATEGY=unsafe-best-match",
+    ]
+    if mount_uv_conf_secret:
+        envs.append(UV_CONF_SECRET_MOUNT_AS_ENV)
+
+    command = ["uv", "pip", "install", "--no-cache-dir"]
+
+    args = []
+
+    if requirements_path:
+        args.append("-r")
+        args.append(requirements_path)
+
+    if pip_packages:
+        args.extend(pip_packages)
+
+    if not args:
+        return None
+
+    uv_pip_install_command = shlex.join(envs + command + args)
+    shell_commands = " && ".join([upgrade_pip_command, uv_pip_install_command])
+    final_docker_run_command = " ".join([uv_mount, shell_commands])
+
+    return final_docker_run_command
+
+
+def generate_apt_install_command(apt_packages: Optional[List[str]]) -> Optional[str]:
+    packages_list = None
+    if apt_packages:
+        packages_list = " ".join(p.strip() for p in apt_packages if p.strip())
+    if not packages_list:
+        return None
+    apt_update_command = "apt update"
+    apt_install_command = f"DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends {packages_list}"
+    clear_apt_lists_command = "rm -rf /var/lib/apt/lists/*"
+    return " && ".join(
+        [apt_update_command, apt_install_command, clear_apt_lists_command]
+    )
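Since these helpers only assemble shell strings, their behavior is easy to pin down. A quick sanity-check sketch; the expected strings below are traced by hand from the implementations above, with no conf secrets mounted:

# Not part of the diff: expected outputs traced from the functions above.
from truefoundry.deploy.builder.utils import (
    generate_apt_install_command,
    generate_pip_install_command,
)

cmd = generate_pip_install_command(
    requirements_path="/tmp/requirements.txt",
    pip_packages=["pyspark==3.5.1"],
)
# -> "python -m pip install -U pip setuptools wheel && python -m pip install
#     --use-pep517 --no-cache-dir -r /tmp/requirements.txt pyspark==3.5.1"

# Nothing to install -> None, so the Dockerfile template skips the RUN line.
assert generate_pip_install_command(requirements_path=None, pip_packages=None) is None

apt = generate_apt_install_command(apt_packages=["curl", "jq"])
# -> "apt update && DEBIAN_FRONTEND=noninteractive apt install -y
#     --no-install-recommends curl jq && rm -rf /var/lib/apt/lists/*"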
truefoundry/deploy/cli/commands/trigger_command.py
CHANGED

@@ -35,8 +35,20 @@ def trigger_command():
     nargs=-1,
     required=False,
 )
+@click.option(
+    "--run-name-alias",
+    "--run_name_alias",
+    type=click.STRING,
+    required=False,
+    help="Alias for the job run name.",
+)
 @handle_exception_wrapper
-def trigger_job(application_fqn: str, command: Optional[Sequence[str]], params):
+def trigger_job(
+    application_fqn: str,
+    params,
+    command: Optional[Sequence[str]],
+    run_name_alias: Optional[str],
+):
     """
     Trigger a Job on TrueFoundry asynchronously

@@ -54,6 +66,10 @@ def trigger_job(application_fqn: str, command: Optional[Sequence[str]], params):
     Passing params:
 
     [b]tfy trigger job --application-fqn "my-cluster:my-workspace:my-job" -- --param1_name param1_value --param2_name param2_value ...[/]
+    \n
+
+    passing run_name_alias:
+    [b]tfy trigger job --application-fqn "my-cluster:my-workspace:my-job" --run_name_alias "my_run_alias"[/]
     """
     if params:
         params_dict = {}

@@ -78,7 +94,10 @@ def trigger_job(application_fqn: str, command: Optional[Sequence[str]], params):
         params_dict[key] = value
 
     application.trigger_job(
-        application_fqn=application_fqn,
+        application_fqn=application_fqn,
+        command=command,
+        params=params,
+        run_name_alias=run_name_alias,
     )
truefoundry/deploy/lib/clients/servicefoundry_client.py
CHANGED

@@ -578,6 +578,7 @@ class ServiceFoundryServiceClient(BaseServiceFoundryServiceClient):
     def trigger_job(
         self,
         deployment_id: str,
+        run_name_alias: Optional[str] = None,
         command: Optional[str] = None,
         params: Optional[Dict[str, str]] = None,
     ) -> TriggerJobResult:

@@ -585,11 +586,14 @@ class ServiceFoundryServiceClient(BaseServiceFoundryServiceClient):
        body = {
            "deploymentId": deployment_id,
            "input": {},
+           "metadata": {},
        }
        if command:
            body["input"]["command"] = command
        if params:
            body["input"]["params"] = params
+       if run_name_alias:
+           body["metadata"]["job_run_name_alias"] = run_name_alias
        response = session_with_retries().post(
            url, json=body, headers=self._get_headers()
        )
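Traced through the code above, a trigger with an alias posts a request body shaped as follows; how the backend consumes `metadata.job_run_name_alias` is not part of this diff.

# Shape of the JSON payload posted by trigger_job (traced from the code above).
body = {
    "deploymentId": "deployment-id",  # illustrative value
    "input": {
        "command": "python main.py",                # only set when a command is passed
        "params": {"param1_name": "param1_value"},  # only set when params are passed
    },
    "metadata": {"job_run_name_alias": "my_run_alias"},  # only set when provided
}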
truefoundry/deploy/lib/dao/application.py
CHANGED

@@ -117,6 +117,7 @@ def trigger_job(
     application_fqn: str,
     command: Optional[Union[str, Sequence[str]]] = None,
     params: Optional[Dict[str, str]] = None,
+    run_name_alias: Optional[str] = None,
 ) -> TriggerJobResult:
     """
     Trigger a Job on TrueFoundry platform

@@ -178,6 +179,7 @@ def trigger_job(
         deployment_id=application_info.activeDeploymentId,
         command=command_str if command_str else None,
         params=params if params else None,
+        run_name_alias=run_name_alias.strip() if run_name_alias else None,
     )
     jobRunName = result.jobRunName
     previous_runs_url = f"{client.tfy_host.strip('/')}/deployments/{application_info.id}?tab=previousRuns"
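At the SDK level the new argument threads straight through; a minimal sketch using the module changed above (note the alias is whitespace-stripped before being sent):

# Minimal sketch; the import path is the module shown in this hunk.
from truefoundry.deploy.lib.dao.application import trigger_job

result = trigger_job(
    application_fqn="my-cluster:my-workspace:my-job",
    params={"param1_name": "param1_value"},
    run_name_alias=" my_run_alias ",  # stripped to "my_run_alias" before the API call
)
print(result.jobRunName)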
truefoundry/deploy/v2/lib/patched_models.py
CHANGED

@@ -526,3 +526,42 @@ class SparkJobPythonNotebookEntrypoint(
     models.SparkJobPythonNotebookEntrypoint, PatchedModelBase
 ):
     type: Literal["python-notebook"] = "python-notebook"
+
+
+class SparkJobScalaNotebookEntrypoint(
+    models.SparkJobScalaNotebookEntrypoint, PatchedModelBase
+):
+    type: Literal["scala-notebook"] = "scala-notebook"
+
+
+class PySparkTaskConfig(models.PySparkTaskConfig, PatchedModelBase):
+    type: Literal["pyspark-task-config"] = "pyspark-task-config"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        try:
+            import truefoundry.workflow.spark_task as _  # noqa: F401
+        except ImportError as e:
+            raise ImportError(
+                "truefoundry.workflow.spark_task is not installed. Please install it with `pip install truefoundry[workflow,spark]`"
+            ) from e
+
+
+class SparkDriverConfig(models.SparkDriverConfig, PatchedModelBase):
+    type: Literal["spark-driver-config"] = "spark-driver-config"
+
+
+class SparkExecutorConfig(models.SparkExecutorConfig, PatchedModelBase):
+    type: Literal["spark-executor-config"] = "spark-executor-config"
+
+
+class SparkExecutorFixedInstances(models.SparkExecutorFixedInstances, PatchedModelBase):
+    type: Literal["fixed"] = "fixed"
+
+
+class SparkExecutorDynamicScaling(models.SparkExecutorDynamicScaling, PatchedModelBase):
+    type: Literal["dynamic"] = "dynamic"
+
+
+class TaskPySparkBuild(models.TaskPySparkBuild, PatchedModelBase):
+    type: Literal["task-pyspark-build"] = "task-pyspark-build"
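The interesting detail here is the import guard in `PySparkTaskConfig.__init__`: merely constructing the config imports `truefoundry.workflow.spark_task`, which (as shown later in this diff) registers the Spark task plugin as a side effect. A sketch of the failure mode when the extras are missing; any other constructor fields are omitted because the diff does not show them:

# Sketch: PySparkTaskConfig's full field set is not visible in this diff,
# so the construction below assumes no required fields beyond the defaults.
from truefoundry.deploy.v2.lib.patched_models import PySparkTaskConfig

try:
    config = PySparkTaskConfig()
except ImportError as e:
    # Raised when truefoundry[workflow,spark] is not installed.
    print(e)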
truefoundry/ml/artifact/truefoundry_artifact_repo.py
CHANGED

@@ -6,17 +6,7 @@ import uuid
 from concurrent.futures import FIRST_EXCEPTION, Future, ThreadPoolExecutor, wait
 from shutil import rmtree
 from threading import Event
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterator,
-    List,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-)
+from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple
 from urllib.parse import unquote
 from urllib.request import pathname2url

@@ -31,6 +21,7 @@ from rich.progress import (
 )
 from tqdm.utils import CallbackIOWrapper
 from truefoundry_sdk import (
+    FileInfo,
     MultiPartUploadResponse,
     MultiPartUploadStorageProvider,
     Operation,

@@ -52,11 +43,6 @@ from truefoundry.common.storage_provider_utils import (
 )
 from truefoundry.ml._autogen.client import (  # type: ignore[attr-defined]
     ApiClient,
-    FileInfoDto,
-    ListFilesForArtifactVersionRequestDto,
-    ListFilesForArtifactVersionsResponseDto,
-    ListFilesForDatasetRequestDto,
-    ListFilesForDatasetResponseDto,
     MlfoundryArtifactsApi,
     RunArtifactsApi,
 )

@@ -592,44 +578,23 @@ class MlFoundryArtifactsRepository:
             progress=progress,
         )
 
-    def _list_files(
-        self, artifact_identifier: ArtifactIdentifier, path, page_size, page_token
-    ) -> Union[ListFilesForDatasetResponseDto, ListFilesForArtifactVersionsResponseDto]:
-        if artifact_identifier.dataset_fqn:
-            return self._mlfoundry_artifacts_api.list_files_for_dataset_post(
-                list_files_for_dataset_request_dto=ListFilesForDatasetRequestDto(
-                    dataset_fqn=artifact_identifier.dataset_fqn,
-                    path=path,
-                    max_results=page_size,
-                    page_token=page_token,
-                )
-            )
-        else:
-            return self._mlfoundry_artifacts_api.list_files_for_artifact_version_post(
-                list_files_for_artifact_version_request_dto=ListFilesForArtifactVersionRequestDto(
-                    id=str(artifact_identifier.artifact_version_id),
-                    path=path,
-                    max_results=page_size,
-                    page_token=page_token,
-                )
-            )
-
     def list_artifacts(
         self, path=None, page_size=_LIST_FILES_PAGE_SIZE, **kwargs
-    ) -> Iterator[FileInfoDto]:
-        started = True
-        page = self._list_files(
-            artifact_identifier=self.artifact_identifier,
-            path=path,
-        )
-        page_token = page.next_page_token
+    ) -> Iterator[FileInfo]:
+        if self.artifact_identifier.dataset_id:
+            for file_info in client.data_directories.list_files(
+                id=str(self.artifact_identifier.dataset_id),
+                path=path,
+                limit=page_size,
+            ):
+                yield file_info
+        else:
+            for file_info in client.artifact_versions.list_files(
+                id=str(self.artifact_identifier.artifact_version_id),
+                path=path,
+                limit=page_size,
+            ):
+                yield file_info
 
     def _is_directory(self, artifact_path):
         # TODO: Ideally server should return a flag to indicate if it is a directory
truefoundry/workflow/__init__.py
CHANGED

@@ -15,6 +15,7 @@ from flytekit.types.file import FlyteFile
 from truefoundry.common.constants import ENV_VARS
 from truefoundry.deploy.v2.lib.patched_models import (
     ContainerTaskConfig,
+    PySparkTaskConfig,
     PythonTaskConfig,
     TaskDockerFileBuild,
     TaskPythonBuild,

@@ -41,6 +42,7 @@ __all__ = [
     "ExecutionConfig",
     "FlyteFile",
     "FlyteError",
+    "PySparkTaskConfig",
 ]
truefoundry/workflow/spark_task.py
ADDED

@@ -0,0 +1,91 @@
+import os
+import shutil
+from typing import Any, Callable, Dict, Optional
+
+from flytekit import FlyteContextManager, PythonFunctionTask, lazy_module
+from flytekit.configuration import SerializationSettings
+from flytekit.core.context_manager import ExecutionParameters
+from flytekit.extend import ExecutionState, TaskPlugins
+from flytekit.extend.backend.base_agent import AsyncAgentExecutorMixin
+
+from truefoundry.deploy.v2.lib.patched_models import PySparkTaskConfig
+
+pyspark_sql = lazy_module("pyspark.sql")
+SparkSession = pyspark_sql.SparkSession
+
+
+class TfySparkFunctionTask(
+    AsyncAgentExecutorMixin, PythonFunctionTask[PySparkTaskConfig]
+):
+    """
+    Actual Plugin that transforms the local python code for execution within a spark context
+    """
+
+    _SPARK_TASK_TYPE = "spark"
+
+    def __init__(
+        self,
+        task_config: PySparkTaskConfig,
+        task_function: Callable,
+        **kwargs,
+    ):
+        self.sess: Optional[SparkSession] = None  # type: ignore
+
+        task_type = self._SPARK_TASK_TYPE
+
+        super(TfySparkFunctionTask, self).__init__(
+            task_config=task_config,
+            task_type=task_type,
+            task_function=task_function,
+            **kwargs,
+        )
+
+    def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]:
+        return {"truefoundry": self._task_config.dict()}
+
+    def pre_execute(self, user_params: ExecutionParameters) -> ExecutionParameters:
+        import pyspark as _pyspark
+
+        ctx = FlyteContextManager.current_context()
+        sess_builder = _pyspark.sql.SparkSession.builder.appName(
+            f"FlyteSpark: {user_params.execution_id}"
+        )
+        if not (
+            ctx.execution_state
+            and ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION
+        ):
+            # If either of above cases is not true, then we are in local execution of this task
+            # Add system spark-conf for local/notebook based execution.
+            spark_conf = _pyspark.SparkConf()
+            spark_conf.set("spark.driver.bindAddress", "127.0.0.1")
+            for k, v in self.task_config.spark_conf.items():
+                spark_conf.set(k, v)
+            # In local execution, propagate PYTHONPATH to executors too. This makes the spark
+            # execution hermetic to the execution environment. For example, it allows running
+            # Spark applications using Bazel, without major changes.
+            if "PYTHONPATH" in os.environ:
+                spark_conf.setExecutorEnv("PYTHONPATH", os.environ["PYTHONPATH"])
+            sess_builder = sess_builder.config(conf=spark_conf)
+
+        self.sess = sess_builder.getOrCreate()
+
+        if (
+            ctx.serialization_settings
+            and ctx.serialization_settings.fast_serialization_settings
+            and ctx.serialization_settings.fast_serialization_settings.enabled
+            and ctx.execution_state
+            and ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION
+        ):
+            file_name = "flyte_wf"
+            file_format = "zip"
+            shutil.make_archive(file_name, file_format, os.getcwd())
+            self.sess.sparkContext.addPyFile(f"{file_name}.{file_format}")
+
+        return user_params.builder().add_attr("SPARK_SESSION", self.sess).build()
+
+    def execute(self, **kwargs) -> Any:
+        return PythonFunctionTask.execute(self, **kwargs)
+
+
+# Inject the Spark plugin into flytekits dynamic plugin loading system
+TaskPlugins.register_pythontask_plugin(PySparkTaskConfig, TfySparkFunctionTask)
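Because the final line registers `TfySparkFunctionTask` against `PySparkTaskConfig`, any flytekit task configured with that config type is routed through this plugin. A minimal sketch using the standard flytekit task-plugin pattern; `spark_conf` is assumed to be a valid config field (it is read in `pre_execute` above), and the session is read back through `current_context()`, matching the `SPARK_SESSION` attribute added in `pre_execute`:

# Sketch, not from the diff: only spark_conf is assumed as a config field.
import flytekit
from flytekit import task

from truefoundry.workflow import PySparkTaskConfig

@task(task_config=PySparkTaskConfig(spark_conf={"spark.executor.memory": "2g"}))
def count_rows(path: str) -> int:
    # pre_execute attaches the session as SPARK_SESSION on the execution
    # parameters; flytekit resolves the attribute case-insensitively.
    spark = flytekit.current_context().spark_session
    return spark.read.parquet(path).count()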
{truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: truefoundry
-Version: 0.11.1rc1
+Version: 0.11.3
 Summary: TrueFoundry CLI
 Author-email: TrueFoundry Team <abhishek@truefoundry.com>
 Requires-Python: <3.14,>=3.8.1

@@ -30,14 +30,17 @@ Requires-Dist: requirements-parser<0.12.0,>=0.11.0
 Requires-Dist: rich-click<2.0.0,>=1.2.1
 Requires-Dist: rich<14.0.0,>=13.7.1
 Requires-Dist: tqdm<5.0.0,>=4.0.0
-Requires-Dist: truefoundry-sdk<0.2.0,>=0.1.
+Requires-Dist: truefoundry-sdk<0.2.0,>=0.1.9
 Requires-Dist: typing-extensions>=4.0
 Requires-Dist: urllib3<3,>=1.26.18
 Requires-Dist: yq<4.0.0,>=3.1.0
 Provides-Extra: ai
 Requires-Dist: mcp==1.9.4; (python_version >= '3.10') and extra == 'ai'
+Provides-Extra: spark
+Requires-Dist: flytekit==1.15.3; (python_version >= '3.9' and python_version <= '3.12') and extra == 'spark'
+Requires-Dist: flytekitplugins-spark==1.15.3; (python_version >= '3.9' and python_version <= '3.12') and extra == 'spark'
 Provides-Extra: workflow
-Requires-Dist: flytekit==1.15.3; (python_version >= '3.9' and python_version
+Requires-Dist: flytekit==1.15.3; (python_version >= '3.9' and python_version <= '3.12') and extra == 'workflow'
 Description-Content-Type: text/markdown
 
 # TrueFoundry