truefoundry 0.11.1rc1__py3-none-any.whl → 0.11.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of truefoundry might be problematic.
- truefoundry/__init__.py +2 -0
- truefoundry/common/constants.py +2 -0
- truefoundry/common/utils.py +9 -3
- truefoundry/deploy/__init__.py +1 -0
- truefoundry/deploy/_autogen/models.py +92 -36
- truefoundry/deploy/builder/__init__.py +2 -0
- truefoundry/deploy/builder/builders/__init__.py +2 -0
- truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py +6 -83
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py +20 -72
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py +46 -29
- truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/__init__.py +52 -0
- truefoundry/deploy/builder/builders/tfy_task_pyspark_buildpack/dockerfile_template.py +121 -0
- truefoundry/deploy/builder/utils.py +83 -0
- truefoundry/deploy/cli/commands/trigger_command.py +21 -2
- truefoundry/deploy/lib/clients/servicefoundry_client.py +4 -0
- truefoundry/deploy/lib/dao/application.py +2 -0
- truefoundry/deploy/v2/lib/patched_models.py +39 -0
- truefoundry/ml/artifact/truefoundry_artifact_repo.py +15 -50
- truefoundry/workflow/__init__.py +2 -0
- truefoundry/workflow/spark_task.py +91 -0
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/METADATA +6 -3
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/RECORD +24 -21
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/WHEEL +0 -0
- {truefoundry-0.11.1rc1.dist-info → truefoundry-0.11.3.dist-info}/entry_points.txt +0 -0
truefoundry/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from truefoundry_sdk import (
     ToolSchema,
     UserMessage,
 )
+from truefoundry_sdk.client import TrueFoundry
 
 from truefoundry._client import client
 from truefoundry.common.warnings import (
@@ -39,6 +40,7 @@ __all__ = [
     "render_prompt",
     "suppress_truefoundry_deprecation_warnings",
     "SystemMessage",
+    "TrueFoundry",
     "ToolCall",
     "ToolMessage",
     "ToolSchema",
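The net effect of the hunks above is that the SDK client class is re-exported from the package root. A minimal sketch (import only; constructor arguments are not part of this diff):

    # With 0.11.3 the client can be imported from the package root.
    from truefoundry import TrueFoundry  # previously: from truefoundry_sdk.client import TrueFoundry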
truefoundry/common/constants.py
CHANGED
@@ -68,6 +68,8 @@ class TrueFoundrySdkEnv(BaseSettings):
     # TODO(gw): Use another image with more linient rate limits
     TFY_SPARK_BUILD_SPARK_IMAGE_REPO: str = "public.ecr.aws/bitnami/spark"
 
+    TFY_TASK_PYSPARK_BUILD_SPARK_IMAGE_REPO: str = "public.ecr.aws/bitnami/spark"
+
     # For local development, this enables futher configuration via _TFYServersConfig
     TFY_CLI_LOCAL_DEV_MODE: bool = False
 
truefoundry/common/utils.py
CHANGED
@@ -16,6 +16,7 @@ from truefoundry.common.constants import (
     API_SERVER_RELATIVE_PATH,
     ENV_VARS,
     MLFOUNDRY_SERVER_RELATIVE_PATH,
+    TFY_API_KEY_ENV_KEY,
     TFY_DEBUG_ENV_KEY,
     TFY_HOST_ENV_KEY,
     TFY_INTERNAL_ENV_KEY,
@@ -113,9 +114,14 @@ def validate_tfy_host(tfy_host: str) -> None:
 def resolve_tfy_host(tfy_host: Optional[str] = None) -> str:
     tfy_host = tfy_host or ENV_VARS.TFY_HOST
     if not tfy_host:
-
-
-
+        if ENV_VARS.TFY_API_KEY:
+            raise ValueError(
+                f"TFY_HOST` env must be set since `{TFY_API_KEY_ENV_KEY}` env is set. Either set `{TFY_HOST_ENV_KEY}` or unset `{TFY_API_KEY_ENV_KEY}` and login"
+            )
+        else:
+            raise ValueError(
+                f"Either `host` should be provided using `--host <value>`, or `{TFY_HOST_ENV_KEY}` env must be set"
+            )
     tfy_host = tfy_host.strip("/")
     validate_tfy_host(tfy_host)
     return tfy_host
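A hedged sketch of the new resolve_tfy_host behaviour (module path and messages as shown in the hunk above):

    from truefoundry.common.utils import resolve_tfy_host

    # With no explicit host and the TFY_HOST env unset, resolve_tfy_host now raises a
    # ValueError whose message depends on whether the API key env is set: with an API
    # key present the error asks for TFY_HOST specifically, otherwise it asks for
    # either `--host <value>` or the TFY_HOST env var.
    try:
        host = resolve_tfy_host(None)
        print(host)
    except ValueError as err:
        print(err)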
truefoundry/deploy/__init__.py
CHANGED
@@ -120,6 +120,7 @@ from truefoundry.deploy.v2.lib.patched_models import (
     SparkJobPythonEntrypoint,
     SparkJobPythonNotebookEntrypoint,
     SparkJobScalaEntrypoint,
+    SparkJobScalaNotebookEntrypoint,
     SQSInputConfig,
     SQSOutputConfig,
     SQSQueueMetricConfig,
truefoundry/deploy/_autogen/models.py
CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: application.json
-# timestamp: 2025-
+# timestamp: 2025-07-28T14:11:36+00:00
 
 from __future__ import annotations
 
@@ -103,7 +103,7 @@ class AsyncProcessorSidecar(BaseModel):
 
 
 class Autoshutdown(BaseModel):
-    wait_time: conint(ge=0) = Field(
+    wait_time: conint(ge=0, le=604800) = Field(
         900,
         description="The period to wait after the last received request before scaling the replicas to 0. This value should be high enough to allow for the replicas of the service to come up to avoid premature scaling down.",
     )
@@ -957,6 +957,14 @@ class SparkJobScalaEntrypoint(BaseModel):
     )
 
 
+class SparkJobScalaNotebookEntrypoint(BaseModel):
+    type: Literal["scala-notebook"] = Field(..., description="")
+    main_application_file: str = Field(
+        ...,
+        description="The main application file to be executed by the spark job. Relative path in case of git repository.",
+    )
+
+
 class StaticVolumeConfig(BaseModel):
     type: Literal["static"] = Field(..., description="Volume Type for the volume.")
     persistent_volume_name: str = Field(
@@ -989,6 +997,34 @@ class TaskDockerFileBuild(BaseModel):
     build_args: Optional[Dict[str, str]] = Field(None, description="")
 
 
+class TaskPySparkBuild(BaseModel):
+    """
+    Describes the configuration for the PySpark build for a task
+    """
+
+    type: Literal["task-pyspark-build"] = Field(..., description="")
+    spark_version: str = Field(
+        "3.5.2",
+        description="Spark version should match the spark version installed in the image.",
+    )
+    docker_registry: Optional[str] = Field(
+        None,
+        description="FQN of the container registry. If you can't find your registry here,\nadd it through the [Integrations](/integrations?tab=docker-registry) page",
+    )
+    requirements_path: Optional[str] = Field(
+        None,
+        description="Path to `requirements.txt` relative to\n`Path to build context`",
+    )
+    pip_packages: Optional[List[str]] = Field(
+        None,
+        description='Define pip package requirements.\nIn Python/YAML E.g. ["fastapi>=0.90,<1.0", "uvicorn"]',
+    )
+    apt_packages: Optional[List[str]] = Field(
+        None,
+        description='Debian packages to install via `apt get`.\nIn Python/YAML E.g. ["git", "ffmpeg", "htop"]',
+    )
+
+
 class TaskPythonBuild(BaseModel):
     """
     Describes the configuration for the python build for a task
@@ -1200,7 +1236,7 @@ class ContainerTaskConfig(BaseModel):
         description="Configure environment variables to be injected in the task either as plain text or secrets. [Docs](https://docs.truefoundry.com/docs/env-variables)",
     )
     resources: Optional[Resources] = None
-    mounts: Optional[List[
+    mounts: Optional[List[VolumeMount]] = Field(
         None, description="Configure data to be mounted to Workflow pod(s) as a volume."
     )
     service_account: Optional[str] = Field(None, description="")
@@ -1461,6 +1497,7 @@ class SparkJob(BaseModel):
         SparkJobScalaEntrypoint,
         SparkJobJavaEntrypoint,
         SparkJobPythonNotebookEntrypoint,
+        SparkJobScalaNotebookEntrypoint,
     ] = Field(..., description="")
     driver_config: SparkDriverConfig
     executor_config: SparkExecutorConfig
@@ -1567,15 +1604,6 @@ class FlyteLaunchPlan(BaseModel):
     closure: Any
 
 
-class FlyteTaskCustom(BaseModel):
-    truefoundry: Union[PythonTaskConfig, ContainerTaskConfig]
-
-
-class FlyteTaskTemplate(BaseModel):
-    id: FlyteTaskID
-    custom: FlyteTaskCustom
-
-
 class JobAlert(BaseModel):
     """
     Describes the configuration for the job alerts
@@ -1594,6 +1622,25 @@ class JobAlert(BaseModel):
     on_failure: bool = Field(True, description="Send an alert when the job fails")
 
 
+class PySparkTaskConfig(BaseModel):
+    type: Literal["pyspark-task-config"] = Field(..., description="")
+    image: TaskPySparkBuild
+    driver_config: SparkDriverConfig
+    executor_config: SparkExecutorConfig
+    spark_conf: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Extra configuration properties to be passed to the spark job. [Docs](https://spark.apache.org/docs/latest/configuration.html)",
+    )
+    env: Optional[Dict[str, str]] = Field(
+        None,
+        description="Configure environment variables to be injected in the task either as plain text or secrets. [Docs](https://docs.truefoundry.com/docs/env-variables)",
+    )
+    mounts: Optional[List[Union[SecretMount, StringDataMount, VolumeMount]]] = Field(
+        None, description="Configure data to be mounted to Workflow pod(s) as a volume."
+    )
+    service_account: Optional[str] = Field(None, description="")
+
+
 class Service(BaseService):
     """
     Describes the configuration for the service
@@ -1630,9 +1677,13 @@ class AsyncService(BaseService):
     sidecar: Optional[AsyncProcessorSidecar] = None
 
 
-class
-
-
+class FlyteTaskCustom(BaseModel):
+    truefoundry: Union[PythonTaskConfig, ContainerTaskConfig, PySparkTaskConfig]
+
+
+class FlyteTaskTemplate(BaseModel):
+    id: FlyteTaskID
+    custom: FlyteTaskCustom
 
 
 class Job(BaseModel):
@@ -1688,27 +1739,6 @@ class Job(BaseModel):
     )
 
 
-class Workflow(BaseModel):
-    """
-    Describes the configuration for the worflow
-    """
-
-    type: Literal["workflow"] = Field(..., description="")
-    name: constr(regex=r"^[a-z](?:[a-z0-9]|-(?!-)){1,30}[a-z0-9]$") = Field(
-        ..., description="Name of the workflow"
-    )
-    source: Union[LocalSource, RemoteSource] = Field(
-        ..., description="Source Code for the workflow, either local or remote"
-    )
-    workflow_file_path: str = Field(
-        ..., description="Path to the workflow file relative to the project root path"
-    )
-    flyte_entities: Optional[List[Union[FlyteTask, FlyteWorkflow, FlyteLaunchPlan]]] = (
-        Field(None, description="")
-    )
-    alerts: Optional[List[WorkflowAlert]] = Field(None, description="")
-
-
 class ApplicationSet(BaseModel):
     """
     Describes the configuration for the application set
@@ -1735,6 +1765,32 @@ class ApplicationSet(BaseModel):
     )
 
 
+class FlyteTask(BaseModel):
+    template: FlyteTaskTemplate
+    description: Optional[Any] = None
+
+
+class Workflow(BaseModel):
+    """
+    Describes the configuration for the worflow
+    """
+
+    type: Literal["workflow"] = Field(..., description="")
+    name: constr(regex=r"^[a-z](?:[a-z0-9]|-(?!-)){1,30}[a-z0-9]$") = Field(
+        ..., description="Name of the workflow"
+    )
+    source: Union[LocalSource, RemoteSource] = Field(
+        ..., description="Source Code for the workflow, either local or remote"
+    )
+    workflow_file_path: str = Field(
+        ..., description="Path to the workflow file relative to the project root path"
+    )
+    flyte_entities: Optional[List[Union[FlyteTask, FlyteWorkflow, FlyteLaunchPlan]]] = (
+        Field(None, description="")
+    )
+    alerts: Optional[List[WorkflowAlert]] = Field(None, description="")
+
+
 class Application(BaseModel):
     __root__: Union[
         Service,
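A minimal sketch using the new models defined above; field names and defaults come from this diff, while the notebook path and package names below are illustrative. PySparkTaskConfig additionally requires SparkDriverConfig and SparkExecutorConfig values, whose fields are not shown in this diff:

    from truefoundry.deploy._autogen.models import (
        SparkJobScalaNotebookEntrypoint,
        TaskPySparkBuild,
    )

    # New Scala notebook entrypoint for SparkJob.
    entrypoint = SparkJobScalaNotebookEntrypoint(
        type="scala-notebook",
        main_application_file="notebooks/report.ipynb",  # illustrative relative path
    )

    # New build configuration for PySpark tasks; only fields visible in this diff are set.
    build = TaskPySparkBuild(
        type="task-pyspark-build",
        spark_version="3.5.2",      # default declared in the model above
        pip_packages=["pandas"],    # illustrative
        apt_packages=["git"],       # illustrative
    )
    print(entrypoint, build)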
truefoundry/deploy/builder/__init__.py
CHANGED
@@ -5,6 +5,7 @@ from truefoundry.deploy._autogen.models import (
     PythonBuild,
     SparkBuild,
     TaskDockerFileBuild,
+    TaskPySparkBuild,
     TaskPythonBuild,
 )
 from truefoundry.deploy.builder.builders import get_builder
@@ -22,6 +23,7 @@ class _BuildConfig(BaseModel):
         TaskPythonBuild,
         TaskDockerFileBuild,
         SparkBuild,
+        TaskPySparkBuild,
     ] = Field(discriminator="type")
 
 
truefoundry/deploy/builder/builders/__init__.py
CHANGED
@@ -5,6 +5,7 @@ from truefoundry.deploy.builder.builders import (
     tfy_notebook_buildpack,
     tfy_python_buildpack,
     tfy_spark_buildpack,
+    tfy_task_pyspark_buildpack,
 )
 
 BUILD_REGISTRY: Dict[str, Callable] = {
@@ -12,6 +13,7 @@ BUILD_REGISTRY: Dict[str, Callable] = {
     "tfy-python-buildpack": tfy_python_buildpack.build,
     "tfy-notebook-buildpack": tfy_notebook_buildpack.build,
     "tfy-spark-buildpack": tfy_spark_buildpack.build,
+    "task-pyspark-build": tfy_task_pyspark_buildpack.build,
 }
 
 __all__ = ["get_builder"]
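A hedged sketch of how the new buildpack is selected; only the registry key and module shown above are assumed, and the builder callable's signature is not part of this diff:

    from truefoundry.deploy.builder.builders import BUILD_REGISTRY

    # "task-pyspark-build" matches the Literal discriminator on TaskPySparkBuild.type,
    # so _BuildConfig resolves to the new model and this callable performs the build.
    build_fn = BUILD_REGISTRY["task-pyspark-build"]
    print(build_fn.__module__)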
truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py
CHANGED
@@ -1,5 +1,4 @@
-import
-from typing import Dict, List, Optional
+from typing import Dict
 
 from mako.template import Template
 
@@ -7,9 +6,12 @@ from truefoundry.common.constants import ENV_VARS, PythonPackageManager
 from truefoundry.deploy._autogen.models import PythonBuild
 from truefoundry.deploy.builder.constants import (
     PIP_CONF_BUILDKIT_SECRET_MOUNT,
-    PIP_CONF_SECRET_MOUNT_AS_ENV,
     UV_CONF_BUILDKIT_SECRET_MOUNT,
-
+)
+from truefoundry.deploy.builder.utils import (
+    generate_apt_install_command,
+    generate_pip_install_command,
+    generate_uv_pip_install_command,
 )
 from truefoundry.deploy.v2.lib.patched_models import (
     CUDAVersion,
@@ -82,85 +84,6 @@ CUDA_VERSION_TO_IMAGE_TAG: Dict[str, str] = {
 }
 
 
-def generate_apt_install_command(apt_packages: Optional[List[str]]) -> Optional[str]:
-    packages_list = None
-    if apt_packages:
-        packages_list = " ".join(p.strip() for p in apt_packages if p.strip())
-    if not packages_list:
-        return None
-    apt_update_command = "apt update"
-    apt_install_command = f"DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends {packages_list}"
-    clear_apt_lists_command = "rm -rf /var/lib/apt/lists/*"
-    return " && ".join(
-        [apt_update_command, apt_install_command, clear_apt_lists_command]
-    )
-
-
-def generate_pip_install_command(
-    requirements_path: Optional[str],
-    pip_packages: Optional[List[str]],
-    mount_pip_conf_secret: bool = False,
-) -> Optional[str]:
-    upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
-    envs = []
-    if mount_pip_conf_secret:
-        envs.append(PIP_CONF_SECRET_MOUNT_AS_ENV)
-
-    command = ["python", "-m", "pip", "install", "--use-pep517", "--no-cache-dir"]
-    args = []
-    if requirements_path:
-        args.append("-r")
-        args.append(requirements_path)
-
-    if pip_packages:
-        args.extend(pip_packages)
-
-    if not args:
-        return None
-
-    final_pip_install_command = shlex.join(envs + command + args)
-    final_docker_run_command = " && ".join(
-        [upgrade_pip_command, final_pip_install_command]
-    )
-    return final_docker_run_command
-
-
-def generate_uv_pip_install_command(
-    requirements_path: Optional[str],
-    pip_packages: Optional[List[str]],
-    mount_uv_conf_secret: bool = False,
-) -> Optional[str]:
-    upgrade_pip_command = "python -m pip install -U pip setuptools wheel"
-    uv_mount = f"--mount=from={ENV_VARS.TFY_PYTHON_BUILD_UV_IMAGE_URI},source=/uv,target=/usr/local/bin/uv"
-    envs = [
-        "UV_LINK_MODE=copy",
-        "UV_PYTHON_DOWNLOADS=never",
-        "UV_INDEX_STRATEGY=unsafe-best-match",
-    ]
-    if mount_uv_conf_secret:
-        envs.append(UV_CONF_SECRET_MOUNT_AS_ENV)
-
-    command = ["uv", "pip", "install", "--no-cache-dir"]
-
-    args = []
-
-    if requirements_path:
-        args.append("-r")
-        args.append(requirements_path)
-
-    if pip_packages:
-        args.extend(pip_packages)
-
-    if not args:
-        return None
-
-    uv_pip_install_command = shlex.join(envs + command + args)
-    shell_commands = " && ".join([upgrade_pip_command, uv_pip_install_command])
-    final_docker_run_command = " ".join([uv_mount, shell_commands])
-
-    return final_docker_run_command
-
-
 def generate_dockerfile_content(
     build_configuration: PythonBuild,
     package_manager: str = ENV_VARS.TFY_PYTHON_BUILD_PACKAGE_MANAGER,
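The helpers removed above now live in the new truefoundry/deploy/builder/utils.py (see the file list at the top) and are imported from there. A hedged sketch, assuming the consolidated copies keep the signatures of the removed functions:

    from truefoundry.deploy.builder.utils import (
        generate_apt_install_command,
        generate_pip_install_command,
    )

    # Same call shapes as the removed module-local copies above.
    print(generate_apt_install_command(["git", "ffmpeg"]))
    print(
        generate_pip_install_command(
            requirements_path="requirements.txt",
            pip_packages=["uvicorn"],
        )
    )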
truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py
CHANGED
@@ -1,15 +1,14 @@
-import shlex
-from typing import List, Optional
-
 from mako.template import Template
 
 from truefoundry.common.constants import ENV_VARS, PythonPackageManager
 from truefoundry.deploy._autogen.models import SparkBuild
 from truefoundry.deploy.builder.constants import (
     PIP_CONF_BUILDKIT_SECRET_MOUNT,
-    PIP_CONF_SECRET_MOUNT_AS_ENV,
     UV_CONF_BUILDKIT_SECRET_MOUNT,
-
+)
+from truefoundry.deploy.builder.utils import (
+    generate_pip_install_command,
+    generate_uv_pip_install_command,
 )
 from truefoundry.deploy.v2.lib.patched_models import (
     _resolve_requirements_path,
@@ -26,8 +25,7 @@ RUN ${package_manager_config_secret_mount} ${python_packages_install_command}
 % endif
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV IPYTHONDIR=/tmp/.ipython
-
-USER spark
+USER 1001
 COPY . /app
 """
 
@@ -35,6 +33,20 @@ _POST_USER_TEMPLATE = """
 COPY tfy_execute_notebook.py /app/tfy_execute_notebook.py
 """
 
+_ALMOND_INSTALL_TEMPLATE = """
+ENV COURSIER_CACHE=/opt/coursier-cache
+RUN install_packages curl
+RUN curl -Lo coursier https://git.io/coursier-cli && \
+    chmod +x coursier && \
+    ./coursier launch almond:0.14.1 -- --install --global && \
+    chown -R 1001:0 /usr/local/share/jupyter && \
+    chown -R 1001:0 /opt/coursier-cache && \
+    rm -f coursier
+"""
+
+# Docker image size with almond - 1.26GB
+# Docker image size without almond - 1.1GB
+# Not much harm in packaging almond by default
 DOCKERFILE_TEMPLATE = Template(
     """
 FROM ${spark_image_repo}:${spark_version}
@@ -43,6 +55,7 @@ RUN apt update && \
     DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends git && \
     rm -rf /var/lib/apt/lists/*
 """
+    + _ALMOND_INSTALL_TEMPLATE
     + _POST_PYTHON_INSTALL_TEMPLATE
     + _POST_USER_TEMPLATE
 )
@@ -55,71 +68,6 @@ ADDITIONAL_PIP_PACKAGES = [
 ]
 
 
-def generate_pip_install_command(
-    requirements_path: Optional[str],
-    pip_packages: Optional[List[str]],
-    mount_pip_conf_secret: bool = False,
-) -> Optional[str]:
-    upgrade_pip_command = "python3 -m pip install -U pip setuptools wheel"
-    envs = []
-    if mount_pip_conf_secret:
-        envs.append(PIP_CONF_SECRET_MOUNT_AS_ENV)
-
-    command = ["python3", "-m", "pip", "install", "--use-pep517", "--no-cache-dir"]
-    args = []
-    if requirements_path:
-        args.append("-r")
-        args.append(requirements_path)
-
-    if pip_packages:
-        args.extend(pip_packages)
-
-    if not args:
-        return None
-
-    final_pip_install_command = shlex.join(envs + command + args)
-    final_docker_run_command = " && ".join(
-        [upgrade_pip_command, final_pip_install_command]
-    )
-    return final_docker_run_command
-
-
-def generate_uv_pip_install_command(
-    requirements_path: Optional[str],
-    pip_packages: Optional[List[str]],
-    mount_uv_conf_secret: bool = False,
-) -> Optional[str]:
-    upgrade_pip_command = "python3 -m pip install -U pip setuptools wheel"
-    uv_mount = f"--mount=from={ENV_VARS.TFY_PYTHON_BUILD_UV_IMAGE_URI},source=/uv,target=/usr/local/bin/uv"
-    envs = [
-        "UV_LINK_MODE=copy",
-        "UV_PYTHON_DOWNLOADS=never",
-        "UV_INDEX_STRATEGY=unsafe-best-match",
-    ]
-    if mount_uv_conf_secret:
-        envs.append(UV_CONF_SECRET_MOUNT_AS_ENV)
-
-    command = ["uv", "pip", "install", "--no-cache-dir"]
-
-    args = []
-
-    if requirements_path:
-        args.append("-r")
-        args.append(requirements_path)
-
-    if pip_packages:
-        args.extend(pip_packages)
-
-    if not args:
-        return None
-
-    uv_pip_install_command = shlex.join(envs + command + args)
-    shell_commands = " && ".join([upgrade_pip_command, uv_pip_install_command])
-    final_docker_run_command = " ".join([uv_mount, shell_commands])
-
-    return final_docker_run_command
-
-
 def generate_dockerfile_content(
     build_configuration: SparkBuild,
     package_manager: str = ENV_VARS.TFY_PYTHON_BUILD_PACKAGE_MANAGER,
truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py
CHANGED
@@ -45,16 +45,9 @@ def upload_file_to_s3(file_path, bucket_name, s3_key):
     # Use s3proxy for pushing data to s3
     # The JWT token is already available in the pod
     aws_access_key_id = os.environ.get("SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN")
-    aws_secret_access_key = "
+    aws_secret_access_key = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_SECRET_KEY")
     s3_endpoint_url = os.environ.get("S3_PROXY_URL")
 
-    if not aws_access_key_id:
-        raise ValueError(
-            "SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN environment variable is not set"
-        )
-    if not s3_endpoint_url:
-        raise ValueError("S3_PROXY_URL environment variable is not set")
-
     # Needed for the issue https://github.com/gaul/s3proxy/issues/765
     s3_config = Config(
         request_checksum_calculation="when_required",
@@ -93,12 +86,24 @@ def execute_notebook(notebook_path, output_path="/tmp/output.ipynb", parameters=
         parameters = {}
 
     print(f"Starting execution of notebook: {notebook_path}")
+    notebook_type = os.environ.get("TFY_NOTEBOOK_TYPE", "").lower()
+    kernel_mapping = {"python": "python3", "scala": "scala"}
+
+    if notebook_type not in kernel_mapping:
+        supported_types = ", ".join(kernel_mapping.keys())
+        raise ValueError(
+            f"Unsupported notebook type: '{notebook_type}'. "
+            f"Supported types: [{supported_types}]"
+        )
+
+    kernel_name = kernel_mapping[notebook_type]
+
     pm.execute_notebook(
         input_path=notebook_path,
         output_path=output_path,
         parameters=parameters,
         # TODO(gw): Replace with kernel name for venv
-        kernel_name=
+        kernel_name=kernel_name,
         # Log cell by cell execution output
         # TODO(gw): Output logs to a file instead, so that they aren't merged with the container's logs
         log_output=True,
@@ -106,7 +111,23 @@ def execute_notebook(notebook_path, output_path="/tmp/output.ipynb", parameters=
         stderr_file=sys.stderr,
     )
     print(f"Successfully executed notebook: {notebook_path}")
-
+
+
+def validate_env_vars():
+    keys = [
+        "TFY_NOTEBOOK_OUTPUT_S3_KEY",
+        "TFY_NOTEBOOK_OUTPUT_S3_BUCKET",
+        "SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN",
+        "TFY_NOTEBOOK_OUTPUT_S3_SECRET_KEY",
+        "TFY_NOTEBOOK_TYPE",
+    ]
+    unset_keys = [key for key in keys if not os.environ.get(key)]
+    if unset_keys:
+        raise ValueError(
+            f"Environment variables {unset_keys} are not set."
+            f"Contact you tenant-admin to configure storage bucket on the control plane "
+            f"to enable uploading spark notebook outputs."
+        )
 
 
 if __name__ == "__main__":
@@ -116,35 +137,31 @@ if __name__ == "__main__":
     parser.add_argument("notebook_path", help="Path to the notebook file to execute")
     args = parser.parse_args()
 
+    # Since failure to upload is considered a job failure, fail the job even before it run if uploads cannot happen
+    validate_env_vars()
+
    output_notebook_path = "/tmp/output.ipynb"
 
     # This would be the same as the default bucket used by servicefoundry-server
     s3_bucket = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_BUCKET")
-    # This would be something like sparkjob-events/<tenant-id
-
+    # This would be something like sparkjob-events/<tenant-id>/output-notebooks/<application-id>/<jobrun-name>/output.html
+    s3_key = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_KEY")
 
     try:
-
-            args.notebook_path, output_path=output_notebook_path
-        )
+        execute_notebook(args.notebook_path, output_path=output_notebook_path)
 
         # The following may also be modeled as an entrypoint
         # https://papermill.readthedocs.io/en/latest/extending-entry-points.html
         # Will take that up with next iteration where we save the executed notebook periodically
-
-
-
-
-
-
-
-
-
-        s3_html_key = f"{s3_key_prefix}/output.html"
-        upload_file_to_s3(
-            file_path=html_output_path, bucket_name=s3_bucket, s3_key=s3_html_key
-        )
-        print(f"Successfully uploaded HTML to s3://{s3_bucket}/{s3_html_key}")
+        print("Converting notebook to HTML and uploading to S3...")
+        html_output_path = "/tmp/output.html"
+        convert_notebook_to_html(
+            notebook_path=output_notebook_path, output_html_path=html_output_path
+        )
+        upload_file_to_s3(
+            file_path=html_output_path, bucket_name=s3_bucket, s3_key=s3_key
+        )
+        print(f"Successfully uploaded HTML to s3://{s3_bucket}/{s3_key}")
 
     except Exception as e:
         print(f"Error executing notebook {args.notebook_path}: {e}")
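A hedged sketch of how the executor above is driven after this change: the kernel comes from TFY_NOTEBOOK_TYPE and the upload-related variables are validated up front (names as listed in validate_env_vars; all values below are placeholders, and the script path comes from the COPY line in the spark buildpack template):

    import os
    import subprocess

    env = {
        **os.environ,
        "TFY_NOTEBOOK_TYPE": "scala",  # or "python"; anything else raises ValueError
        "TFY_NOTEBOOK_OUTPUT_S3_BUCKET": "example-bucket",
        "TFY_NOTEBOOK_OUTPUT_S3_KEY": "sparkjob-events/tenant/output.html",
        "TFY_NOTEBOOK_OUTPUT_S3_SECRET_KEY": "placeholder",
        "SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN": "placeholder",
    }
    subprocess.run(
        ["python3", "/app/tfy_execute_notebook.py", "notebooks/job.ipynb"],
        env=env,
        check=True,
    )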