truefoundry 0.11.1rc1__py3-none-any.whl → 0.11.3.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truefoundry might be problematic. Click here for more details.

@@ -16,6 +16,7 @@ from truefoundry.common.constants import (
16
16
  API_SERVER_RELATIVE_PATH,
17
17
  ENV_VARS,
18
18
  MLFOUNDRY_SERVER_RELATIVE_PATH,
19
+ TFY_API_KEY_ENV_KEY,
19
20
  TFY_DEBUG_ENV_KEY,
20
21
  TFY_HOST_ENV_KEY,
21
22
  TFY_INTERNAL_ENV_KEY,
@@ -113,9 +114,14 @@ def validate_tfy_host(tfy_host: str) -> None:
113
114
  def resolve_tfy_host(tfy_host: Optional[str] = None) -> str:
114
115
  tfy_host = tfy_host or ENV_VARS.TFY_HOST
115
116
  if not tfy_host:
116
- raise ValueError(
117
- f"Either `host` should be provided using `--host <value>`, or `{TFY_HOST_ENV_KEY}` env must be set"
118
- )
117
+ if ENV_VARS.TFY_API_KEY:
118
+ raise ValueError(
119
+ f"TFY_HOST` env must be set since `{TFY_API_KEY_ENV_KEY}` env is set. Either set `{TFY_HOST_ENV_KEY}` or unset `{TFY_API_KEY_ENV_KEY}` and login"
120
+ )
121
+ else:
122
+ raise ValueError(
123
+ f"Either `host` should be provided using `--host <value>`, or `{TFY_HOST_ENV_KEY}` env must be set"
124
+ )
119
125
  tfy_host = tfy_host.strip("/")
120
126
  validate_tfy_host(tfy_host)
121
127
  return tfy_host
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: application.json
3
- # timestamp: 2025-06-18T21:24:37+00:00
3
+ # timestamp: 2025-07-16T09:53:23+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
 
@@ -989,6 +989,34 @@ class TaskDockerFileBuild(BaseModel):
989
989
  build_args: Optional[Dict[str, str]] = Field(None, description="")
990
990
 
991
991
 
992
+ class TaskPySparkBuild(BaseModel):
993
+ """
994
+ Describes the configuration for the PySpark build for a task
995
+ """
996
+
997
+ type: Literal["task-pyspark-build"] = Field(..., description="")
998
+ spark_version: str = Field(
999
+ "3.5.2",
1000
+ description="Spark version should match the spark version installed in the image.",
1001
+ )
1002
+ docker_registry: Optional[str] = Field(
1003
+ None,
1004
+ description="FQN of the container registry. If you can't find your registry here,\nadd it through the [Integrations](/integrations?tab=docker-registry) page",
1005
+ )
1006
+ requirements_path: Optional[str] = Field(
1007
+ None,
1008
+ description="Path to `requirements.txt` relative to\n`Path to build context`",
1009
+ )
1010
+ pip_packages: Optional[List[str]] = Field(
1011
+ None,
1012
+ description='Define pip package requirements.\nIn Python/YAML E.g. ["fastapi>=0.90,<1.0", "uvicorn"]',
1013
+ )
1014
+ apt_packages: Optional[List[str]] = Field(
1015
+ None,
1016
+ description='Debian packages to install via `apt get`.\nIn Python/YAML E.g. ["git", "ffmpeg", "htop"]',
1017
+ )
1018
+
1019
+
992
1020
  class TaskPythonBuild(BaseModel):
993
1021
  """
994
1022
  Describes the configuration for the python build for a task
@@ -1200,7 +1228,7 @@ class ContainerTaskConfig(BaseModel):
1200
1228
  description="Configure environment variables to be injected in the task either as plain text or secrets. [Docs](https://docs.truefoundry.com/docs/env-variables)",
1201
1229
  )
1202
1230
  resources: Optional[Resources] = None
1203
- mounts: Optional[List[Union[SecretMount, StringDataMount, VolumeMount]]] = Field(
1231
+ mounts: Optional[List[VolumeMount]] = Field(
1204
1232
  None, description="Configure data to be mounted to Workflow pod(s) as a volume."
1205
1233
  )
1206
1234
  service_account: Optional[str] = Field(None, description="")
@@ -1567,15 +1595,6 @@ class FlyteLaunchPlan(BaseModel):
1567
1595
  closure: Any
1568
1596
 
1569
1597
 
1570
- class FlyteTaskCustom(BaseModel):
1571
- truefoundry: Union[PythonTaskConfig, ContainerTaskConfig]
1572
-
1573
-
1574
- class FlyteTaskTemplate(BaseModel):
1575
- id: FlyteTaskID
1576
- custom: FlyteTaskCustom
1577
-
1578
-
1579
1598
  class JobAlert(BaseModel):
1580
1599
  """
1581
1600
  Describes the configuration for the job alerts
@@ -1594,6 +1613,25 @@ class JobAlert(BaseModel):
1594
1613
  on_failure: bool = Field(True, description="Send an alert when the job fails")
1595
1614
 
1596
1615
 
1616
+ class PySparkTaskConfig(BaseModel):
1617
+ type: Literal["pyspark-task-config"] = Field(..., description="")
1618
+ image: TaskPySparkBuild
1619
+ driver_config: SparkDriverConfig
1620
+ executor_config: SparkExecutorConfig
1621
+ spark_conf: Optional[Dict[str, Any]] = Field(
1622
+ None,
1623
+ description="Extra configuration properties to be passed to the spark job. [Docs](https://spark.apache.org/docs/latest/configuration.html)",
1624
+ )
1625
+ env: Optional[Dict[str, str]] = Field(
1626
+ None,
1627
+ description="Configure environment variables to be injected in the task either as plain text or secrets. [Docs](https://docs.truefoundry.com/docs/env-variables)",
1628
+ )
1629
+ mounts: Optional[List[Union[SecretMount, StringDataMount, VolumeMount]]] = Field(
1630
+ None, description="Configure data to be mounted to Workflow pod(s) as a volume."
1631
+ )
1632
+ service_account: Optional[str] = Field(None, description="")
1633
+
1634
+
1597
1635
  class Service(BaseService):
1598
1636
  """
1599
1637
  Describes the configuration for the service
@@ -1630,9 +1668,13 @@ class AsyncService(BaseService):
1630
1668
  sidecar: Optional[AsyncProcessorSidecar] = None
1631
1669
 
1632
1670
 
1633
- class FlyteTask(BaseModel):
1634
- template: FlyteTaskTemplate
1635
- description: Optional[Any] = None
1671
+ class FlyteTaskCustom(BaseModel):
1672
+ truefoundry: Union[PythonTaskConfig, ContainerTaskConfig, PySparkTaskConfig]
1673
+
1674
+
1675
+ class FlyteTaskTemplate(BaseModel):
1676
+ id: FlyteTaskID
1677
+ custom: FlyteTaskCustom
1636
1678
 
1637
1679
 
1638
1680
  class Job(BaseModel):
@@ -1688,27 +1730,6 @@ class Job(BaseModel):
1688
1730
  )
1689
1731
 
1690
1732
 
1691
- class Workflow(BaseModel):
1692
- """
1693
- Describes the configuration for the worflow
1694
- """
1695
-
1696
- type: Literal["workflow"] = Field(..., description="")
1697
- name: constr(regex=r"^[a-z](?:[a-z0-9]|-(?!-)){1,30}[a-z0-9]$") = Field(
1698
- ..., description="Name of the workflow"
1699
- )
1700
- source: Union[LocalSource, RemoteSource] = Field(
1701
- ..., description="Source Code for the workflow, either local or remote"
1702
- )
1703
- workflow_file_path: str = Field(
1704
- ..., description="Path to the workflow file relative to the project root path"
1705
- )
1706
- flyte_entities: Optional[List[Union[FlyteTask, FlyteWorkflow, FlyteLaunchPlan]]] = (
1707
- Field(None, description="")
1708
- )
1709
- alerts: Optional[List[WorkflowAlert]] = Field(None, description="")
1710
-
1711
-
1712
1733
  class ApplicationSet(BaseModel):
1713
1734
  """
1714
1735
  Describes the configuration for the application set
@@ -1735,6 +1756,32 @@ class ApplicationSet(BaseModel):
1735
1756
  )
1736
1757
 
1737
1758
 
1759
+ class FlyteTask(BaseModel):
1760
+ template: FlyteTaskTemplate
1761
+ description: Optional[Any] = None
1762
+
1763
+
1764
+ class Workflow(BaseModel):
1765
+ """
1766
+ Describes the configuration for the worflow
1767
+ """
1768
+
1769
+ type: Literal["workflow"] = Field(..., description="")
1770
+ name: constr(regex=r"^[a-z](?:[a-z0-9]|-(?!-)){1,30}[a-z0-9]$") = Field(
1771
+ ..., description="Name of the workflow"
1772
+ )
1773
+ source: Union[LocalSource, RemoteSource] = Field(
1774
+ ..., description="Source Code for the workflow, either local or remote"
1775
+ )
1776
+ workflow_file_path: str = Field(
1777
+ ..., description="Path to the workflow file relative to the project root path"
1778
+ )
1779
+ flyte_entities: Optional[List[Union[FlyteTask, FlyteWorkflow, FlyteLaunchPlan]]] = (
1780
+ Field(None, description="")
1781
+ )
1782
+ alerts: Optional[List[WorkflowAlert]] = Field(None, description="")
1783
+
1784
+
1738
1785
  class Application(BaseModel):
1739
1786
  __root__: Union[
1740
1787
  Service,
@@ -45,16 +45,9 @@ def upload_file_to_s3(file_path, bucket_name, s3_key):
45
45
  # Use s3proxy for pushing data to s3
46
46
  # The JWT token is already available in the pod
47
47
  aws_access_key_id = os.environ.get("SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN")
48
- aws_secret_access_key = "__token__"
48
+ aws_secret_access_key = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_SECRET_KEY")
49
49
  s3_endpoint_url = os.environ.get("S3_PROXY_URL")
50
50
 
51
- if not aws_access_key_id:
52
- raise ValueError(
53
- "SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN environment variable is not set"
54
- )
55
- if not s3_endpoint_url:
56
- raise ValueError("S3_PROXY_URL environment variable is not set")
57
-
58
51
  # Needed for the issue https://github.com/gaul/s3proxy/issues/765
59
52
  s3_config = Config(
60
53
  request_checksum_calculation="when_required",
@@ -106,7 +99,22 @@ def execute_notebook(notebook_path, output_path="/tmp/output.ipynb", parameters=
106
99
  stderr_file=sys.stderr,
107
100
  )
108
101
  print(f"Successfully executed notebook: {notebook_path}")
109
- return output_path
102
+
103
+
104
+ def validate_env_vars():
105
+ keys = [
106
+ "TFY_NOTEBOOK_OUTPUT_S3_KEY",
107
+ "TFY_NOTEBOOK_OUTPUT_S3_BUCKET",
108
+ "SPARK_APPLICATION_EVENT_LOG_JWT_TOKEN",
109
+ "TFY_NOTEBOOK_OUTPUT_S3_SECRET_KEY",
110
+ ]
111
+ unset_keys = [key for key in keys if not os.environ.get(key)]
112
+ if unset_keys:
113
+ raise ValueError(
114
+ f"Environment variables {unset_keys} are not set."
115
+ f"Contact you tenant-admin to configure storage bucket on the control plane "
116
+ f"to enable uploading spark notebook outputs."
117
+ )
110
118
 
111
119
 
112
120
  if __name__ == "__main__":
@@ -116,35 +124,31 @@ if __name__ == "__main__":
116
124
  parser.add_argument("notebook_path", help="Path to the notebook file to execute")
117
125
  args = parser.parse_args()
118
126
 
127
+ # Since failure to upload is considered a job failure, fail the job even before it run if uploads cannot happen
128
+ validate_env_vars()
129
+
119
130
  output_notebook_path = "/tmp/output.ipynb"
120
131
 
121
132
  # This would be the same as the default bucket used by servicefoundry-server
122
133
  s3_bucket = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_BUCKET")
123
- # This would be something like sparkjob-events/<tenant-id>
124
- s3_key_prefix = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_KEY_PREFIX")
134
+ # This would be something like sparkjob-events/<tenant-id>/output-notebooks/<application-id>/<jobrun-name>/output.html
135
+ s3_key = os.environ.get("TFY_NOTEBOOK_OUTPUT_S3_KEY")
125
136
 
126
137
  try:
127
- executed_notebook_path = execute_notebook(
128
- args.notebook_path, output_path=output_notebook_path
129
- )
138
+ execute_notebook(args.notebook_path, output_path=output_notebook_path)
130
139
 
131
140
  # The following may also be modeled as an entrypoint
132
141
  # https://papermill.readthedocs.io/en/latest/extending-entry-points.html
133
142
  # Will take that up with next iteration where we save the executed notebook periodically
134
- if s3_bucket and s3_key_prefix:
135
- print("Converting notebook to HTML and uploading to S3...")
136
- html_output_path = "/tmp/output.html"
137
- convert_notebook_to_html(
138
- notebook_path=executed_notebook_path, output_html_path=html_output_path
139
- )
140
-
141
- # Construct S3 key: use the original notebook name for the HTML file
142
- notebook_name = os.path.basename(args.notebook_path)
143
- s3_html_key = f"{s3_key_prefix}/output.html"
144
- upload_file_to_s3(
145
- file_path=html_output_path, bucket_name=s3_bucket, s3_key=s3_html_key
146
- )
147
- print(f"Successfully uploaded HTML to s3://{s3_bucket}/{s3_html_key}")
143
+ print("Converting notebook to HTML and uploading to S3...")
144
+ html_output_path = "/tmp/output.html"
145
+ convert_notebook_to_html(
146
+ notebook_path=output_notebook_path, output_html_path=html_output_path
147
+ )
148
+ upload_file_to_s3(
149
+ file_path=html_output_path, bucket_name=s3_bucket, s3_key=s3_key
150
+ )
151
+ print(f"Successfully uploaded HTML to s3://{s3_bucket}/{s3_key}")
148
152
 
149
153
  except Exception as e:
150
154
  print(f"Error executing notebook {args.notebook_path}: {e}")
@@ -35,8 +35,20 @@ def trigger_command():
35
35
  nargs=-1,
36
36
  required=False,
37
37
  )
38
+ @click.option(
39
+ "--run-name-alias",
40
+ "--run_name_alias",
41
+ type=click.STRING,
42
+ required=False,
43
+ help="Alias for the job run name.",
44
+ )
38
45
  @handle_exception_wrapper
39
- def trigger_job(application_fqn: str, command: Optional[Sequence[str]], params):
46
+ def trigger_job(
47
+ application_fqn: str,
48
+ params,
49
+ command: Optional[Sequence[str]],
50
+ run_name_alias: Optional[str],
51
+ ):
40
52
  """
41
53
  Trigger a Job on TrueFoundry asynchronously
42
54
 
@@ -54,6 +66,10 @@ def trigger_job(application_fqn: str, command: Optional[Sequence[str]], params):
54
66
  Passing params:
55
67
 
56
68
  [b]tfy trigger job --application-fqn "my-cluster:my-workspace:my-job" -- --param1_name param1_value --param2_name param2_value ...[/]
69
+ \n
70
+
71
+ passing run_name_alias:
72
+ [b]tfy trigger job --application-fqn "my-cluster:my-workspace:my-job" --run_name_alias "my_run_alias"[/]
57
73
  """
58
74
  if params:
59
75
  params_dict = {}
@@ -78,7 +94,10 @@ def trigger_job(application_fqn: str, command: Optional[Sequence[str]], params):
78
94
  params_dict[key] = value
79
95
 
80
96
  application.trigger_job(
81
- application_fqn=application_fqn, command=command, params=params
97
+ application_fqn=application_fqn,
98
+ command=command,
99
+ params=params,
100
+ run_name_alias=run_name_alias,
82
101
  )
83
102
 
84
103
 
@@ -578,6 +578,7 @@ class ServiceFoundryServiceClient(BaseServiceFoundryServiceClient):
578
578
  def trigger_job(
579
579
  self,
580
580
  deployment_id: str,
581
+ run_name_alias: Optional[str] = None,
581
582
  command: Optional[str] = None,
582
583
  params: Optional[Dict[str, str]] = None,
583
584
  ) -> TriggerJobResult:
@@ -585,11 +586,14 @@ class ServiceFoundryServiceClient(BaseServiceFoundryServiceClient):
585
586
  body = {
586
587
  "deploymentId": deployment_id,
587
588
  "input": {},
589
+ "metadata": {},
588
590
  }
589
591
  if command:
590
592
  body["input"]["command"] = command
591
593
  if params:
592
594
  body["input"]["params"] = params
595
+ if run_name_alias:
596
+ body["metadata"]["job_run_name_alias"] = run_name_alias
593
597
  response = session_with_retries().post(
594
598
  url, json=body, headers=self._get_headers()
595
599
  )
@@ -117,6 +117,7 @@ def trigger_job(
117
117
  application_fqn: str,
118
118
  command: Optional[Union[str, Sequence[str]]] = None,
119
119
  params: Optional[Dict[str, str]] = None,
120
+ run_name_alias: Optional[str] = None,
120
121
  ) -> TriggerJobResult:
121
122
  """
122
123
  Trigger a Job on TrueFoundry platform
@@ -178,6 +179,7 @@ def trigger_job(
178
179
  deployment_id=application_info.activeDeploymentId,
179
180
  command=command_str if command_str else None,
180
181
  params=params if params else None,
182
+ run_name_alias=run_name_alias.strip() if run_name_alias else None,
181
183
  )
182
184
  jobRunName = result.jobRunName
183
185
  previous_runs_url = f"{client.tfy_host.strip('/')}/deployments/{application_info.id}?tab=previousRuns"
@@ -526,3 +526,36 @@ class SparkJobPythonNotebookEntrypoint(
526
526
  models.SparkJobPythonNotebookEntrypoint, PatchedModelBase
527
527
  ):
528
528
  type: Literal["python-notebook"] = "python-notebook"
529
+
530
+
531
+ class PySparkTaskConfig(models.PySparkTaskConfig, PatchedModelBase):
532
+ type: Literal["pyspark-task-config"] = "pyspark-task-config"
533
+
534
+ def __init__(self, *args, **kwargs):
535
+ super().__init__(*args, **kwargs)
536
+ try:
537
+ import truefoundry.workflow.spark_task as _ # noqa: F401
538
+ except ImportError as e:
539
+ raise ImportError(
540
+ "truefoundry.workflow.spark_task is not installed. Please install it with `pip install truefoundry[workflow,spark]`"
541
+ ) from e
542
+
543
+
544
+ class SparkDriverConfig(models.SparkDriverConfig, PatchedModelBase):
545
+ type: Literal["spark-driver-config"] = "spark-driver-config"
546
+
547
+
548
+ class SparkExecutorConfig(models.SparkExecutorConfig, PatchedModelBase):
549
+ type: Literal["spark-executor-config"] = "spark-executor-config"
550
+
551
+
552
+ class SparkExecutorFixedInstances(models.SparkExecutorFixedInstances, PatchedModelBase):
553
+ type: Literal["fixed"] = "fixed"
554
+
555
+
556
+ class SparkExecutorDynamicScaling(models.SparkExecutorDynamicScaling, PatchedModelBase):
557
+ type: Literal["dynamic"] = "dynamic"
558
+
559
+
560
+ class TaskPySparkBuild(models.TaskPySparkBuild, PatchedModelBase):
561
+ type: Literal["task-pyspark-build"] = "task-pyspark-build"
@@ -6,17 +6,7 @@ import uuid
6
6
  from concurrent.futures import FIRST_EXCEPTION, Future, ThreadPoolExecutor, wait
7
7
  from shutil import rmtree
8
8
  from threading import Event
9
- from typing import (
10
- Any,
11
- Callable,
12
- Dict,
13
- Iterator,
14
- List,
15
- Optional,
16
- Sequence,
17
- Tuple,
18
- Union,
19
- )
9
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple
20
10
  from urllib.parse import unquote
21
11
  from urllib.request import pathname2url
22
12
 
@@ -31,6 +21,7 @@ from rich.progress import (
31
21
  )
32
22
  from tqdm.utils import CallbackIOWrapper
33
23
  from truefoundry_sdk import (
24
+ FileInfo,
34
25
  MultiPartUploadResponse,
35
26
  MultiPartUploadStorageProvider,
36
27
  Operation,
@@ -52,11 +43,6 @@ from truefoundry.common.storage_provider_utils import (
52
43
  )
53
44
  from truefoundry.ml._autogen.client import ( # type: ignore[attr-defined]
54
45
  ApiClient,
55
- FileInfoDto,
56
- ListFilesForArtifactVersionRequestDto,
57
- ListFilesForArtifactVersionsResponseDto,
58
- ListFilesForDatasetRequestDto,
59
- ListFilesForDatasetResponseDto,
60
46
  MlfoundryArtifactsApi,
61
47
  RunArtifactsApi,
62
48
  )
@@ -592,44 +578,23 @@ class MlFoundryArtifactsRepository:
592
578
  progress=progress,
593
579
  )
594
580
 
595
- def _list_files(
596
- self, artifact_identifier: ArtifactIdentifier, path, page_size, page_token
597
- ) -> Union[ListFilesForDatasetResponseDto, ListFilesForArtifactVersionsResponseDto]:
598
- if artifact_identifier.dataset_fqn:
599
- return self._mlfoundry_artifacts_api.list_files_for_dataset_post(
600
- list_files_for_dataset_request_dto=ListFilesForDatasetRequestDto(
601
- dataset_fqn=artifact_identifier.dataset_fqn,
602
- path=path,
603
- max_results=page_size,
604
- page_token=page_token,
605
- )
606
- )
607
- else:
608
- return self._mlfoundry_artifacts_api.list_files_for_artifact_version_post(
609
- list_files_for_artifact_version_request_dto=ListFilesForArtifactVersionRequestDto(
610
- id=str(artifact_identifier.artifact_version_id),
611
- path=path,
612
- max_results=page_size,
613
- page_token=page_token,
614
- )
615
- )
616
-
617
581
  def list_artifacts(
618
582
  self, path=None, page_size=_LIST_FILES_PAGE_SIZE, **kwargs
619
- ) -> Iterator[FileInfoDto]:
620
- page_token = None
621
- started = False
622
- while not started or page_token is not None:
623
- started = True
624
- page = self._list_files(
625
- artifact_identifier=self.artifact_identifier,
583
+ ) -> Iterator[FileInfo]:
584
+ if self.artifact_identifier.dataset_id:
585
+ for file_info in client.data_directories.list_files(
586
+ id=str(self.artifact_identifier.dataset_id),
626
587
  path=path,
627
- page_size=page_size,
628
- page_token=page_token,
629
- )
630
- for file_info in page.files:
588
+ limit=page_size,
589
+ ):
590
+ yield file_info
591
+ else:
592
+ for file_info in client.artifact_versions.list_files(
593
+ id=str(self.artifact_identifier.artifact_version_id),
594
+ path=path,
595
+ limit=page_size,
596
+ ):
631
597
  yield file_info
632
- page_token = page.next_page_token
633
598
 
634
599
  def _is_directory(self, artifact_path):
635
600
  # TODO: Ideally server should return a flag to indicate if it is a directory
@@ -15,6 +15,7 @@ from flytekit.types.file import FlyteFile
15
15
  from truefoundry.common.constants import ENV_VARS
16
16
  from truefoundry.deploy.v2.lib.patched_models import (
17
17
  ContainerTaskConfig,
18
+ PySparkTaskConfig,
18
19
  PythonTaskConfig,
19
20
  TaskDockerFileBuild,
20
21
  TaskPythonBuild,
@@ -41,6 +42,7 @@ __all__ = [
41
42
  "ExecutionConfig",
42
43
  "FlyteFile",
43
44
  "FlyteError",
45
+ "PySparkTaskConfig",
44
46
  ]
45
47
 
46
48
 
@@ -0,0 +1,91 @@
1
+ import os
2
+ import shutil
3
+ from typing import Any, Callable, Dict, Optional
4
+
5
+ from flytekit import FlyteContextManager, PythonFunctionTask, lazy_module
6
+ from flytekit.configuration import SerializationSettings
7
+ from flytekit.core.context_manager import ExecutionParameters
8
+ from flytekit.extend import ExecutionState, TaskPlugins
9
+ from flytekit.extend.backend.base_agent import AsyncAgentExecutorMixin
10
+
11
+ from truefoundry.deploy.v2.lib.patched_models import PySparkTaskConfig
12
+
13
+ pyspark_sql = lazy_module("pyspark.sql")
14
+ SparkSession = pyspark_sql.SparkSession
15
+
16
+
17
+ class TfySparkFunctionTask(
18
+ AsyncAgentExecutorMixin, PythonFunctionTask[PySparkTaskConfig]
19
+ ):
20
+ """
21
+ Actual Plugin that transforms the local python code for execution within a spark context
22
+ """
23
+
24
+ _SPARK_TASK_TYPE = "spark"
25
+
26
+ def __init__(
27
+ self,
28
+ task_config: PySparkTaskConfig,
29
+ task_function: Callable,
30
+ **kwargs,
31
+ ):
32
+ self.sess: Optional[SparkSession] = None # type: ignore
33
+
34
+ task_type = self._SPARK_TASK_TYPE
35
+
36
+ super(TfySparkFunctionTask, self).__init__(
37
+ task_config=task_config,
38
+ task_type=task_type,
39
+ task_function=task_function,
40
+ **kwargs,
41
+ )
42
+
43
+ def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]:
44
+ return {"truefoundry": self._task_config.dict()}
45
+
46
+ def pre_execute(self, user_params: ExecutionParameters) -> ExecutionParameters:
47
+ import pyspark as _pyspark
48
+
49
+ ctx = FlyteContextManager.current_context()
50
+ sess_builder = _pyspark.sql.SparkSession.builder.appName(
51
+ f"FlyteSpark: {user_params.execution_id}"
52
+ )
53
+ if not (
54
+ ctx.execution_state
55
+ and ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION
56
+ ):
57
+ # If either of above cases is not true, then we are in local execution of this task
58
+ # Add system spark-conf for local/notebook based execution.
59
+ spark_conf = _pyspark.SparkConf()
60
+ spark_conf.set("spark.driver.bindAddress", "127.0.0.1")
61
+ for k, v in self.task_config.spark_conf.items():
62
+ spark_conf.set(k, v)
63
+ # In local execution, propagate PYTHONPATH to executors too. This makes the spark
64
+ # execution hermetic to the execution environment. For example, it allows running
65
+ # Spark applications using Bazel, without major changes.
66
+ if "PYTHONPATH" in os.environ:
67
+ spark_conf.setExecutorEnv("PYTHONPATH", os.environ["PYTHONPATH"])
68
+ sess_builder = sess_builder.config(conf=spark_conf)
69
+
70
+ self.sess = sess_builder.getOrCreate()
71
+
72
+ if (
73
+ ctx.serialization_settings
74
+ and ctx.serialization_settings.fast_serialization_settings
75
+ and ctx.serialization_settings.fast_serialization_settings.enabled
76
+ and ctx.execution_state
77
+ and ctx.execution_state.mode == ExecutionState.Mode.TASK_EXECUTION
78
+ ):
79
+ file_name = "flyte_wf"
80
+ file_format = "zip"
81
+ shutil.make_archive(file_name, file_format, os.getcwd())
82
+ self.sess.sparkContext.addPyFile(f"{file_name}.{file_format}")
83
+
84
+ return user_params.builder().add_attr("SPARK_SESSION", self.sess).build()
85
+
86
+ def execute(self, **kwargs) -> Any:
87
+ return PythonFunctionTask.execute(self, **kwargs)
88
+
89
+
90
+ # Inject the Spark plugin into flytekits dynamic plugin loading system
91
+ TaskPlugins.register_pythontask_plugin(PySparkTaskConfig, TfySparkFunctionTask)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: truefoundry
3
- Version: 0.11.1rc1
3
+ Version: 0.11.3.dev0
4
4
  Summary: TrueFoundry CLI
5
5
  Author-email: TrueFoundry Team <abhishek@truefoundry.com>
6
6
  Requires-Python: <3.14,>=3.8.1
@@ -36,8 +36,11 @@ Requires-Dist: urllib3<3,>=1.26.18
36
36
  Requires-Dist: yq<4.0.0,>=3.1.0
37
37
  Provides-Extra: ai
38
38
  Requires-Dist: mcp==1.9.4; (python_version >= '3.10') and extra == 'ai'
39
+ Provides-Extra: spark
40
+ Requires-Dist: flytekit==1.15.3; (python_version >= '3.9' and python_version <= '3.12') and extra == 'spark'
41
+ Requires-Dist: flytekitplugins-spark==1.15.3; (python_version >= '3.9' and python_version <= '3.12') and extra == 'spark'
39
42
  Provides-Extra: workflow
40
- Requires-Dist: flytekit==1.15.3; (python_version >= '3.9' and python_version < '3.13') and extra == 'workflow'
43
+ Requires-Dist: flytekit==1.15.3; (python_version >= '3.9' and python_version <= '3.12') and extra == 'workflow'
41
44
  Description-Content-Type: text/markdown
42
45
 
43
46
  # TrueFoundry
@@ -50,11 +50,11 @@ truefoundry/common/servicefoundry_client.py,sha256=2fYhdVPSvLXz5C5tosOq86JD8WM3I
50
50
  truefoundry/common/session.py,sha256=d9l3TEBpqVP4mr4mTGY1qVxc815skzMlNNdw14otg34,2923
51
51
  truefoundry/common/storage_provider_utils.py,sha256=yURhMw8k0FLFvaviRHDiifhvc6GnuQwGMC9Qd2uM440,10934
52
52
  truefoundry/common/types.py,sha256=BMJFCsR1lPJAw66IQBSvLyV4I6o_x5oj78gVsUa9si8,188
53
- truefoundry/common/utils.py,sha256=j3QP0uOsaGD_VmDDR68JTwoYE1okkAq6OqpVkzVf48Q,6424
53
+ truefoundry/common/utils.py,sha256=P0FuAadoJGdpieUORLSN-PiFnkyoGO-K2cS4OPITBWg,6714
54
54
  truefoundry/common/warnings.py,sha256=xDMhR_-ZGC40Ycaj6nlFb5MYPexn8WbKCHd4FlflTXQ,705
55
55
  truefoundry/deploy/__init__.py,sha256=PVbGPU9S3-dTFn5LvLwaEnfsp2RrGT9iiM7_15kOV84,2837
56
56
  truefoundry/deploy/python_deploy_codegen.py,sha256=k19_m5DGsUyjOUCSKwIVP8vDna2sq01tHABsUfoVpW4,8019
57
- truefoundry/deploy/_autogen/models.py,sha256=8j_y0Yp8k8Sjj7iVtZDHeuxq9kDvD0xI8-iFnbf0370,73571
57
+ truefoundry/deploy/_autogen/models.py,sha256=8sim4XOGFtK68pdZVKscvg6XPQRuGNTisUUSStqcP_A,75542
58
58
  truefoundry/deploy/builder/__init__.py,sha256=kgvlkVkiWpMVdim81tIeLrdoACqrFDgwCqHdQVsCsMo,4988
59
59
  truefoundry/deploy/builder/constants.py,sha256=amUkHoHvVKzGv0v_knfiioRuKiJM0V0xW0diERgWiI0,508
60
60
  truefoundry/deploy/builder/docker_service.py,sha256=sm7GWeIqyrKaZpxskdLejZlsxcZnM3BTDJr6orvPN4E,3948
@@ -67,7 +67,7 @@ truefoundry/deploy/builder/builders/tfy_python_buildpack/__init__.py,sha256=_fjq
67
67
  truefoundry/deploy/builder/builders/tfy_python_buildpack/dockerfile_template.py,sha256=f4l3fH21E2b8W3-JotMKc0AdPcCxV7LRPxxYJa7z_UQ,9134
68
68
  truefoundry/deploy/builder/builders/tfy_spark_buildpack/__init__.py,sha256=NEPlM6_vTVxp4ITa18B8DBbgYCn1q5d8be21lbgu5oY,2888
69
69
  truefoundry/deploy/builder/builders/tfy_spark_buildpack/dockerfile_template.py,sha256=2zohUaW8Yw_QREHlpRW7Pooomt19HJh44fHjlsiDmwM,6064
70
- truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py,sha256=tTx-GZDVf5iB1Pyz2z5c2LH1yrb7lErFbJcr-giAIuI,5734
70
+ truefoundry/deploy/builder/builders/tfy_spark_buildpack/tfy_execute_notebook.py,sha256=QvawKw30dcHROJ05XQU2KgwH3gtUmEGSkuLxiuPNJ2c,5899
71
71
  truefoundry/deploy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
72
  truefoundry/deploy/cli/commands/__init__.py,sha256=qv818jxqSAygJ3h-6Ul8t-5VOgR_UrSgsVtNCl3e5G0,1408
73
73
  truefoundry/deploy/cli/commands/apply_command.py,sha256=DmXmKVokkauyKIiJDtErTwbJ5_LvQeJbTQsG5BjyKpo,2427
@@ -84,7 +84,7 @@ truefoundry/deploy/cli/commands/logs_command.py,sha256=osl2z5VaIceB9sYa6GtwsuyAP
84
84
  truefoundry/deploy/cli/commands/patch_application_command.py,sha256=aRTHu2OmxQd7j9iE0RavsFCkCILp0rGh4DJO51Oij5I,2591
85
85
  truefoundry/deploy/cli/commands/patch_command.py,sha256=wA95khMO9uVz8SaJlgYMUwaX7HagtchjyxXXATq83Bk,1665
86
86
  truefoundry/deploy/cli/commands/terminate_comand.py,sha256=UKhOdbAej8ubX3q44vpLrOotAcvH4vHpRZJQrRf_AfM,1077
87
- truefoundry/deploy/cli/commands/trigger_command.py,sha256=_qSl-AShepZpbGUGTfLfJGd74VJJ_wd3eXYt2DfxIFo,4716
87
+ truefoundry/deploy/cli/commands/trigger_command.py,sha256=-FZy_XnFsexH5SOWzDc4Dj9fTwmdjene_EaLjJPmG2c,5119
88
88
  truefoundry/deploy/cli/commands/utils.py,sha256=mIMYbHuAxnT0yz_0PU8LDC9sAZPU_xURZFMOrGoasuc,3694
89
89
  truefoundry/deploy/core/__init__.py,sha256=j61bMWj4BkWihdssKMSFhieo7afJDtpc7qO7zk1rDB4,140
90
90
  truefoundry/deploy/core/login.py,sha256=N2VrW3nlBzoyoYulkipxwQvCpjBhi3sfsmhxK1ktWhg,236
@@ -102,9 +102,9 @@ truefoundry/deploy/lib/session.py,sha256=fLdgR6ZDp8-hFl5NTON4ngnWLsMzGxvKtfpDOOw
102
102
  truefoundry/deploy/lib/util.py,sha256=J7r8San2wKo48A7-BlH2-OKTlBO67zlPjLEhMsL8os0,1059
103
103
  truefoundry/deploy/lib/win32.py,sha256=1RcvPTdlOAJ48rt8rCbE2Ufha2ztRqBAE9dueNXArrY,5009
104
104
  truefoundry/deploy/lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- truefoundry/deploy/lib/clients/servicefoundry_client.py,sha256=nSaaD91ONpDkRyOWHGv2VerPzdtn-Z3UF0iloj00VVU,27200
105
+ truefoundry/deploy/lib/clients/servicefoundry_client.py,sha256=JIj0Rs5PVZzXeh2QubLaVjgMJiUkfHrIMTtZMpgBmiA,27369
106
106
  truefoundry/deploy/lib/dao/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
- truefoundry/deploy/lib/dao/application.py,sha256=oMszpueXPUfTUuN_XdKwoRjQyqAgWHhZ-10cbprCVdM,9226
107
+ truefoundry/deploy/lib/dao/application.py,sha256=wRM4b3Mr4XCm838s8YjXlPpkKaSFbGgMyliEZRJra2k,9343
108
108
  truefoundry/deploy/lib/dao/apply.py,sha256=F7I8yp-IZir_6CL9NPE_KFD9rgicVJn2vcIv1a3MpuA,3771
109
109
  truefoundry/deploy/lib/dao/delete.py,sha256=uPL2psqWNw2O0oDikXJOlVxmG8n5d3Z0Ia9qZwqCn_8,2735
110
110
  truefoundry/deploy/lib/dao/version.py,sha256=AtdW_4O1DPUKdfv2qy6iUJsZ_95vM6z0AqeEy3WDKs8,1130
@@ -117,7 +117,7 @@ truefoundry/deploy/v2/lib/deploy.py,sha256=Ltm7cpIW14IbmEsR3EAIeWQUch2Z6HLej7heu
117
117
  truefoundry/deploy/v2/lib/deploy_workflow.py,sha256=G5BzMIbap8pgDX1eY-TITruUxQdkKhYtBmRwLL6lDeY,14342
118
118
  truefoundry/deploy/v2/lib/deployable_patched_models.py,sha256=mUi-OjPf7bc8rzfrPLdFb79LKuDq7F36RxL4V-AXebs,6830
119
119
  truefoundry/deploy/v2/lib/models.py,sha256=ogc1UYs1Z2nBdGSKCrde9sk8d0GxFKMkem99uqO5CmM,1148
120
- truefoundry/deploy/v2/lib/patched_models.py,sha256=oNsOr5ojVn2XHjATD3VLuuO6w_ljDL99siHXy6y3Y0g,15558
120
+ truefoundry/deploy/v2/lib/patched_models.py,sha256=TqGzHfHp20xdptmT--4LCWtFVjsWfE-LMT5wNS-fkuU,16751
121
121
  truefoundry/deploy/v2/lib/source.py,sha256=d6-8_6Zn5koBglqrBrY6ZLG_7yyPuLdyEmK4iZTw6xY,9405
122
122
  truefoundry/ml/__init__.py,sha256=EEEHV7w58Krpo_W9Chd8Y3TdItfFO3LI6j6Izqc4-P8,2219
123
123
  truefoundry/ml/constants.py,sha256=vDq72d4C9FSWqr9MMdjgTF4TuyNFApvo_6RVsSeAjB4,2837
@@ -347,7 +347,7 @@ truefoundry/ml/_autogen/models/schema.py,sha256=a_bp42MMPUbwO3407m0UW2W8EOhnxZXf
347
347
  truefoundry/ml/_autogen/models/signature.py,sha256=rBjpxUIsEeWM0sIyYG5uCJB18DKHR4k5yZw8TzuoP48,4987
348
348
  truefoundry/ml/_autogen/models/utils.py,sha256=c7RtSLXhOLcP8rjuUtfnMdaKVTZvvbsmw98gPAkAFrs,24371
349
349
  truefoundry/ml/artifact/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
350
- truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=ocX5EIcLQa8Uc_C3NxxgNorpxc-z1Yp4TLvmzSORPpw,36862
350
+ truefoundry/ml/artifact/truefoundry_artifact_repo.py,sha256=hbgLxSoihkLVuICzRueuh8iAIc-yruCW5TuMXYQ-aCU,35692
351
351
  truefoundry/ml/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
352
352
  truefoundry/ml/cli/cli.py,sha256=MwpY7z_NEeJE_XIP7XbZELjNeu2vpMmohttHCKDRk54,335
353
353
  truefoundry/ml/cli/utils.py,sha256=j6_mZ4Spn114mz3P4QQ8jx0tmorXIuyQnHXVUSDvZi4,1035
@@ -370,17 +370,18 @@ truefoundry/ml/log_types/image/constants.py,sha256=wLtGEOA4T5fZHSlOXPuNDLX3lpbCt
370
370
  truefoundry/ml/log_types/image/image.py,sha256=sa0tBHdyluC8bELXY16E0HgFrUDnDBxHrteix4BFXcs,12479
371
371
  truefoundry/ml/log_types/image/image_normalizer.py,sha256=vrzfuSpVGgIxw_Q2sbFe7kQ_JpAndX0bMwC7wtfi41g,3104
372
372
  truefoundry/ml/log_types/image/types.py,sha256=inFQlyAyDvZtfliFpENirNCm1XO9beyZ8DNn97DoDKs,1568
373
- truefoundry/workflow/__init__.py,sha256=8wjsorcOGzCAWGqLRbAUf8eyezxpnB4NvXHX_rdO7ks,1656
373
+ truefoundry/workflow/__init__.py,sha256=tscHelUxCnzkZBVdeEHtW7HjTsUzwMxIQQ_iAIAL0tM,1704
374
374
  truefoundry/workflow/container_task.py,sha256=8arieePsX4__OnG337hOtCiNgJwtKJJCsZcmFmCBJtk,402
375
375
  truefoundry/workflow/map_task.py,sha256=f9vcAPRQy0Ttw6bvdZBKUVJMSm4eGQrbE1GHWhepHIU,1864
376
376
  truefoundry/workflow/python_task.py,sha256=SRXRLC4vdBqGjhkwuaY39LEWN6iPCpJAuW17URRdWTY,1128
377
+ truefoundry/workflow/spark_task.py,sha256=qHVxFD_sQqiK34zwOq9Idbme_ut6Z151QwfaWT4IiMA,3550
377
378
  truefoundry/workflow/task.py,sha256=34m55mALXx6ko9o5HkK6FDtMajdvJzBhOsHwDM2RcBA,1779
378
379
  truefoundry/workflow/workflow.py,sha256=OjKBwEArxTzNDpfJWgnIqkXDQrYQRLXjheRwpOCu3LE,4861
379
380
  truefoundry/workflow/remote_filesystem/__init__.py,sha256=LQ95ViEjJ7Ts4JcCGOxMPs7NZmQdZ4bTiq6qXtsjUhE,206
380
381
  truefoundry/workflow/remote_filesystem/logger.py,sha256=em2l7D6sw7xTLDP0kQSLpgfRRCLpN14Qw85TN7ujQcE,1022
381
382
  truefoundry/workflow/remote_filesystem/tfy_signed_url_client.py,sha256=xcT0wQmQlgzcj0nP3tJopyFSVWT1uv3nhiTIuwfXYeg,12342
382
383
  truefoundry/workflow/remote_filesystem/tfy_signed_url_fs.py,sha256=nSGPZu0Gyd_jz0KsEE-7w_BmnTD8CVF1S8cUJoxaCbc,13305
383
- truefoundry-0.11.1rc1.dist-info/METADATA,sha256=jIzJR7W9qc5D4a-WuEne8Y8BUotdhjnc_CEcz-Bdp78,2508
384
- truefoundry-0.11.1rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
385
- truefoundry-0.11.1rc1.dist-info/entry_points.txt,sha256=xVjn7RMN-MW2-9f7YU-bBdlZSvvrwzhpX1zmmRmsNPU,98
386
- truefoundry-0.11.1rc1.dist-info/RECORD,,
384
+ truefoundry-0.11.3.dev0.dist-info/METADATA,sha256=9xt1A3KNiY3CA95wQVDTAwF0LyzbgsX936eAHFIRnk4,2764
385
+ truefoundry-0.11.3.dev0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
386
+ truefoundry-0.11.3.dev0.dist-info/entry_points.txt,sha256=xVjn7RMN-MW2-9f7YU-bBdlZSvvrwzhpX1zmmRmsNPU,98
387
+ truefoundry-0.11.3.dev0.dist-info/RECORD,,