torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/__init__.py +2 -0
- torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
- torchx/apps/serve/serve.py +2 -0
- torchx/apps/utils/booth_main.py +2 -0
- torchx/apps/utils/copy_main.py +2 -0
- torchx/apps/utils/process_monitor.py +2 -0
- torchx/cli/__init__.py +2 -0
- torchx/cli/argparse_util.py +38 -3
- torchx/cli/cmd_base.py +2 -0
- torchx/cli/cmd_cancel.py +2 -0
- torchx/cli/cmd_configure.py +2 -0
- torchx/cli/cmd_delete.py +30 -0
- torchx/cli/cmd_describe.py +2 -0
- torchx/cli/cmd_list.py +8 -4
- torchx/cli/cmd_log.py +6 -24
- torchx/cli/cmd_run.py +269 -45
- torchx/cli/cmd_runopts.py +2 -0
- torchx/cli/cmd_status.py +12 -1
- torchx/cli/cmd_tracker.py +3 -1
- torchx/cli/colors.py +2 -0
- torchx/cli/main.py +4 -0
- torchx/components/__init__.py +3 -8
- torchx/components/component_test_base.py +2 -0
- torchx/components/dist.py +18 -7
- torchx/components/integration_tests/component_provider.py +4 -2
- torchx/components/integration_tests/integ_tests.py +2 -0
- torchx/components/serve.py +2 -0
- torchx/components/structured_arg.py +7 -6
- torchx/components/utils.py +15 -4
- torchx/distributed/__init__.py +2 -4
- torchx/examples/apps/datapreproc/datapreproc.py +2 -0
- torchx/examples/apps/lightning/data.py +5 -3
- torchx/examples/apps/lightning/model.py +7 -6
- torchx/examples/apps/lightning/profiler.py +7 -4
- torchx/examples/apps/lightning/train.py +11 -2
- torchx/examples/torchx_out_of_sync_training.py +11 -0
- torchx/notebook.py +2 -0
- torchx/runner/__init__.py +2 -0
- torchx/runner/api.py +167 -60
- torchx/runner/config.py +43 -10
- torchx/runner/events/__init__.py +57 -13
- torchx/runner/events/api.py +14 -3
- torchx/runner/events/handlers.py +2 -0
- torchx/runtime/tracking/__init__.py +2 -0
- torchx/runtime/tracking/api.py +2 -0
- torchx/schedulers/__init__.py +16 -15
- torchx/schedulers/api.py +70 -14
- torchx/schedulers/aws_batch_scheduler.py +79 -5
- torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
- torchx/schedulers/devices.py +17 -4
- torchx/schedulers/docker_scheduler.py +43 -11
- torchx/schedulers/ids.py +29 -23
- torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
- torchx/schedulers/kubernetes_scheduler.py +383 -38
- torchx/schedulers/local_scheduler.py +100 -27
- torchx/schedulers/lsf_scheduler.py +5 -4
- torchx/schedulers/slurm_scheduler.py +336 -20
- torchx/schedulers/streams.py +2 -0
- torchx/specs/__init__.py +89 -12
- torchx/specs/api.py +431 -32
- torchx/specs/builders.py +176 -38
- torchx/specs/file_linter.py +143 -57
- torchx/specs/finder.py +68 -28
- torchx/specs/named_resources_aws.py +254 -22
- torchx/specs/named_resources_generic.py +2 -0
- torchx/specs/overlays.py +106 -0
- torchx/specs/test/components/__init__.py +2 -0
- torchx/specs/test/components/a/__init__.py +2 -0
- torchx/specs/test/components/a/b/__init__.py +2 -0
- torchx/specs/test/components/a/b/c.py +2 -0
- torchx/specs/test/components/c/__init__.py +2 -0
- torchx/specs/test/components/c/d.py +2 -0
- torchx/tracker/__init__.py +12 -6
- torchx/tracker/api.py +15 -18
- torchx/tracker/backend/fsspec.py +2 -0
- torchx/util/cuda.py +2 -0
- torchx/util/datetime.py +2 -0
- torchx/util/entrypoints.py +39 -15
- torchx/util/io.py +2 -0
- torchx/util/log_tee_helpers.py +210 -0
- torchx/util/modules.py +65 -0
- torchx/util/session.py +42 -0
- torchx/util/shlex.py +2 -0
- torchx/util/strings.py +3 -1
- torchx/util/types.py +90 -29
- torchx/version.py +4 -2
- torchx/workspace/__init__.py +2 -0
- torchx/workspace/api.py +136 -6
- torchx/workspace/dir_workspace.py +2 -0
- torchx/workspace/docker_workspace.py +30 -2
- torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
- torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
- torchx/examples/pipelines/__init__.py +0 -0
- torchx/examples/pipelines/kfp/__init__.py +0 -0
- torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
- torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
- torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
- torchx/pipelines/kfp/__init__.py +0 -28
- torchx/pipelines/kfp/adapter.py +0 -271
- torchx/pipelines/kfp/version.py +0 -17
- torchx/schedulers/gcp_batch_scheduler.py +0 -487
- torchx/schedulers/ray/ray_common.py +0 -22
- torchx/schedulers/ray/ray_driver.py +0 -307
- torchx/schedulers/ray_scheduler.py +0 -453
- torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
- torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
# All rights reserved.
|
|
4
|
-
#
|
|
5
|
-
# This source code is licensed under the BSD-style license found in the
|
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
|
7
|
-
|
|
8
|
-
"""
|
|
9
|
-
Intro KubeFlow Pipelines Example
|
|
10
|
-
================================
|
|
11
|
-
|
|
12
|
-
This is an introductory pipeline using KubeFlow Pipelines built with only TorchX
|
|
13
|
-
components.
|
|
14
|
-
|
|
15
|
-
TorchX is intended to allow making cross platform components. As such, we have
|
|
16
|
-
a standard definition that uses adapters to convert it to the specific
|
|
17
|
-
pipeline platform. This is an example of using the KFP adapter to run a TorchX
|
|
18
|
-
component as part of a KubeFlow Pipeline.
|
|
19
|
-
|
|
20
|
-
TorchX tries to leverage standard mechanisms wherever possible. For KFP we use
|
|
21
|
-
the existing KFP pipeline definition syntax and add a single
|
|
22
|
-
`component_from_app` conversion step to convert a TorchX component into one
|
|
23
|
-
KFP can understand.
|
|
24
|
-
|
|
25
|
-
Typically you have a separate component file but for this example we define the
|
|
26
|
-
AppDef inline.
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
import kfp
|
|
30
|
-
from torchx import specs
|
|
31
|
-
from torchx.pipelines.kfp.adapter import container_from_app
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def pipeline() -> None:
    # The component is described inline as an AppDef. AppDef is a core TorchX
    # type; here it holds one role that runs /bin/echo in an alpine image.
    worker_role = specs.Role(
        name="worker",
        entrypoint="/bin/echo",
        args=["Hello TorchX!"],
        image="alpine",
    )
    intro_app: specs.AppDef = specs.AppDef(
        name="examples-intro",
        roles=[worker_role],
    )

    # The container_from_app adapter converts the TorchX AppDef into a KFP
    # container: it generates a KFP component definition from the app def and
    # instantiates it into a container.
    intro_container: kfp.dsl.ContainerOp = container_from_app(intro_app)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
# %%
|
|
57
|
-
# To generate the pipeline definition file we need to call into the KFP compiler
|
|
58
|
-
# with our pipeline function.
|
|
59
|
-
|
|
60
|
-
# Compile the pipeline function into pipeline.yaml.
kfp.compiler.Compiler().compile(pipeline_func=pipeline, package_path="pipeline.yaml")

# Read the generated definition back and print it.
with open("pipeline.yaml", "rt") as f:
    print(f.read())
|
|
67
|
-
|
|
68
|
-
# %%
|
|
69
|
-
# Once this has all run you should have a pipeline file (typically
|
|
70
|
-
# pipeline.yaml) that you can upload to your KFP cluster via the UI or
|
|
71
|
-
# a kfp.Client.
|
|
72
|
-
#
|
|
73
|
-
# See the
|
|
74
|
-
# `KFP SDK Examples <https://www.kubeflow.org/docs/components/pipelines/tutorials/sdk-examples/#examples>`_
|
|
75
|
-
# for more info on launching KFP pipelines.
|
|
76
|
-
|
|
77
|
-
# %%
|
|
78
|
-
# See the :ref:`examples_pipelines/kfp/advanced_pipeline:Advanced KubeFlow Pipelines Example` for how to chain multiple
|
|
79
|
-
# components together and use builtin components.
|
|
80
|
-
|
|
81
|
-
# sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
|
torchx/pipelines/kfp/__init__.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
# All rights reserved.
|
|
4
|
-
#
|
|
5
|
-
# This source code is licensed under the BSD-style license found in the
|
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
|
7
|
-
|
|
8
|
-
"""
|
|
9
|
-
This module contains adapters for converting TorchX components into KubeFlow
|
|
10
|
-
Pipeline components.
|
|
11
|
-
|
|
12
|
-
The current KFP adapters only support single node (1 role and 1 replica)
|
|
13
|
-
components.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
import kfp
|
|
17
|
-
|
|
18
|
-
from .version import __version__ as __version__ # noqa F401
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _check_kfp_version() -> None:
    """
    Reject any installed kfp whose version string does not start with ``"1."``.

    Raises:
        ImportError: if ``kfp.__version__`` does not start with ``"1."``.
    """
    installed = kfp.__version__
    if installed.startswith("1."):
        return
    raise ImportError(
        f"Only kfp version 1.x.x is supported! kfp version {installed}"
    )


# Run the check at import time so an unsupported kfp fails the package import.
_check_kfp_version()
|
torchx/pipelines/kfp/adapter.py
DELETED
|
@@ -1,271 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
# All rights reserved.
|
|
4
|
-
#
|
|
5
|
-
# This source code is licensed under the BSD-style license found in the
|
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
|
7
|
-
|
|
8
|
-
import json
|
|
9
|
-
import os
|
|
10
|
-
import os.path
|
|
11
|
-
import shlex
|
|
12
|
-
from typing import Mapping, Optional, Tuple
|
|
13
|
-
|
|
14
|
-
import yaml
|
|
15
|
-
from kfp import components, dsl
|
|
16
|
-
|
|
17
|
-
# @manual=fbsource//third-party/pypi/kfp:kfp
|
|
18
|
-
from kfp.components.structures import ComponentSpec, OutputSpec
|
|
19
|
-
from kubernetes.client.models import (
|
|
20
|
-
V1ContainerPort,
|
|
21
|
-
V1EmptyDirVolumeSource,
|
|
22
|
-
V1Volume,
|
|
23
|
-
V1VolumeMount,
|
|
24
|
-
)
|
|
25
|
-
from torchx.schedulers.kubernetes_scheduler import app_to_resource, pod_labels
|
|
26
|
-
from torchx.specs import api
|
|
27
|
-
from typing_extensions import Protocol
|
|
28
|
-
|
|
29
|
-
from .version import __version__ as __version__ # noqa F401
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def component_spec_from_app(app: api.AppDef) -> Tuple[str, api.Role]:
    """
    component_spec_from_app takes in a TorchX component and generates the yaml
    spec for it. Notably this doesn't apply resources or port_maps since those
    must be applied at runtime which is why it returns the role spec as well.

    Args:
        app: The AppDef to convert. It must contain exactly one role, and that
            role must have exactly one replica.

    Returns:
        A ``(spec, role)`` tuple: ``spec`` is the YAML text of the component
        spec (name, description, container image/command/env and an empty
        ``outputs`` list) and ``role`` is the app's single role.

    Raises:
        AssertionError: if the app does not have exactly one role, or that
            role does not have exactly one replica.

    >>> from torchx import specs
    >>> from torchx.pipelines.kfp.adapter import component_spec_from_app
    >>> app_def = specs.AppDef(
    ...     name="trainer",
    ...     roles=[specs.Role("trainer", image="foo:latest")],
    ... )
    >>> component_spec_from_app(app_def)
    ('description: ...', Role(...))
    """
    assert len(app.roles) == 1, f"KFP adapter only support one role, got {app.roles}"

    role = app.roles[0]
    # Report the same value that is being checked. The message previously
    # formatted `app.num_replicas`, which is not the attribute under test, so
    # a failing check could not produce the intended assertion message.
    assert (
        role.num_replicas == 1
    ), f"KFP adapter only supports one replica, got {role.num_replicas}"

    # The container command is the role's entrypoint followed by its arguments.
    command = [role.entrypoint, *role.args]

    spec = {
        "name": f"{app.name}-{role.name}",
        "description": f"KFP wrapper for TorchX component {app.name}, role {role.name}",
        "implementation": {
            "container": {
                "image": role.image,
                "command": command,
                "env": role.env,
            }
        },
        "outputs": [],
    }
    return yaml.dump(spec), role
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class ContainerFactory(Protocol):
    """
    Structural type for a callable that, when invoked, produces a
    kfp.dsl.ContainerOp.
    """

    def __call__(self, *args: object, **kwargs: object) -> dsl.ContainerOp: ...
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class KFPContainerFactory(ContainerFactory, Protocol):
    """
    A ContainerFactory that additionally exposes KFP metadata through its
    ``component_spec`` attribute.
    """

    # KFP component specification attached to the factory.
    component_spec: ComponentSpec
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
METADATA_FILE = "/tmp/outputs/mlpipeline-ui-metadata/data.json"
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def component_from_app(
    app: api.AppDef, ui_metadata: Optional[Mapping[str, object]] = None
) -> ContainerFactory:
    """
    Turn a TorchX component/AppDef into a KFP ContainerOp factory. This is
    equivalent to the
    `kfp.components.load_component_from_*
    <https://kubeflow-pipelines.readthedocs.io/en/1.8.22/source/kfp.components.html#kfp.components.load_component_from_text>`_
    methods.

    Each ContainerOp produced by the returned factory gets the role's
    settings applied: CPU request/limit (in millicores) when ``cpu >= 0``,
    memory request/limit (``M`` suffix) when ``memMB >= 0``, a GPU limit when
    ``gpu > 0``, one container port per ``port_map`` entry, and the TorchX
    pod labels.

    Args:
        app: The AppDef to generate a KFP container factory for.
        ui_metadata: KFP UI Metadata to output so you can have model results show
            up in the UI. See
            https://www.kubeflow.org/docs/components/pipelines/sdk/output-viewer/
            for more info on the format.

    Returns:
        A callable that forwards its arguments to the loaded KFP component
        factory and returns the configured ContainerOp.

    Raises:
        AssertionError: if the role's resource declares any capabilities.

    >>> from torchx import specs
    >>> from torchx.pipelines.kfp.adapter import component_from_app
    >>> app_def = specs.AppDef(
    ...     name="trainer",
    ...     roles=[specs.Role("trainer", image="foo:latest")],
    ... )
    >>> component_from_app(app_def)
    <function component_from_app...>
    """

    role: api.Role
    spec_yaml, role = component_spec_from_app(app)
    requested: api.Resource = role.resource
    assert (
        len(requested.capabilities) == 0
    ), f"KFP doesn't support capabilities, got {requested.capabilities}"
    kfp_factory: KFPContainerFactory = components.load_component_from_text(spec_yaml)

    if ui_metadata is not None:
        # Declare the UI metadata output on the component spec.
        # pyre-fixme[16]: `ComponentSpec` has no attribute `outputs`
        kfp_factory.component_spec.outputs.append(
            OutputSpec(
                name="mlpipeline-ui-metadata",
                type="MLPipeline UI Metadata",
                description="ui metadata",
            )
        )

    def factory_wrapper(*args: object, **kwargs: object) -> dsl.ContainerOp:
        op = kfp_factory(*args, **kwargs)
        main = op.container

        if ui_metadata is not None:
            # The UI metadata is written by a sidecar, so the main container
            # and the sidecar share one tmp volume; that way the output shows
            # up in the original container.
            op.add_volume(V1Volume(name="tmp", empty_dir=V1EmptyDirVolumeSource()))
            main.add_volume_mount(V1VolumeMount(name="tmp", mount_path="/tmp/"))
            op.output_artifact_paths["mlpipeline-ui-metadata"] = METADATA_FILE
            op.add_sidecar(_ui_metadata_sidecar(ui_metadata))

        # CPU is expressed in millicores; request and limit are set equal.
        if requested.cpu >= 0:
            millicores = f"{int(requested.cpu * 1000)}m"
            main.set_cpu_request(millicores)
            main.set_cpu_limit(millicores)

        # Memory request and limit are likewise set to the same value.
        if requested.memMB >= 0:
            megabytes = f"{int(requested.memMB)}M"
            main.set_memory_request(megabytes)
            main.set_memory_limit(megabytes)

        # A GPU limit is only set when at least one GPU is requested.
        if requested.gpu > 0:
            main.set_gpu_limit(str(requested.gpu))

        for port_name, port_number in role.port_map.items():
            main.add_port(
                V1ContainerPort(name=port_name, container_port=port_number)
            )

        op.pod_labels.update(pod_labels(app, 0, role, 0, app.name))

        return op

    return factory_wrapper
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
def _ui_metadata_sidecar(
    ui_metadata: Mapping[str, object], image: str = "alpine"
) -> dsl.Sidecar:
    """
    Build the sidecar that writes ``ui_metadata`` as JSON to METADATA_FILE.

    Args:
        ui_metadata: mapping serialized with ``json.dumps`` and shell-quoted
            before being embedded in the sidecar's command.
        image: image the sidecar runs in.

    Returns:
        A ``dsl.Sidecar`` named ``ui-metadata-sidecar`` that mirrors the
        volume mounts and runs a single ``sh -c`` script.
    """
    quoted_json = shlex.quote(json.dumps(ui_metadata))
    output_dir = os.path.dirname(METADATA_FILE)
    # Create the output directory first, then write the JSON into the file.
    script = f"mkdir -p {output_dir}; echo {quoted_json} > {METADATA_FILE}"
    return dsl.Sidecar(
        name="ui-metadata-sidecar",
        image=image,
        command=["sh", "-c", script],
        mirror_volume_mounts=True,
    )
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
def container_from_app(
    app: api.AppDef,
    *args: object,
    ui_metadata: Optional[Mapping[str, object]] = None,
    **kwargs: object,
) -> dsl.ContainerOp:
    """
    Convert the app into a KFP component and immediately instantiate it,
    returning the resulting ContainerOp.

    ``app`` and ``ui_metadata`` are handed to component_from_app (see it for
    their meaning); every other positional and keyword argument is passed
    through to the KFP container factory it returns.

    >>> import kfp
    >>> from torchx import specs
    >>> from torchx.pipelines.kfp.adapter import container_from_app
    >>> app_def = specs.AppDef(
    ...     name="trainer",
    ...     roles=[specs.Role("trainer", image="foo:latest")],
    ... )
    >>> def pipeline():
    ...     trainer = container_from_app(app_def)
    ...     print(trainer)
    >>> kfp.compiler.Compiler().compile(
    ...     pipeline_func=pipeline,
    ...     package_path="/tmp/pipeline.yaml",
    ... )
    {'ContainerOp': {... 'name': 'trainer-trainer', ...}}
    """
    return component_from_app(app, ui_metadata)(*args, **kwargs)
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
def resource_from_app(
    app: api.AppDef,
    queue: str,
    service_account: Optional[str] = None,
) -> dsl.ResourceOp:
    """
    Create a KFP ResourceOp for the given app so that it runs as a distributed
    job through the Volcano job scheduler on Kubernetes. See
    https://volcano.sh/en/docs/ for more info on Volcano and how to install.

    Args:
        app: The torchx AppDef to adapt.
        queue: the Volcano queue to schedule the operator in.
        service_account: optional value forwarded unchanged to
            ``app_to_resource`` when building the Kubernetes resource.

    Returns:
        A ``dsl.ResourceOp`` named after the app that creates the resource,
        succeeding on phase ``Completed`` and failing on phase ``Failed``.

    >>> import kfp
    >>> from torchx import specs
    >>> from torchx.pipelines.kfp.adapter import resource_from_app
    >>> app_def = specs.AppDef(
    ...     name="trainer",
    ...     roles=[specs.Role("trainer", image="foo:latest", num_replicas=3)],
    ... )
    >>> def pipeline():
    ...     trainer = resource_from_app(app_def, queue="test")
    ...     print(trainer)
    >>> kfp.compiler.Compiler().compile(
    ...     pipeline_func=pipeline,
    ...     package_path="/tmp/pipeline.yaml",
    ... )
    {'ResourceOp': {... 'name': 'trainer-0', ... 'name': 'trainer-1', ... 'name': 'trainer-2', ...}}
    """
    job_resource = app_to_resource(app, queue, service_account=service_account)
    return dsl.ResourceOp(
        name=app.name,
        action="create",
        success_condition="status.state.phase = Completed",
        failure_condition="status.state.phase = Failed",
        k8s_resource=job_resource,
    )
|
torchx/pipelines/kfp/version.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
# All rights reserved.
|
|
4
|
-
#
|
|
5
|
-
# This source code is licensed under the BSD-style license found in the
|
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
|
7
|
-
|
|
8
|
-
# Follows PEP-0440 version scheme guidelines
|
|
9
|
-
# https://www.python.org/dev/peps/pep-0440/#version-scheme
|
|
10
|
-
#
|
|
11
|
-
# Examples:
|
|
12
|
-
# 0.1.0.devN # Developmental release
|
|
13
|
-
# 0.1.0aN # Alpha release
|
|
14
|
-
# 0.1.0bN # Beta release
|
|
15
|
-
# 0.1.0rcN # Release Candidate
|
|
16
|
-
# 0.1.0 # Final release
|
|
17
|
-
__version__ = "0.1.0.dev0"
|