torchx-nightly 2025.9.17__py3-none-any.whl → 2025.9.19__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release: this version of torchx-nightly might be problematic.

torchx/components/__init__.py CHANGED
@@ -298,13 +298,6 @@ imagine the component is defined as:
  * ``*args=["--help"]``: ``torchx run comp.py:f -- --help``
  * ``*args=["--i", "2"]``: ``torchx run comp.py:f --i 1 -- --i 2``

- Run in a Pipeline
- --------------------------------
-
- The :ref:`torchx.pipelines<pipelines:torchx.pipelines>` define adapters that
- convert a torchx component into the object that represents a pipeline "stage" in the
- target pipeline platform (see :ref:`Pipelines` for a list of supported pipeline orchestrators).
-
  Additional Resources
  -----------------------

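Note: the docs in the hunk above describe how arguments after the `--` separator are forwarded verbatim into a component's trailing `*args`. A minimal sketch of the documented call, assuming a component `f(i, *args)` as in the surrounding docs (the function body is illustrative only):

```python
# Hypothetical component matching the docs above: `i` is a named flag;
# trailing *args collect everything after the "--" separator verbatim.
def f(i: int = 1, *args: str) -> None:
    print(i, list(args))

# `torchx run comp.py:f --i 1 -- --i 2` resolves to roughly:
f(1, "--i", "2")  # prints: 1 ['--i', '2']
```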
torchx/schedulers/__init__.py CHANGED
@@ -22,7 +22,6 @@ DEFAULT_SCHEDULER_MODULES: Mapping[str, str] = {
  "aws_batch": "torchx.schedulers.aws_batch_scheduler",
  "aws_sagemaker": "torchx.schedulers.aws_sagemaker_scheduler",
  "gcp_batch": "torchx.schedulers.gcp_batch_scheduler",
- "ray": "torchx.schedulers.ray_scheduler",
  "lsf": "torchx.schedulers.lsf_scheduler",
  }

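Note: this hunk drops the `ray` entry from `DEFAULT_SCHEDULER_MODULES`, so the Ray backend can no longer be resolved by name. A standalone sketch of the name-to-module lookup this mapping implies (the `resolve` helper is a hypothetical stand-in, not TorchX's actual loader):

```python
import importlib
from types import ModuleType
from typing import Mapping

# Trimmed copy of the mapping after this change; "ray" is gone.
SCHEDULER_MODULES: Mapping[str, str] = {
    "aws_batch": "torchx.schedulers.aws_batch_scheduler",
    "lsf": "torchx.schedulers.lsf_scheduler",
}

def resolve(name: str) -> ModuleType:
    """Import the scheduler module registered under `name`."""
    try:
        return importlib.import_module(SCHEDULER_MODULES[name])
    except KeyError:
        # e.g. resolve("ray") now fails here rather than importing ray_scheduler
        raise ValueError(f"unknown scheduler: {name}") from None
```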
torchx/specs/__init__.py CHANGED
@@ -1,4 +1,3 @@
- #!/usr/bin/env python3
  # Copyright (c) Meta Platforms, Inc. and affiliates.
  # All rights reserved.
  #
@@ -52,14 +51,19 @@ from torchx.util.entrypoints import load_group

  from torchx.util.modules import import_attr

- AWS_NAMED_RESOURCES: Mapping[str, Callable[[], Resource]] = import_attr(
+ GiB: int = 1024
+
+ ResourceFactory = Callable[[], Resource]
+
+ AWS_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
  "torchx.specs.named_resources_aws", "NAMED_RESOURCES", default={}
  )
- GENERIC_NAMED_RESOURCES: Mapping[str, Callable[[], Resource]] = import_attr(
+ GENERIC_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
  "torchx.specs.named_resources_generic", "NAMED_RESOURCES", default={}
  )
-
- GiB: int = 1024
+ FB_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
+ "torchx.specs.fb.named_resources", "NAMED_RESOURCES", default={}
+ )


  def _load_named_resources() -> Dict[str, Callable[[], Resource]]:
@@ -69,6 +73,7 @@ def _load_named_resources() -> Dict[str, Callable[[], Resource]]:
  for name, resource in {
  **GENERIC_NAMED_RESOURCES,
  **AWS_NAMED_RESOURCES,
+ **FB_NAMED_RESOURCES,
  **resource_methods,
  }.items():
  materialized_resources[name] = resource
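Note: `import_attr(..., default={})` lets the optional `torchx.specs.fb.named_resources` module be absent, in which case `FB_NAMED_RESOURCES` is simply empty. In `_load_named_resources`, later `**` expansions win, so entrypoint-registered `resource_methods` override FB, which overrides AWS, which overrides the generic defaults. A self-contained sketch of that precedence rule (placeholder factories, not the real resource modules):

```python
from typing import Callable, Dict

# Placeholder factories standing in for the real NAMED_RESOURCES mappings.
GENERIC: Dict[str, Callable[[], str]] = {"gpu.small": lambda: "generic"}
AWS: Dict[str, Callable[[], str]] = {"gpu.small": lambda: "aws"}
FB: Dict[str, Callable[[], str]] = {}  # empty when the fb module is absent

# Later expansions override earlier keys, mirroring _load_named_resources.
merged = {**GENERIC, **AWS, **FB}
assert merged["gpu.small"]() == "aws"
```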
torchx/specs/api.py CHANGED
@@ -1,4 +1,3 @@
- #!/usr/bin/env python3
  # Copyright (c) Meta Platforms, Inc. and affiliates.
  # All rights reserved.
  #
@@ -83,6 +82,8 @@ class Resource:
  memMB: MB of ram
  capabilities: additional hardware specs (interpreted by scheduler)
  devices: a list of named devices with their quantities
+ tags: metadata tags for the resource (not interpreted by schedulers)
+ used to add non-functional information about resources (e.g. whether it is an alias of another resource)

  Note: you should prefer to use named_resources instead of specifying the raw
  resource requirement directly.
@@ -93,6 +94,7 @@ class Resource:
  memMB: int
  capabilities: Dict[str, Any] = field(default_factory=dict)
  devices: Dict[str, int] = field(default_factory=dict)
+ tags: Dict[str, object] = field(default_factory=dict)

  @staticmethod
  def copy(original: "Resource", **capabilities: Any) -> "Resource":
@@ -101,6 +103,7 @@ class Resource:
  are present in the original resource and as parameter, the one from parameter
  will be used.
  """
+
  res_capabilities = dict(original.capabilities)
  res_capabilities.update(capabilities)
  return Resource(
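Note: per the docstring added above, `tags` carries scheduler-opaque metadata alongside the functional fields. A hedged usage sketch (constructor fields taken from the diff; the `alias_of` key is an invented illustration, not a TorchX convention):

```python
from torchx.specs import Resource

# 8 CPUs, 1 GPU, 16 GiB of RAM; `tags` is not interpreted by schedulers and
# only records metadata, e.g. that this entry aliases another named resource.
res = Resource(
    cpu=8,
    gpu=1,
    memMB=16 * 1024,
    tags={"alias_of": "aws_g4dn.2xlarge"},  # hypothetical tag key
)
```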
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: torchx-nightly
- Version: 2025.9.17
+ Version: 2025.9.19
  Summary: TorchX SDK and Components
  Home-page: https://github.com/pytorch/torchx
  Author: TorchX Devs
@@ -41,8 +41,6 @@ Requires-Dist: google-cloud-logging==3.10.0; extra == "dev"
  Requires-Dist: google-cloud-runtimeconfig==0.34.0; extra == "dev"
  Requires-Dist: hydra-core; extra == "dev"
  Requires-Dist: ipython; extra == "dev"
- Requires-Dist: kfp==1.8.22; extra == "dev"
- Requires-Dist: protobuf==3.20.3; extra == "dev"
  Requires-Dist: mlflow-skinny; extra == "dev"
  Requires-Dist: moto~=5.0.8; extra == "dev"
  Requires-Dist: pyre-extensions; extra == "dev"
@@ -60,7 +58,6 @@ Requires-Dist: torchtext==0.18.0; extra == "dev"
  Requires-Dist: torchvision==0.23.0; extra == "dev"
  Requires-Dist: typing-extensions; extra == "dev"
  Requires-Dist: ts==0.5.1; extra == "dev"
- Requires-Dist: ray[default]; extra == "dev"
  Requires-Dist: wheel; extra == "dev"
  Requires-Dist: lintrunner; extra == "dev"
  Requires-Dist: lintrunner-adapters; extra == "dev"
@@ -72,12 +69,8 @@ Provides-Extra: gcp_batch
  Requires-Dist: google-cloud-batch>=0.5.0; extra == "gcp-batch"
  Requires-Dist: google-cloud-logging>=3.0.0; extra == "gcp-batch"
  Requires-Dist: google-cloud-runtimeconfig>=0.33.2; extra == "gcp-batch"
- Provides-Extra: kfp
- Requires-Dist: kfp==1.6.2; extra == "kfp"
  Provides-Extra: kubernetes
  Requires-Dist: kubernetes>=11; extra == "kubernetes"
- Provides-Extra: ray
- Requires-Dist: ray>=1.12.1; extra == "ray"

  [![PyPI](https://img.shields.io/pypi/v/torchx)](https://pypi.org/project/torchx/)
  [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/pytorch/torchx/blob/main/LICENSE)
@@ -100,7 +93,6 @@ TorchX currently supports:
  * AWS Batch
  * Docker
  * Local
- * Ray (prototype)
  * GCP Batch (prototype)

  Need a scheduler not listed? [Let us know!](https://github.com/pytorch/torchx/issues?q=is%3Aopen+is%3Aissue+label%3Ascheduler-request)
@@ -136,15 +128,9 @@ pip install torchx
  # install torchx sdk and CLI -- all dependencies
  pip install "torchx[dev]"

- # install torchx kubeflow pipelines (kfp) support
- pip install "torchx[kfp]"
-
  # install torchx Kubernetes / Volcano support
  pip install "torchx[kubernetes]"

- # install torchx Ray support
- pip install "torchx[ray]"
-
  # install torchx GCP Batch support
  pip install "torchx[gcp_batch]"
  ```
@@ -22,7 +22,7 @@ torchx/cli/cmd_status.py,sha256=22IAEmKs0qkG6kJi83u9dRX2Q-ntT7yehVx7FxtY-vQ,2114
  torchx/cli/cmd_tracker.py,sha256=RfLxE4Cq1wfk7k051RtZ8RPJp0pEKSCa3KmTeRs3LF8,5218
  torchx/cli/colors.py,sha256=yLMes7e_UoLAfhxE0W6edhc58t83UHAlnCN2ANPeuXw,568
  torchx/cli/main.py,sha256=1Jf2cnO6Y2W69Adt88avmNPVrL6ZR4Hkff6GVB4293k,3484
- torchx/components/__init__.py,sha256=6Sb8RWRGObajkH7eFSKv5bHaN5bzTqJiSEmrIIo3OIc,12121
+ torchx/components/__init__.py,sha256=YT2D_w4df1TN_Soek6vhk6fv8WFxT-_mgs6SINrP0BI,11810
  torchx/components/component_test_base.py,sha256=22iNSdVa_qTW3SMM30Pw5UEWlK4DZVw0C03EqYiaLOI,4150
  torchx/components/dist.py,sha256=9jECk3jjQ4Yh4oWDK8vnQ7kcI0-OWCbbwj8uvBdI9FU,14588
  torchx/components/interpret.py,sha256=g8gkKdDJvsBfX1ZrpVT7n2bMEtmwRV_1AqDyAnnQ_aA,697
@@ -46,15 +46,7 @@ torchx/examples/apps/lightning/interpret.py,sha256=Hd3kE5a6FyhxCmJBfTzb4Tlj518zh
  torchx/examples/apps/lightning/model.py,sha256=4CgObWfANqDN9emYSdmCpbRe_V_Lef_Hd3M-yayDbZE,4045
  torchx/examples/apps/lightning/profiler.py,sha256=SSSihnwjeUTkBoz0E3qn1b-wbkfUIowscx2ND_37zyw,1915
  torchx/examples/apps/lightning/train.py,sha256=0wvvshGHvZowePB4LfclXwn40X7i9euM0ReETWBcPSo,6253
- torchx/examples/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- torchx/examples/pipelines/kfp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- torchx/examples/pipelines/kfp/advanced_pipeline.py,sha256=U5N_XmpxbuEIh-hDayjJ5Lnk2lYvmgr7oznFnsKUk5g,8431
- torchx/examples/pipelines/kfp/dist_pipeline.py,sha256=xFn59P1S22o2zOJ2LhlIkhjYH3le0zp2sLPNj5idTnE,2203
- torchx/examples/pipelines/kfp/intro_pipeline.py,sha256=oWdMHPLWf5nKRm0hS7psF0yUp8Tf7tfR-Sm3YuUCmWk,2776
  torchx/pipelines/__init__.py,sha256=2MbRVk5xwRjg-d2qPemeXpEhDsocMQumPQ53lsesZAI,606
- torchx/pipelines/kfp/__init__.py,sha256=8iJ8lql_fxwuk9VCYSxXnX6tPL228fB5mDZpOs-kpn4,736
- torchx/pipelines/kfp/adapter.py,sha256=5GeHULjb1kxG6wJtYVLpNkgdzUi4iYEaR42VFOwT6fY,9045
- torchx/pipelines/kfp/version.py,sha256=mYBxd6bm4MeR34D--xo-JLQ9wHeAl_ZQLwbItCf9tr0,539
  torchx/runner/__init__.py,sha256=x8Sz7s_tLxPgJgvWIhK4ju9BNZU61uBFywGwDY6CqJs,315
  torchx/runner/api.py,sha256=0kDyOgmAcq0X-bTWiBIqX3BJOCYSa5-TZ7o2Hrqxzdw,30053
  torchx/runner/config.py,sha256=20X-vveAJVjb1AjjDSC6x_BVcdrTj9_ZLt_CHTykiFo,18266
@@ -64,7 +56,7 @@ torchx/runner/events/handlers.py,sha256=ThHCIJW21BfBgB7b6ftyjASJmD1KdizpjuTtsyqn
  torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,593
  torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
  torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
- torchx/schedulers/__init__.py,sha256=igIBdxGhkuzH7oYVFXIA9xwjkSn3QzWZ_9dhfdl_M0I,2299
+ torchx/schedulers/__init__.py,sha256=hliMsZHZNOKue0uTHUWxvO0V7xsKApBxN4Wb_9L0Mz4,2253
  torchx/schedulers/api.py,sha256=lfxNhrEO6eYYqVuQzzj9sTXrZShuZkyYxJ1jPE-Lvpo,14561
  torchx/schedulers/aws_batch_scheduler.py,sha256=hFxYzSZEK2SVS5sEyQC5YvNI0JJUJUQsWORlYpj_h3M,28105
  torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
@@ -76,14 +68,10 @@ torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wy
  torchx/schedulers/kubernetes_scheduler.py,sha256=0_loGJ7WnxEr9dhgFt3Gw-7nVLirMDVN-MAFTCq7erE,28217
  torchx/schedulers/local_scheduler.py,sha256=ttnxFDy48_DSYDEW-no27OirFZOyfrjwJ2S1MwBUi74,41929
  torchx/schedulers/lsf_scheduler.py,sha256=YS6Yel8tXJqLPxbcGz95lZG2nCi36AQXdNDyuBJePKg,17661
- torchx/schedulers/ray_scheduler.py,sha256=T-jsGSOa8O-h1kTUU7Q7Fk1RILL1Yzvuos_WFSQF8Fo,15795
  torchx/schedulers/slurm_scheduler.py,sha256=vZt102OxuTGj0ZE-V9dWbldtOyL2VbHcxADm_osL7Y4,31568
  torchx/schedulers/streams.py,sha256=8_SLezgnWgfv_zXUsJCUM34-h2dtv25NmZuxEwkzmxw,2007
- torchx/schedulers/ray/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
- torchx/schedulers/ray/ray_common.py,sha256=pyNYFvTKVwdjDAeCBNbPwAWwVNmlLOJWExfn90XY8u8,610
- torchx/schedulers/ray/ray_driver.py,sha256=RdaCLfth16ky-5PDVOWRe_RuheWJu9xufWux2F9T7iw,12302
- torchx/specs/__init__.py,sha256=Gw_2actqR_oWFtxEkGXCxGk_yrWK5JDZzwysyyqmXao,6438
- torchx/specs/api.py,sha256=wkhHOxeWH_tFO3npKqPhNg4VX2NH5gPIFEylkPBo3AU,41315
+ torchx/specs/__init__.py,sha256=HU7OoXs7aBBi0IenB49QIONRzoG1Ovs1Qlm9KnsvqfE,6609
+ torchx/specs/api.py,sha256=Rzv_Yx8yyseARbC928ZvqAZbvaXhJRDAbcyPsxiblF4,41543
  torchx/specs/builders.py,sha256=aozVl4q3h0mY5DDJCY1M1CyLC9SW66KJy8JIih8bZJo,13810
  torchx/specs/file_linter.py,sha256=6_aoeuS5d9UwXseKKfPgWNTwxj-f7G1i3uO9mQepti4,14402
  torchx/specs/finder.py,sha256=FcB6jQTNwnpc4OMV0F349kk0leu6O7JYjH_GW2d6GXE,17503
@@ -115,9 +103,9 @@ torchx/workspace/__init__.py,sha256=cZsKVvUWwDYcGhe6SCXQGBQfbk_yTnKEImOkI6xmu30,
  torchx/workspace/api.py,sha256=Ct_75VU94fsH9Rf1WRe-wJGpVgl5O05S_Dq_t2ArJWA,11348
  torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
  torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
- torchx_nightly-2025.9.17.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
- torchx_nightly-2025.9.17.dist-info/METADATA,sha256=yrw7Nu_XLhDmm0HDXN59bbVIADlRARi0yOUl6VnJnyU,6104
- torchx_nightly-2025.9.17.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- torchx_nightly-2025.9.17.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
- torchx_nightly-2025.9.17.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
- torchx_nightly-2025.9.17.dist-info/RECORD,,
+ torchx_nightly-2025.9.19.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
+ torchx_nightly-2025.9.19.dist-info/METADATA,sha256=4S3QpZtzHIUqC2VdZK_czPBhOWnQHdbTvjhFyAdE94s,5693
+ torchx_nightly-2025.9.19.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ torchx_nightly-2025.9.19.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
+ torchx_nightly-2025.9.19.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
+ torchx_nightly-2025.9.19.dist-info/RECORD,,
torchx/examples/pipelines/kfp/advanced_pipeline.py DELETED
@@ -1,289 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- Advanced KubeFlow Pipelines Example
- ===================================
-
- This is an example pipeline using KubeFlow Pipelines built with only TorchX
- components.
-
- KFP adapters can be used transform the TorchX components directly into
- something that can be used within KFP.
- """
-
- # %%
- # Input Arguments
- # ###############
- # Lets first define some arguments for the pipeline.
-
- import argparse
- import os.path
- import sys
- from typing import Dict
-
- import kfp
- import torchx
- from torchx import specs
- from torchx.components.dist import ddp as dist_ddp
- from torchx.components.serve import torchserve
- from torchx.components.utils import copy as utils_copy, python as utils_python
- from torchx.pipelines.kfp.adapter import container_from_app
-
-
- parser = argparse.ArgumentParser(description="example kfp pipeline")
-
- # %%
- # TorchX components are built around images. Depending on what scheduler
- # you're using this can vary but for KFP these images are specified as
- # docker containers. We have one container for the example apps and one for
- # the standard built in apps. If you modify the torchx example code you'll
- # need to rebuild the container before launching it on KFP
-
-
- parser.add_argument(
- "--image",
- type=str,
- help="docker image to use for the examples apps",
- default=torchx.IMAGE,
- )
-
- # %%
- # Most TorchX components use
- # `fsspec <https://filesystem-spec.readthedocs.io/en/latest/>`_ to abstract
- # away dealing with remote filesystems. This allows the components to take
- # paths like ``s3://`` to make it easy to use cloud storage providers.
- parser.add_argument(
- "--output_path",
- type=str,
- help="path to place the data",
- required=True,
- )
- parser.add_argument("--load_path", type=str, help="checkpoint path to load from")
-
- # %%
- # This example uses the torchserve for inference so we need to specify some
- # options. This assumes you have a TorchServe instance running in the same
- # Kubernetes cluster with with the service name ``torchserve`` in the default
- # namespace.
- #
- # See https://github.com/pytorch/serve/blob/master/kubernetes/README.md for info
- # on how to setup TorchServe.
- parser.add_argument(
- "--management_api",
- type=str,
- help="path to the torchserve management API",
- default="http://torchserve.default.svc.cluster.local:8081",
- )
- parser.add_argument(
- "--model_name",
- type=str,
- help="the name of the inference model",
- default="tiny_image_net",
- )
-
- # %% Parse the arguments, you'll need to set these accordingly if running from a
- # notebook.
-
-
- if "NOTEBOOK" in globals():
- argv = [
- "--output_path",
- "/tmp/output",
- ]
- else:
- argv = sys.argv[1:]
-
- args: argparse.Namespace = parser.parse_args(argv)
-
- # %%
- # Creating the Components
- # #######################
- # The first step is downloading the data to somewhere we can work on it. For
- # this we can just the builtin copy component. This component takes two valid
- # fsspec paths and copies them from one to another. In this case we're using
- # http as the source and a file under the output_path as the output.
-
-
- data_path: str = os.path.join(args.output_path, "tiny-imagenet-200.zip")
- copy_app: specs.AppDef = utils_copy(
- "http://cs231n.stanford.edu/tiny-imagenet-200.zip",
- data_path,
- image=args.image,
- )
-
- # %%
- # The next component is for data preprocessing. This takes in the raw data from
- # the previous operator and runs some transforms on it for use with the trainer.
- #
- # datapreproc outputs the data to a specified fsspec path. These paths are all
- # specified ahead of time so we have a fully static pipeline.
-
-
- processed_data_path: str = os.path.join(args.output_path, "processed")
- datapreproc_app: specs.AppDef = utils_python(
- "--output_path",
- processed_data_path,
- "--input_path",
- data_path,
- "--limit",
- "100",
- image=args.image,
- m="torchx.examples.apps.datapreproc.datapreproc",
- cpu=1,
- memMB=1024,
- )
-
- # %%
- # Next we'll create the trainer component that takes in the training data from the
- # previous datapreproc component. We've defined this in a separate component
- # file as you normally would.
- #
- # Having a separate component file allows you to launch your trainer from the
- # TorchX CLI via ``torchx run`` for fast iteration as well as run it from a
- # pipeline in an automated fashion.
-
- # make sure examples is on the path
- if "__file__" in globals():
- sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
-
-
- logs_path: str = os.path.join(args.output_path, "logs")
- models_path: str = os.path.join(args.output_path, "models")
-
- trainer_app: specs.AppDef = dist_ddp(
- *(
- "--output_path",
- models_path,
- "--load_path",
- args.load_path or "",
- "--log_path",
- logs_path,
- "--data_path",
- processed_data_path,
- "--epochs",
- str(1),
- ),
- image=args.image,
- m="torchx.examples.apps.lightning.train",
- j="1x1",
- # per node resource settings
- cpu=1,
- memMB=3000,
- )
-
- # %%
- # To have the tensorboard path show up in KFPs UI we need to some metadata so
- # KFP knows where to consume the metrics from.
- #
- # This will get used when we create the KFP container.
-
-
- ui_metadata: Dict[str, object] = {
- "outputs": [
- {
- "type": "tensorboard",
- "source": os.path.join(logs_path, "lightning_logs"),
- }
- ]
- }
-
- # %%
- # For the inference, we're leveraging one of the builtin TorchX components. This
- # component takes in a model and uploads it to the TorchServe management API
- # endpoints.
-
-
- serve_app: specs.AppDef = torchserve(
- model_path=os.path.join(models_path, "model.mar"),
- management_api=args.management_api,
- image=args.image,
- params={
- "model_name": args.model_name,
- # set this to allocate a worker
- # "initial_workers": 1,
- },
- )
-
- # %%
- # For model interpretability we're leveraging a custom component stored in it's
- # own component file. This component takes in the output from datapreproc and
- # train components and produces images with integrated gradient results.
-
- interpret_path: str = os.path.join(args.output_path, "interpret")
- interpret_app: specs.AppDef = utils_python(
- *(
- "--load_path",
- os.path.join(models_path, "last.ckpt"),
- "--data_path",
- processed_data_path,
- "--output_path",
- interpret_path,
- ),
- image=args.image,
- m="torchx.examples.apps.lightning.interpret",
- )
-
- # %%
- # Pipeline Definition
- # ###################
- # The last step is to define the actual pipeline using the torchx components via
- # the KFP adapter and export the pipeline package that can be uploaded to a KFP
- # cluster.
- #
- # The KFP adapter currently doesn't track the input and outputs so the
- # containers need to have their dependencies specified via `.after()`.
- #
- # We call `.set_tty()` to make the logs from the components more responsive for
- # example purposes.
-
-
- def pipeline() -> None:
- # container_from_app creates a KFP container from the TorchX app
- # definition.
- copy = container_from_app(copy_app)
- copy.container.set_tty()
-
- datapreproc = container_from_app(datapreproc_app)
- datapreproc.container.set_tty()
- datapreproc.after(copy)
-
- # For the trainer we want to log that UI metadata so you can access
- # tensorboard from the UI.
- trainer = container_from_app(trainer_app, ui_metadata=ui_metadata)
- trainer.container.set_tty()
- trainer.after(datapreproc)
-
- if False:
- serve = container_from_app(serve_app)
- serve.container.set_tty()
- serve.after(trainer)
-
- if False:
- # Serve and interpret only require the trained model so we can run them
- # in parallel to each other.
- interpret = container_from_app(interpret_app)
- interpret.container.set_tty()
- interpret.after(trainer)
-
-
- kfp.compiler.Compiler().compile(
- pipeline_func=pipeline,
- package_path="pipeline.yaml",
- )
-
- with open("pipeline.yaml", "rt") as f:
- print(f.read())
-
- # %%
- # Once this has all run you should have a pipeline file (typically
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
- # a kfp.Client.
-
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
torchx/examples/pipelines/kfp/dist_pipeline.py DELETED
@@ -1,71 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- Distributed KubeFlow Pipelines Example
- ======================================
-
- This is an example KFP pipeline that uses resource_from_app to launch a
- distributed operator using the kubernetes/volcano job scheduler. This only works
- in Kubernetes KFP clusters with https://volcano.sh/en/docs/ installed on them.
- """
-
- import kfp
- from torchx import specs
- from torchx.pipelines.kfp.adapter import resource_from_app
-
-
- def pipeline() -> None:
- # First we define our AppDef for the component, we set
- echo_app = specs.AppDef(
- name="test-dist",
- roles=[
- specs.Role(
- name="dist-echo",
- image="alpine",
- entrypoint="/bin/echo",
- args=["hello dist!"],
- num_replicas=3,
- ),
- ],
- )
-
- # To convert the TorchX AppDef into a KFP container we use
- # the resource_from_app adapter. This takes generates a KFP Kubernetes
- # resource operator definition from the TorchX app def and instantiates it.
- echo_container: kfp.dsl.BaseOp = resource_from_app(echo_app, queue="default")
-
-
- # %%
- # To generate the pipeline definition file we need to call into the KFP compiler
- # with our pipeline function.
-
- kfp.compiler.Compiler().compile(
- pipeline_func=pipeline,
- package_path="pipeline.yaml",
- )
-
- with open("pipeline.yaml", "rt") as f:
- print(f.read())
-
- # %%
- # Once this has all run you should have a pipeline file (typically
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
- # a kfp.Client.
- #
- # See the
- # `KFP SDK Examples <https://www.kubeflow.org/docs/components/pipelines/legacy-v1/tutorials/sdk-examples/#examples>`_
- # for more info on launching KFP pipelines.
-
- # %%
- # See the :ref:`examples_pipelines/kfp/advanced_pipeline:Advanced KubeFlow Pipelines Example` for how to chain multiple
- # components together and use builtin components.
-
-
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
torchx/examples/pipelines/kfp/intro_pipeline.py DELETED
@@ -1,83 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- Intro KubeFlow Pipelines Example
- ================================
-
- This an introductory pipeline using KubeFlow Pipelines built with only TorchX
- components.
-
- TorchX is intended to allow making cross platform components. As such, we have
- a standard definition that uses adapters to convert it to the specific
- pipeline platform. This is an example of using the KFP adapter to run a TorchX
- component as part of a KubeFlow Pipeline.
-
- TorchX tries to leverage standard mechanisms wherever possible. For KFP we use
- the existing KFP pipeline definition syntax and add a single
- `component_from_app` conversion step to convert a TorchX component into one
- KFP can understand.
-
- Typically you have a separate component file but for this example we define the
- AppDef inline.
- """
-
- import kfp
- from torchx import specs
- from torchx.pipelines.kfp.adapter import container_from_app
-
-
- def pipeline() -> None:
- # First we define our AppDef for the component. AppDef is a core part of TorchX
- # and can be used to describe complex distributed multi container apps or
- # just a single node component like here.
- echo_app: specs.AppDef = specs.AppDef(
- name="examples-intro",
- roles=[
- specs.Role(
- name="worker",
- entrypoint="/bin/echo",
- args=["Hello TorchX!"],
- image="alpine",
- )
- ],
- )
-
- # To convert the TorchX AppDef into a KFP container we use
- # the container_from_app adapter. This takes generates a KFP component
- # definition from the TorchX app def and instantiates it into a container.
- echo_container: kfp.dsl.ContainerOp = container_from_app(echo_app)
-
-
- # %%
- # To generate the pipeline definition file we need to call into the KFP compiler
- # with our pipeline function.

- kfp.compiler.Compiler().compile(
- pipeline_func=pipeline,
- package_path="pipeline.yaml",
- )
-
- with open("pipeline.yaml", "rt") as f:
- print(f.read())
-
- # %%
- # Once this has all run you should have a pipeline file (typically
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
- # a kfp.Client.
- #
- # See the
- # `KFP SDK Examples <https://www.kubeflow.org/docs/components/pipelines/legacy-v1/tutorials/sdk-examples/#examples>`_
- # for more info on launching KFP pipelines.
-
- # %%
- # See the :ref:`examples_pipelines/kfp/advanced_pipeline:Advanced KubeFlow Pipelines Example` for how to chain multiple
- # components together and use builtin components.
-
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'