torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchx-nightly might be problematic. Click here for more details.

Files changed (110) hide show
  1. torchx/__init__.py +2 -0
  2. torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
  3. torchx/apps/serve/serve.py +2 -0
  4. torchx/apps/utils/booth_main.py +2 -0
  5. torchx/apps/utils/copy_main.py +2 -0
  6. torchx/apps/utils/process_monitor.py +2 -0
  7. torchx/cli/__init__.py +2 -0
  8. torchx/cli/argparse_util.py +38 -3
  9. torchx/cli/cmd_base.py +2 -0
  10. torchx/cli/cmd_cancel.py +2 -0
  11. torchx/cli/cmd_configure.py +2 -0
  12. torchx/cli/cmd_delete.py +30 -0
  13. torchx/cli/cmd_describe.py +2 -0
  14. torchx/cli/cmd_list.py +8 -4
  15. torchx/cli/cmd_log.py +6 -24
  16. torchx/cli/cmd_run.py +269 -45
  17. torchx/cli/cmd_runopts.py +2 -0
  18. torchx/cli/cmd_status.py +12 -1
  19. torchx/cli/cmd_tracker.py +3 -1
  20. torchx/cli/colors.py +2 -0
  21. torchx/cli/main.py +4 -0
  22. torchx/components/__init__.py +3 -8
  23. torchx/components/component_test_base.py +2 -0
  24. torchx/components/dist.py +18 -7
  25. torchx/components/integration_tests/component_provider.py +4 -2
  26. torchx/components/integration_tests/integ_tests.py +2 -0
  27. torchx/components/serve.py +2 -0
  28. torchx/components/structured_arg.py +7 -6
  29. torchx/components/utils.py +15 -4
  30. torchx/distributed/__init__.py +2 -4
  31. torchx/examples/apps/datapreproc/datapreproc.py +2 -0
  32. torchx/examples/apps/lightning/data.py +5 -3
  33. torchx/examples/apps/lightning/model.py +7 -6
  34. torchx/examples/apps/lightning/profiler.py +7 -4
  35. torchx/examples/apps/lightning/train.py +11 -2
  36. torchx/examples/torchx_out_of_sync_training.py +11 -0
  37. torchx/notebook.py +2 -0
  38. torchx/runner/__init__.py +2 -0
  39. torchx/runner/api.py +167 -60
  40. torchx/runner/config.py +43 -10
  41. torchx/runner/events/__init__.py +57 -13
  42. torchx/runner/events/api.py +14 -3
  43. torchx/runner/events/handlers.py +2 -0
  44. torchx/runtime/tracking/__init__.py +2 -0
  45. torchx/runtime/tracking/api.py +2 -0
  46. torchx/schedulers/__init__.py +16 -15
  47. torchx/schedulers/api.py +70 -14
  48. torchx/schedulers/aws_batch_scheduler.py +79 -5
  49. torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
  50. torchx/schedulers/devices.py +17 -4
  51. torchx/schedulers/docker_scheduler.py +43 -11
  52. torchx/schedulers/ids.py +29 -23
  53. torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
  54. torchx/schedulers/kubernetes_scheduler.py +383 -38
  55. torchx/schedulers/local_scheduler.py +100 -27
  56. torchx/schedulers/lsf_scheduler.py +5 -4
  57. torchx/schedulers/slurm_scheduler.py +336 -20
  58. torchx/schedulers/streams.py +2 -0
  59. torchx/specs/__init__.py +89 -12
  60. torchx/specs/api.py +431 -32
  61. torchx/specs/builders.py +176 -38
  62. torchx/specs/file_linter.py +143 -57
  63. torchx/specs/finder.py +68 -28
  64. torchx/specs/named_resources_aws.py +254 -22
  65. torchx/specs/named_resources_generic.py +2 -0
  66. torchx/specs/overlays.py +106 -0
  67. torchx/specs/test/components/__init__.py +2 -0
  68. torchx/specs/test/components/a/__init__.py +2 -0
  69. torchx/specs/test/components/a/b/__init__.py +2 -0
  70. torchx/specs/test/components/a/b/c.py +2 -0
  71. torchx/specs/test/components/c/__init__.py +2 -0
  72. torchx/specs/test/components/c/d.py +2 -0
  73. torchx/tracker/__init__.py +12 -6
  74. torchx/tracker/api.py +15 -18
  75. torchx/tracker/backend/fsspec.py +2 -0
  76. torchx/util/cuda.py +2 -0
  77. torchx/util/datetime.py +2 -0
  78. torchx/util/entrypoints.py +39 -15
  79. torchx/util/io.py +2 -0
  80. torchx/util/log_tee_helpers.py +210 -0
  81. torchx/util/modules.py +65 -0
  82. torchx/util/session.py +42 -0
  83. torchx/util/shlex.py +2 -0
  84. torchx/util/strings.py +3 -1
  85. torchx/util/types.py +90 -29
  86. torchx/version.py +4 -2
  87. torchx/workspace/__init__.py +2 -0
  88. torchx/workspace/api.py +136 -6
  89. torchx/workspace/dir_workspace.py +2 -0
  90. torchx/workspace/docker_workspace.py +30 -2
  91. torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
  92. torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
  93. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
  94. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
  95. torchx/examples/pipelines/__init__.py +0 -0
  96. torchx/examples/pipelines/kfp/__init__.py +0 -0
  97. torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
  98. torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
  99. torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
  100. torchx/pipelines/kfp/__init__.py +0 -28
  101. torchx/pipelines/kfp/adapter.py +0 -271
  102. torchx/pipelines/kfp/version.py +0 -17
  103. torchx/schedulers/gcp_batch_scheduler.py +0 -487
  104. torchx/schedulers/ray/ray_common.py +0 -22
  105. torchx/schedulers/ray/ray_driver.py +0 -307
  106. torchx/schedulers/ray_scheduler.py +0 -453
  107. torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
  108. torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
  109. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
  110. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,167 @@
1
+ Metadata-Version: 2.4
2
+ Name: torchx-nightly
3
+ Version: 2025.12.24
4
+ Summary: TorchX SDK and Components
5
+ Home-page: https://github.com/meta-pytorch/torchx
6
+ Author: TorchX Devs
7
+ Author-email: torchx@fb.com
8
+ License: BSD-3
9
+ Keywords: pytorch,machine learning
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: BSD License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: >=3.7
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: docstring-parser>=0.8.1
21
+ Requires-Dist: pyyaml
22
+ Requires-Dist: docker
23
+ Requires-Dist: filelock
24
+ Requires-Dist: fsspec>=2023.10.0
25
+ Requires-Dist: tabulate
26
+ Provides-Extra: aws-batch
27
+ Requires-Dist: boto3; extra == "aws-batch"
28
+ Provides-Extra: kubernetes
29
+ Requires-Dist: kubernetes>=11; extra == "kubernetes"
30
+ Provides-Extra: dev
31
+ Requires-Dist: aiobotocore==2.20.0; extra == "dev"
32
+ Requires-Dist: ax-platform[mysql]==0.2.3; extra == "dev"
33
+ Requires-Dist: boto3==1.36.0; extra == "dev"
34
+ Requires-Dist: captum>=0.4.0; extra == "dev"
35
+ Requires-Dist: docker; extra == "dev"
36
+ Requires-Dist: kubernetes==25.3.0; extra == "dev"
37
+ Requires-Dist: flake8==3.9.0; extra == "dev"
38
+ Requires-Dist: fsspec==2024.3.1; extra == "dev"
39
+ Requires-Dist: s3fs==2024.3.1; extra == "dev"
40
+ Requires-Dist: hydra-core; extra == "dev"
41
+ Requires-Dist: ipython; extra == "dev"
42
+ Requires-Dist: mlflow-skinny; extra == "dev"
43
+ Requires-Dist: moto~=5.0.8; extra == "dev"
44
+ Requires-Dist: pyre-extensions; extra == "dev"
45
+ Requires-Dist: pyre-check; extra == "dev"
46
+ Requires-Dist: pytest; extra == "dev"
47
+ Requires-Dist: pytest-cov; extra == "dev"
48
+ Requires-Dist: pytorch-lightning==2.5.0; extra == "dev"
49
+ Requires-Dist: tensorboard==2.14.0; extra == "dev"
50
+ Requires-Dist: sagemaker==2.237.3; extra == "dev"
51
+ Requires-Dist: torch-model-archiver>=0.4.2; extra == "dev"
52
+ Requires-Dist: torch; extra == "dev"
53
+ Requires-Dist: torchmetrics==1.6.3; extra == "dev"
54
+ Requires-Dist: torchserve>=0.10.0; extra == "dev"
55
+ Requires-Dist: torchtext; extra == "dev"
56
+ Requires-Dist: torchvision; extra == "dev"
57
+ Requires-Dist: typing-extensions; extra == "dev"
58
+ Requires-Dist: ts==0.5.1; extra == "dev"
59
+ Requires-Dist: wheel; extra == "dev"
60
+ Requires-Dist: lintrunner; extra == "dev"
61
+ Requires-Dist: lintrunner-adapters; extra == "dev"
62
+ Dynamic: author
63
+ Dynamic: author-email
64
+ Dynamic: classifier
65
+ Dynamic: description
66
+ Dynamic: description-content-type
67
+ Dynamic: home-page
68
+ Dynamic: keywords
69
+ Dynamic: license
70
+ Dynamic: license-file
71
+ Dynamic: provides-extra
72
+ Dynamic: requires-dist
73
+ Dynamic: requires-python
74
+ Dynamic: summary
75
+
76
+ [![PyPI](https://img.shields.io/pypi/v/torchx)](https://pypi.org/project/torchx/)
77
+ [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/meta-pytorch/torchx/blob/main/LICENSE)
78
+ ![Tests](https://github.com/meta-pytorch/torchx/actions/workflows/python-unittests.yaml/badge.svg)
79
+ ![Lint](https://github.com/meta-pytorch/torchx/actions/workflows/lint.yaml/badge.svg)
80
+ [![codecov](https://codecov.io/gh/pytorch/torchx/branch/main/graph/badge.svg?token=ceHHIm0hXy)](https://codecov.io/gh/pytorch/torchx)
81
+
82
+
83
+ # TorchX
84
+
85
+
86
+ TorchX is a universal job launcher for PyTorch applications.
87
+ TorchX is designed to have fast iteration time for training/research and support
88
+ for E2E production ML pipelines when you're ready.
89
+
90
+ TorchX currently supports:
91
+
92
+ * Kubernetes (EKS, GKE, AKS, etc)
93
+ * Slurm
94
+ * AWS Batch
95
+ * Docker
96
+ * Local
97
+
98
+ Need a scheduler not listed? [Let us know!](https://github.com/meta-pytorch/torchx/issues?q=is%3Aopen+is%3Aissue+label%3Ascheduler-request)
99
+
100
+ ## Quickstart
101
+
102
+ See the [quickstart guide](https://meta-pytorch.org/torchx/latest/quickstart.html).
103
+
104
+ ## Documentation
105
+
106
+ * [Stable Documentation](https://meta-pytorch.org/torchx/latest/)
107
+ * [Nightly Documentation](https://meta-pytorch.org/torchx/main/)
108
+
109
+ ## Requirements
110
+
111
+ torchx:
112
+
113
+ * python3 (3.8+)
114
+ * [PyTorch](https://pytorch.org/get-started/locally/)
115
+ * optional: [Docker](https://docs.docker.com/get-docker/) (needed for docker based schedulers)
116
+
117
+ Certain schedulers may require scheduler specific requirements. See installation
118
+ for info.
119
+
120
+ ## Installation
121
+
122
+ ### Stable
123
+
124
+ ```bash
125
+ # install torchx sdk and CLI -- minimum dependencies
126
+ pip install torchx
127
+
128
+ # install torchx sdk and CLI -- all dependencies
129
+ pip install "torchx[dev]"
130
+
131
+ # install torchx Kubernetes / Volcano support
132
+ pip install "torchx[kubernetes]"
133
+
134
+ # install torchx GCP Batch support
135
+ pip install "torchx[gcp_batch]"
136
+ ```
137
+
138
+ ### Nightly
139
+
140
+ ```bash
141
+ # install torchx sdk and CLI
142
+ pip install torchx-nightly[dev]
143
+ ```
144
+
145
+ ### Source
146
+
147
+ ```bash
148
+ # install torchx sdk and CLI from source
149
+ $ pip install -e git+https://github.com/meta-pytorch/torchx.git#egg=torchx
150
+
151
+ # install extra dependencies
152
+ $ pip install -e git+https://github.com/meta-pytorch/torchx.git#egg=torchx[dev]
153
+ ```
154
+
155
+ ### Docker
156
+
157
+ TorchX provides a docker container for use as part of a TorchX role.
158
+
159
+ See: https://github.com/meta-pytorch/torchx/pkgs/container/torchx
160
+
161
+ ## Contributing
162
+
163
+ We welcome PRs! See the [CONTRIBUTING](https://github.com/meta-pytorch/torchx/blob/main/CONTRIBUTING.md) file.
164
+
165
+ ## License
166
+
167
+ TorchX is BSD licensed, as found in the [LICENSE](https://github.com/meta-pytorch/torchx/blob/main/LICENSE) file.
@@ -0,0 +1,113 @@
1
+ torchx/__init__.py,sha256=QFDTdJacncWYWHL-2QyWdY5MUck3jVfSPRRGdvedcKc,355
2
+ torchx/_version.py,sha256=TzDuXIviDldFbXAhGe33redQcoP33jIsVR_hMyqSgdc,250
3
+ torchx/notebook.py,sha256=Rc6XUMzSq7NXtsYdtVluE6T89LpEhcba-3ANxuaLCCU,1008
4
+ torchx/version.py,sha256=YcE66UkBxYHMQMtjVts4jF3l6Qeaj1gK_LzxU77l8Bo,975
5
+ torchx/apps/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
6
+ torchx/apps/serve/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
7
+ torchx/apps/serve/serve.py,sha256=u_h8agld1TwIPq5GRosHL3uxhkljNfS65McLB77O0OE,4386
8
+ torchx/apps/utils/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
9
+ torchx/apps/utils/booth_main.py,sha256=rG-WWqXK8rqqx4bg1ay28CXlhpnc0AtnKZEjQpBD_dA,1427
10
+ torchx/apps/utils/copy_main.py,sha256=_O7eElApHUSpunEglh81BMiF2PBKBxOyhb8qPMSuXMs,1838
11
+ torchx/apps/utils/process_monitor.py,sha256=9gH2Cn4191Y9dWEcNGPPWyIt_23q03LlGc3H1PG_ipk,3452
12
+ torchx/cli/__init__.py,sha256=3lloxeC_V5KFrTL2X0-tUs7KQJ-XuIH5MuGLA-q3R10,10351
13
+ torchx/cli/argparse_util.py,sha256=kZb1ubEHDrBsmrxpySFRQCW7wmHuRHD8eAInuEZjlsI,3836
14
+ torchx/cli/cmd_base.py,sha256=SdqMtqi04CEqnzcgcS35DbDbsBeMxSgEhfynfpIkMGk,790
15
+ torchx/cli/cmd_cancel.py,sha256=NKfOCu_44Lch9vliGSQ0Uv6BVqpUqj7Tob652TI-ua4,835
16
+ torchx/cli/cmd_configure.py,sha256=1kTv0qbsbV44So74plAySwWu56pQrqjhfW_kbfdC3Rw,1722
17
+ torchx/cli/cmd_delete.py,sha256=US1f6Jvyhz4R_0Q0a8GeNTDMrhzo8WE_ECcdOf0MjKE,835
18
+ torchx/cli/cmd_describe.py,sha256=E5disbHoKTsqYKp2s3DaFW9GDLCCOgdOc3pQoHKoyCs,1283
19
+ torchx/cli/cmd_list.py,sha256=alkS9aIaDI8lX3W8uj8Vtr3IU3G2VeCuokKSd3zOFug,1409
20
+ torchx/cli/cmd_log.py,sha256=v-EZYUDOcG95rEgTnrsmPJMUyxM9Mk8YFAJtUxtgViE,5475
21
+ torchx/cli/cmd_run.py,sha256=z8wS-M2W9hHZfLkA6DFiV6Y0LFS9KfEBc_NTwAwdviQ,18780
22
+ torchx/cli/cmd_runopts.py,sha256=NWZiP8XpQjfTDJgays2c6MgL_8wxFoeDge6NstaZdKk,1302
23
+ torchx/cli/cmd_status.py,sha256=22IAEmKs0qkG6kJi83u9dRX2Q-ntT7yehVx7FxtY-vQ,2114
24
+ torchx/cli/cmd_tracker.py,sha256=9gmOmYi-89qQRGQfSrXCTto7ve54_JKFqs_wa7oRUA8,5223
25
+ torchx/cli/colors.py,sha256=yLMes7e_UoLAfhxE0W6edhc58t83UHAlnCN2ANPeuXw,568
26
+ torchx/cli/main.py,sha256=1DJTmKdvPW_7hod8OUVT3Br2uwsZVEDU-2bTE0NJ0zY,3559
27
+ torchx/components/__init__.py,sha256=JaVte0j9Gqi6IrjZKudJ2Kr3gkdHsvlCdRTo-zYpSRo,11815
28
+ torchx/components/component_test_base.py,sha256=22iNSdVa_qTW3SMM30Pw5UEWlK4DZVw0C03EqYiaLOI,4150
29
+ torchx/components/dist.py,sha256=6DNPEvHVqEifmM8g1L7HVY169cQv_7tSfSlh3o6lTp4,14930
30
+ torchx/components/interpret.py,sha256=g8gkKdDJvsBfX1ZrpVT7n2bMEtmwRV_1AqDyAnnQ_aA,697
31
+ torchx/components/metrics.py,sha256=1gbp8BfzZWGa7PD1db5vRADlONzmae4qSBUUdCWayr0,2814
32
+ torchx/components/serve.py,sha256=uxIC5gU2ecg0EJIPX_oEPzNNOXRAre4j2eXusrgwGAI,2156
33
+ torchx/components/structured_arg.py,sha256=8jMcd0rtUmzCKEQKJ_JYzxSkMMK9q0fYjkwAs6wo78E,9595
34
+ torchx/components/train.py,sha256=vtrQXRcD7bIcbb3lSeyD9BBlIe1mv1WNW6rnLK9R0Mw,1259
35
+ torchx/components/utils.py,sha256=IMjihhgs7nO67YtTetUBjN_CRpyIyyQsaJBkp7mpHfk,9368
36
+ torchx/components/integration_tests/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
37
+ torchx/components/integration_tests/component_provider.py,sha256=g-4ig1vtd5Vzgug0VAKRAFUt6KAV3TgQrBCrwRSJ7ZY,3981
38
+ torchx/components/integration_tests/integ_tests.py,sha256=O8jd8Jq5O0mns7xzIFsHexBDHkIIAIfELQkWCzNPzRw,5165
39
+ torchx/distributed/__init__.py,sha256=kh9YzDwWX7zFJJ8StR9qhMM2V3-66INs9i3ztDF-1ho,10252
40
+ torchx/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ torchx/examples/torchx_out_of_sync_training.py,sha256=sXiI1G8aGsfuvxRdBszDgM8pSplqhgfXjRnAcgRwNGM,397
42
+ torchx/examples/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ torchx/examples/apps/datapreproc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ torchx/examples/apps/datapreproc/datapreproc.py,sha256=cu88O_WZgqZ6g7jVIG2kagAVbJ4oPMzTH03_H65w8RU,4317
45
+ torchx/examples/apps/lightning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ torchx/examples/apps/lightning/data.py,sha256=kSv_DFqtFVkNjZ46HT7GApImc9lMD7liy929dUrFWwM,6610
47
+ torchx/examples/apps/lightning/interpret.py,sha256=Hd3kE5a6FyhxCmJBfTzb4Tlj518zhX8V0XvZfzu4nqE,5256
48
+ torchx/examples/apps/lightning/model.py,sha256=4CgObWfANqDN9emYSdmCpbRe_V_Lef_Hd3M-yayDbZE,4045
49
+ torchx/examples/apps/lightning/profiler.py,sha256=SSSihnwjeUTkBoz0E3qn1b-wbkfUIowscx2ND_37zyw,1915
50
+ torchx/examples/apps/lightning/train.py,sha256=0wvvshGHvZowePB4LfclXwn40X7i9euM0ReETWBcPSo,6253
51
+ torchx/pipelines/__init__.py,sha256=2MbRVk5xwRjg-d2qPemeXpEhDsocMQumPQ53lsesZAI,606
52
+ torchx/runner/__init__.py,sha256=x8Sz7s_tLxPgJgvWIhK4ju9BNZU61uBFywGwDY6CqJs,315
53
+ torchx/runner/api.py,sha256=Qi12Kjkr_zpQBesbLuCtgKET8JhHnQk22MV7Czi4l1A,30832
54
+ torchx/runner/config.py,sha256=SaKOB50d79WaMFPWK8CC4as6UaNFaRGhrBkfajq3KC4,18311
55
+ torchx/runner/events/__init__.py,sha256=cMiNjnr4eUNQ2Nxxtu4nsvN5lu56b-a6nJ-ct3i7DQk,5536
56
+ torchx/runner/events/api.py,sha256=bvxKBAYK8LzbrBNaNLgL1x0aivtfANmWo1EMGOrSR8k,2668
57
+ torchx/runner/events/handlers.py,sha256=ThHCIJW21BfBgB7b6ftyjASJmD1KdizpjuTtsyqnvJs,522
58
+ torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,593
59
+ torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
60
+ torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
61
+ torchx/schedulers/__init__.py,sha256=FQN9boQM4mwOD3sK9LZ3GBgw-gJ7Vx4MFj6z6ATQIrc,2211
62
+ torchx/schedulers/api.py,sha256=wT9H_ZTmpTHHweevDJbkV7NKXfwileHrt1bbhhCgj3c,16488
63
+ torchx/schedulers/aws_batch_scheduler.py,sha256=b6xC4BQKb7zagOGS6_z3_6fmOLsSEOxSprkGUE-yfJE,29412
64
+ torchx/schedulers/aws_sagemaker_scheduler.py,sha256=DnNF6huHGZLSUGWqKml4qGiWvmyDzX0i45tjsRfkedg,20881
65
+ torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
66
+ torchx/schedulers/docker_scheduler.py,sha256=Kud3AIzQtMekgjlqcg1eNDb8kk29aPbGYOMAvPTZdhM,16840
67
+ torchx/schedulers/ids.py,sha256=8Qhf1Xqh845mwL-RXnWZXqIILNvml3z8udEXPFpyO7U,2247
68
+ torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=FclJEdBdlgtBqKDbgd95oAk5Ya5XNTrwysfX7GS80GY,42896
69
+ torchx/schedulers/kubernetes_scheduler.py,sha256=bB6xS5KQmAgqMljMLsE_4MT-KuZ23Jar7HWwPWiFFiQ,39568
70
+ torchx/schedulers/local_scheduler.py,sha256=xGQbI02BNWGF91g00So6hCcYvR90bUAZ7fPzqnm3Ww8,41892
71
+ torchx/schedulers/lsf_scheduler.py,sha256=vUvEJb02u7WI6y7DsWJxJFXNylRucU7FqkBX7xwLTak,17638
72
+ torchx/schedulers/slurm_scheduler.py,sha256=ipDVDtgfqgL6c35NyoJgSPuQFt8-AeXVXAnXJVvmzrc,32032
73
+ torchx/schedulers/streams.py,sha256=8_SLezgnWgfv_zXUsJCUM34-h2dtv25NmZuxEwkzmxw,2007
74
+ torchx/specs/__init__.py,sha256=TaC0AveTebkCMo5hmdY1wGpo09vFDqzWnsT166ionTw,7108
75
+ torchx/specs/api.py,sha256=7FdLFfadNWqXTLJ_EtP5t1uVS2Vc_4Gj5GLFoI628oE,49338
76
+ torchx/specs/builders.py,sha256=Ye3of4MupJ-da8vLaX6_-nzGo_FRw1BFpYsX6dAZCNk,13730
77
+ torchx/specs/file_linter.py,sha256=z0c4mKJv47BWiPaWCdUM0A8kHwnj4b1s7oTmESuD9Tc,14407
78
+ torchx/specs/finder.py,sha256=gWQNEFrLYqrZoI0gMMhQ70YAC4sxqS0ZFpoWAmcVi44,17438
79
+ torchx/specs/named_resources_aws.py,sha256=ZNAbw6lD8NUlMfcJ-LpX14dMSaHO7m4Yt9iHwAF44yg,11674
80
+ torchx/specs/named_resources_generic.py,sha256=Sg4tAdqiiWDrDz2Lj_pnfsjzGIXKTou73wPseh6j55w,2646
81
+ torchx/specs/overlays.py,sha256=HmY2yzC8ejgihviNWFT4rbYmP-gTcqpxVZTP6qBiIYM,3778
82
+ torchx/specs/test/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
83
+ torchx/specs/test/components/a/__init__.py,sha256=kdxEgnI8QBSBiuTjaB4qDD7JX84hWowyPWU4B2Cqe9A,561
84
+ torchx/specs/test/components/a/b/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
85
+ torchx/specs/test/components/a/b/c.py,sha256=FhixafzNqpS5zvggtWIWLxRd6HIxsOmct-d1Hs-rDoc,554
86
+ torchx/specs/test/components/c/__init__.py,sha256=5CBMckkpqJUdxBQBYHGSsItqq1gj2V0UiCw02Qfq6MM,246
87
+ torchx/specs/test/components/c/d.py,sha256=2AjE-FmQXJTw3hws66O83ToQPmjOEZLDf-jDAKrrUkQ,546
88
+ torchx/tracker/__init__.py,sha256=qo39aOa0Dz9zt4TtFkqPeIaH7MNqdAkFlGaOFiDLXTI,4375
89
+ torchx/tracker/api.py,sha256=WZ7TYdbSVx_5h5MlX9EwQLRpxmIf0oKdiQwQ0zvkO3o,11262
90
+ torchx/tracker/mlflow.py,sha256=poeoIXVPzr2sxgi515fMGRH83KAFNL6XFILMh0EQ2Dw,14487
91
+ torchx/tracker/backend/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
92
+ torchx/tracker/backend/fsspec.py,sha256=528xKryBE27Rm_OHD7r2R6fmVAclknBtoy1s034Ny6c,10440
93
+ torchx/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
+ torchx/util/cuda.py,sha256=-ZTa1WCLnY2WtSWAdWufLQqZSDCZfZsloBuiS84LIkU,1099
95
+ torchx/util/datetime.py,sha256=hV6Sg0u5KTBe68yrmy_RGCC5su0i4Tb_mAYphWamiXI,405
96
+ torchx/util/entrypoints.py,sha256=YUv7F-Vr4uuY4_82IBPdrz5vrch_qsx_dIr6e08kSD4,3800
97
+ torchx/util/io.py,sha256=HNpWLcFUX0WTAP3CsdamHz--FR5A4kSdLCPfNqa2UkA,1807
98
+ torchx/util/log_tee_helpers.py,sha256=wPyozmh9BOt_2d3Gxa0iNogwnjzwFitIIMBJOJ1arIw,6330
99
+ torchx/util/modules.py,sha256=o4y_d07gTpJ4nIVBcoUVJ0JtXIHEsEC5kbgBM6NGpgA,2135
100
+ torchx/util/session.py,sha256=r6M_nyzXgcbk1GgYGZ324F_ehRGCqjjdVk4YgKxMj8M,1214
101
+ torchx/util/shlex.py,sha256=eXEKu8KC3zIcd8tEy9_s8Ds5oma8BORr-0VGWNpG2dk,463
102
+ torchx/util/strings.py,sha256=7Ef1loz2IYMrzeJ6Lewywi5cBIc3X3g7lSPbT1Tn_z4,664
103
+ torchx/util/types.py,sha256=E9dxAWQnsJkIDuHtg-poeOJ4etucSI_xP_Z5kNJX8uI,9229
104
+ torchx/workspace/__init__.py,sha256=FqN8AN4VhR1C_SBY10MggQvNZmyanbbuPuE-JCjkyUY,798
105
+ torchx/workspace/api.py,sha256=UESQ4qgxXjsb6Y1wP9OGv2ixaFgaTs3SqghmNuOJIZM,10235
106
+ torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
107
+ torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
108
+ torchx_nightly-2025.12.24.dist-info/licenses/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
109
+ torchx_nightly-2025.12.24.dist-info/METADATA,sha256=y1vkSZkFLHeYLtyinI3R5pi3sB-wdaoc0qnSxOx0law,5324
110
+ torchx_nightly-2025.12.24.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
111
+ torchx_nightly-2025.12.24.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
112
+ torchx_nightly-2025.12.24.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
113
+ torchx_nightly-2025.12.24.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -6,4 +6,3 @@ torchx_minio = torchx.test.minio.MinioFS
6
6
 
7
7
  [torchx.tracker]
8
8
  fsspec = torchx.tracker.backend.fsspec:create
9
-
File without changes
File without changes
@@ -1,287 +0,0 @@
1
- #!/usr/bin/env python3
2
- # Copyright (c) Meta Platforms, Inc. and affiliates.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the BSD-style license found in the
6
- # LICENSE file in the root directory of this source tree.
7
-
8
- """
9
- Advanced KubeFlow Pipelines Example
10
- ===================================
11
-
12
- This is an example pipeline using KubeFlow Pipelines built with only TorchX
13
- components.
14
-
15
- KFP adapters can be used to transform the TorchX components directly into
16
- something that can be used within KFP.
17
- """
18
-
19
- # %%
20
- # Input Arguments
21
- # ###############
22
- # Let's first define some arguments for the pipeline.
23
-
24
- import argparse
25
- import os.path
26
- import sys
27
- from typing import Dict
28
-
29
- import kfp
30
- import torchx
31
- from torchx import specs
32
- from torchx.components.dist import ddp as dist_ddp
33
- from torchx.components.serve import torchserve
34
- from torchx.components.utils import copy as utils_copy, python as utils_python
35
- from torchx.pipelines.kfp.adapter import container_from_app
36
-
37
-
38
- parser = argparse.ArgumentParser(description="example kfp pipeline")
39
-
40
- # %%
41
- # TorchX components are built around images. Depending on what scheduler
42
- # you're using this can vary but for KFP these images are specified as
43
- # docker containers. We have one container for the example apps and one for
44
- # the standard built in apps. If you modify the torchx example code you'll
45
- # need to rebuild the container before launching it on KFP
46
-
47
-
48
- parser.add_argument(
49
- "--image",
50
- type=str,
51
- help="docker image to use for the examples apps",
52
- default=torchx.IMAGE,
53
- )
54
-
55
- # %%
56
- # Most TorchX components use
57
- # `fsspec <https://filesystem-spec.readthedocs.io/en/latest/>`_ to abstract
58
- # away dealing with remote filesystems. This allows the components to take
59
- # paths like ``s3://`` to make it easy to use cloud storage providers.
60
- parser.add_argument(
61
- "--output_path",
62
- type=str,
63
- help="path to place the data",
64
- required=True,
65
- )
66
- parser.add_argument("--load_path", type=str, help="checkpoint path to load from")
67
-
68
- # %%
69
- # This example uses the torchserve for inference so we need to specify some
70
- # options. This assumes you have a TorchServe instance running in the same
71
- # Kubernetes cluster with the service name ``torchserve`` in the default
72
- # namespace.
73
- #
74
- # See https://github.com/pytorch/serve/blob/master/kubernetes/README.md for info
75
- # on how to setup TorchServe.
76
- parser.add_argument(
77
- "--management_api",
78
- type=str,
79
- help="path to the torchserve management API",
80
- default="http://torchserve.default.svc.cluster.local:8081",
81
- )
82
- parser.add_argument(
83
- "--model_name",
84
- type=str,
85
- help="the name of the inference model",
86
- default="tiny_image_net",
87
- )
88
-
89
- # %% Parse the arguments, you'll need to set these accordingly if running from a
90
- # notebook.
91
-
92
-
93
- if "NOTEBOOK" in globals():
94
- argv = [
95
- "--output_path",
96
- "/tmp/output",
97
- ]
98
- else:
99
- argv = sys.argv[1:]
100
-
101
- args: argparse.Namespace = parser.parse_args(argv)
102
-
103
- # %%
104
- # Creating the Components
105
- # #######################
106
- # The first step is downloading the data to somewhere we can work on it. For
107
- # this we can just use the builtin copy component. This component takes two valid
108
- # fsspec paths and copies them from one to another. In this case we're using
109
- # http as the source and a file under the output_path as the output.
110
-
111
-
112
- data_path: str = os.path.join(args.output_path, "tiny-imagenet-200.zip")
113
- copy_app: specs.AppDef = utils_copy(
114
- "http://cs231n.stanford.edu/tiny-imagenet-200.zip",
115
- data_path,
116
- image=args.image,
117
- )
118
-
119
- # %%
120
- # The next component is for data preprocessing. This takes in the raw data from
121
- # the previous operator and runs some transforms on it for use with the trainer.
122
- #
123
- # datapreproc outputs the data to a specified fsspec path. These paths are all
124
- # specified ahead of time so we have a fully static pipeline.
125
-
126
-
127
- processed_data_path: str = os.path.join(args.output_path, "processed")
128
- datapreproc_app: specs.AppDef = utils_python(
129
- "--output_path",
130
- processed_data_path,
131
- "--input_path",
132
- data_path,
133
- "--limit",
134
- "100",
135
- image=args.image,
136
- m="torchx.examples.apps.datapreproc.datapreproc",
137
- cpu=1,
138
- memMB=1024,
139
- )
140
-
141
- # %%
142
- # Next we'll create the trainer component that takes in the training data from the
143
- # previous datapreproc component. We've defined this in a separate component
144
- # file as you normally would.
145
- #
146
- # Having a separate component file allows you to launch your trainer from the
147
- # TorchX CLI via ``torchx run`` for fast iteration as well as run it from a
148
- # pipeline in an automated fashion.
149
-
150
- # make sure examples is on the path
151
- if "__file__" in globals():
152
- sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
153
-
154
-
155
- logs_path: str = os.path.join(args.output_path, "logs")
156
- models_path: str = os.path.join(args.output_path, "models")
157
-
158
- trainer_app: specs.AppDef = dist_ddp(
159
- *(
160
- "--output_path",
161
- models_path,
162
- "--load_path",
163
- args.load_path or "",
164
- "--log_path",
165
- logs_path,
166
- "--data_path",
167
- processed_data_path,
168
- "--epochs",
169
- str(1),
170
- ),
171
- image=args.image,
172
- m="torchx.examples.apps.lightning.train",
173
- j="1x1",
174
- # per node resource settings
175
- cpu=1,
176
- memMB=3000,
177
- )
178
-
179
- # %%
180
- # To have the tensorboard path show up in KFP's UI we need to add some metadata so
181
- # KFP knows where to consume the metrics from.
182
- #
183
- # This will get used when we create the KFP container.
184
-
185
-
186
- ui_metadata: Dict[str, object] = {
187
- "outputs": [
188
- {
189
- "type": "tensorboard",
190
- "source": os.path.join(logs_path, "lightning_logs"),
191
- }
192
- ]
193
- }
194
-
195
- # %%
196
- # For the inference, we're leveraging one of the builtin TorchX components. This
197
- # component takes in a model and uploads it to the TorchServe management API
198
- # endpoints.
199
-
200
-
201
- serve_app: specs.AppDef = torchserve(
202
- model_path=os.path.join(models_path, "model.mar"),
203
- management_api=args.management_api,
204
- image=args.image,
205
- params={
206
- "model_name": args.model_name,
207
- # set this to allocate a worker
208
- # "initial_workers": 1,
209
- },
210
- )
211
-
212
- # %%
213
- # For model interpretability we're leveraging a custom component stored in its
214
- # own component file. This component takes in the output from datapreproc and
215
- # train components and produces images with integrated gradient results.
216
-
217
- interpret_path: str = os.path.join(args.output_path, "interpret")
218
- interpret_app: specs.AppDef = utils_python(
219
- *(
220
- "--load_path",
221
- os.path.join(models_path, "last.ckpt"),
222
- "--data_path",
223
- processed_data_path,
224
- "--output_path",
225
- interpret_path,
226
- ),
227
- image=args.image,
228
- m="torchx.examples.apps.lightning.interpret",
229
- )
230
-
231
- # %%
232
- # Pipeline Definition
233
- # ###################
234
- # The last step is to define the actual pipeline using the torchx components via
235
- # the KFP adapter and export the pipeline package that can be uploaded to a KFP
236
- # cluster.
237
- #
238
- # The KFP adapter currently doesn't track the input and outputs so the
239
- # containers need to have their dependencies specified via `.after()`.
240
- #
241
- # We call `.set_tty()` to make the logs from the components more responsive for
242
- # example purposes.
243
-
244
-
245
- def pipeline() -> None:
246
- # container_from_app creates a KFP container from the TorchX app
247
- # definition.
248
- copy = container_from_app(copy_app)
249
- copy.container.set_tty()
250
-
251
- datapreproc = container_from_app(datapreproc_app)
252
- datapreproc.container.set_tty()
253
- datapreproc.after(copy)
254
-
255
- # For the trainer we want to log that UI metadata so you can access
256
- # tensorboard from the UI.
257
- trainer = container_from_app(trainer_app, ui_metadata=ui_metadata)
258
- trainer.container.set_tty()
259
- trainer.after(datapreproc)
260
-
261
- if False:
262
- serve = container_from_app(serve_app)
263
- serve.container.set_tty()
264
- serve.after(trainer)
265
-
266
- if False:
267
- # Serve and interpret only require the trained model so we can run them
268
- # in parallel to each other.
269
- interpret = container_from_app(interpret_app)
270
- interpret.container.set_tty()
271
- interpret.after(trainer)
272
-
273
-
274
- kfp.compiler.Compiler().compile(
275
- pipeline_func=pipeline,
276
- package_path="pipeline.yaml",
277
- )
278
-
279
- with open("pipeline.yaml", "rt") as f:
280
- print(f.read())
281
-
282
- # %%
283
- # Once this has all run you should have a pipeline file (typically
284
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
285
- # a kfp.Client.
286
-
287
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env python3
2
- # Copyright (c) Meta Platforms, Inc. and affiliates.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the BSD-style license found in the
6
- # LICENSE file in the root directory of this source tree.
7
-
8
- """
9
- Distributed KubeFlow Pipelines Example
10
- ======================================
11
-
12
- This is an example KFP pipeline that uses resource_from_app to launch a
13
- distributed operator using the kubernetes/volcano job scheduler. This only works
14
- in Kubernetes KFP clusters with https://volcano.sh/en/docs/ installed on them.
15
- """
16
-
17
- import kfp
18
- from torchx import specs
19
- from torchx.pipelines.kfp.adapter import resource_from_app
20
-
21
-
22
- def pipeline() -> None:
23
- # First we define our AppDef for the component.
24
- echo_app = specs.AppDef(
25
- name="test-dist",
26
- roles=[
27
- specs.Role(
28
- name="dist-echo",
29
- image="alpine",
30
- entrypoint="/bin/echo",
31
- args=["hello dist!"],
32
- num_replicas=3,
33
- ),
34
- ],
35
- )
36
-
37
- # To convert the TorchX AppDef into a KFP container we use
38
- # the resource_from_app adapter. This generates a KFP Kubernetes
39
- # resource operator definition from the TorchX app def and instantiates it.
40
- echo_container: kfp.dsl.BaseOp = resource_from_app(echo_app, queue="default")
41
-
42
-
43
- # %%
44
- # To generate the pipeline definition file we need to call into the KFP compiler
45
- # with our pipeline function.
46
-
47
- kfp.compiler.Compiler().compile(
48
- pipeline_func=pipeline,
49
- package_path="pipeline.yaml",
50
- )
51
-
52
- with open("pipeline.yaml", "rt") as f:
53
- print(f.read())
54
-
55
- # %%
56
- # Once this has all run you should have a pipeline file (typically
57
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
58
- # a kfp.Client.
59
- #
60
- # See the
61
- # `KFP SDK Examples <https://www.kubeflow.org/docs/components/pipelines/tutorials/sdk-examples/#examples>`_
62
- # for more info on launching KFP pipelines.
63
-
64
- # %%
65
- # See the :ref:`examples_pipelines/kfp/advanced_pipeline:Advanced KubeFlow Pipelines Example` for how to chain multiple
66
- # components together and use builtin components.
67
-
68
-
69
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'