torchx-nightly 2025.11.17__py3-none-any.whl → 2025.11.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/cli/cmd_delete.py +30 -0
- torchx/cli/main.py +2 -0
- torchx/runner/api.py +10 -0
- torchx/schedulers/api.py +40 -0
- torchx/schedulers/kubernetes_scheduler.py +35 -0
- {torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/METADATA +1 -1
- {torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/RECORD +11 -10
- {torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/WHEEL +0 -0
- {torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/licenses/LICENSE +0 -0
- {torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/top_level.txt +0 -0
torchx/cli/cmd_delete.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This source code is licensed under the BSD-style license found in the
|
|
6
|
+
# LICENSE file in the root directory of this source tree.
|
|
7
|
+
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import logging
|
|
12
|
+
|
|
13
|
+
from torchx.cli.cmd_base import SubCommand
|
|
14
|
+
from torchx.runner import get_runner
|
|
15
|
+
|
|
16
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CmdDelete(SubCommand):
    """``delete`` sub-command: asks the runner to delete an app by its handle."""

    def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
        """Registers the single positional ``app_handle`` argument."""
        handle_options = {
            "type": str,
            "help": "torchx app handle (e.g. local://session-name/app-id)",
        }
        subparser.add_argument("app_handle", **handle_options)

    def run(self, args: argparse.Namespace) -> None:
        """Deletes the app named by ``args.app_handle`` using ``get_runner()``."""
        # Read the handle first so a malformed namespace fails before a
        # runner is created.
        target = args.app_handle
        get_runner().delete(target)
|
torchx/cli/main.py
CHANGED
|
@@ -16,6 +16,7 @@ import torchx
|
|
|
16
16
|
from torchx.cli.cmd_base import SubCommand
|
|
17
17
|
from torchx.cli.cmd_cancel import CmdCancel
|
|
18
18
|
from torchx.cli.cmd_configure import CmdConfigure
|
|
19
|
+
from torchx.cli.cmd_delete import CmdDelete
|
|
19
20
|
from torchx.cli.cmd_describe import CmdDescribe
|
|
20
21
|
from torchx.cli.cmd_list import CmdList
|
|
21
22
|
from torchx.cli.cmd_log import CmdLog
|
|
@@ -37,6 +38,7 @@ def get_default_sub_cmds() -> Dict[str, SubCommand]:
|
|
|
37
38
|
"builtins": CmdBuiltins(),
|
|
38
39
|
"cancel": CmdCancel(),
|
|
39
40
|
"configure": CmdConfigure(),
|
|
41
|
+
"delete": CmdDelete(),
|
|
40
42
|
"describe": CmdDescribe(),
|
|
41
43
|
"list": CmdList(),
|
|
42
44
|
"log": CmdLog(),
|
torchx/runner/api.py
CHANGED
|
@@ -587,6 +587,16 @@ class Runner:
|
|
|
587
587
|
if status is not None and not status.is_terminal():
|
|
588
588
|
scheduler.cancel(app_id)
|
|
589
589
|
|
|
590
|
+
def delete(self, app_handle: AppHandle) -> None:
    """
    Deletes the application from the scheduler.

    The handle is resolved into its scheduler, backend name and app id,
    and the operation runs inside a ``"delete"`` ``log_event``. The
    scheduler's ``delete`` is only invoked when ``self.status`` returns
    something other than ``None`` for the handle.
    """
    scheduler, scheduler_backend, app_id = self._scheduler_app_id(app_handle)
    with log_event("delete", scheduler_backend, app_id):
        if self.status(app_handle) is None:
            # No status for this handle (presumably the scheduler does not
            # know the app -- confirm against ``status``); nothing to delete.
            return
        scheduler.delete(app_id)
|
|
599
|
+
|
|
590
600
|
def stop(self, app_handle: AppHandle) -> None:
|
|
591
601
|
"""
|
|
592
602
|
See method ``cancel``.
|
torchx/schedulers/api.py
CHANGED
|
@@ -264,6 +264,46 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
|
|
|
264
264
|
# do nothing if the app does not exist
|
|
265
265
|
return
|
|
266
266
|
|
|
267
|
+
def delete(self, app_id: str) -> None:
    """
    Purges the job identified by ``app_id`` from the scheduler's
    data-plane ("deep-purging" it), including the job information the
    scheduler keeps there. Calling this API on a "live" job (e.g. one in
    a non-terminal status such as PENDING or RUNNING) cancels the job.

    This API is only relevant for schedulers whose data-plane
    persistently stores the (often versioned) "JobDefinition". AWS Batch
    and Kubernetes are examples of such schedulers. On these schedulers
    a finished job may fall out of the data-plane (e.g. really old
    finished jobs get deleted) while the JobDefinition is typically
    stored permanently; in that case calling :py:meth:`~cancel` would
    not delete the job definition.

    In schedulers with no such feature (e.g. SLURM) :py:meth:`~delete`
    is the same as :py:meth:`~cancel`, which is the default
    implementation. Hence implementors of such schedulers need not
    override this method.

    .. warning::
        Calling :py:meth:`~delete` on an ``app_id`` that has fallen out
        of the scheduler's data-plane does nothing. The user is
        responsible for manually tracking down and cleaning up any
        dangling resources related to the job.
    """
    if not self.exists(app_id):
        # unknown app: documented no-op
        return
    self._delete_existing(app_id)
|
|
295
|
+
|
|
296
|
+
def _delete_existing(self, app_id: str) -> None:
    """
    Removes the job information for ``app_id`` from the scheduler's
    data-plane. Only ever invoked for an application that exists.

    By default this simply delegates to :py:meth:`~_cancel_existing`,
    which is the right behavior for schedulers that do not keep
    persistent job definitions.
    """
    # Default: deleting is the same as cancelling.
    self._cancel_existing(app_id)
|
|
306
|
+
|
|
267
307
|
def log_iter(
|
|
268
308
|
self,
|
|
269
309
|
app_id: str,
|
|
@@ -622,6 +622,16 @@ class KubernetesScheduler(
|
|
|
622
622
|
$ torchx status kubernetes://torchx_user/1234
|
|
623
623
|
...
|
|
624
624
|
|
|
625
|
+
**Cancellation**
|
|
626
|
+
|
|
627
|
+
Canceling a job aborts it while preserving the job spec for inspection
|
|
628
|
+
and cloning via kubectl apply. Use the delete command to remove the job entirely:
|
|
629
|
+
|
|
630
|
+
.. code-block:: bash
|
|
631
|
+
|
|
632
|
+
$ torchx cancel kubernetes://namespace/jobname # abort, preserves spec
|
|
633
|
+
$ torchx delete kubernetes://namespace/jobname # delete completely
|
|
634
|
+
|
|
625
635
|
**Config Options**
|
|
626
636
|
|
|
627
637
|
.. runopts::
|
|
@@ -818,6 +828,31 @@ class KubernetesScheduler(
|
|
|
818
828
|
pass
|
|
819
829
|
|
|
820
830
|
def _cancel_existing(self, app_id: str) -> None:
    """
    Abort a Volcano job while preserving the spec for inspection.

    The ``batch.volcano.sh/v1alpha1`` job object is fetched, its
    ``status.state.phase`` is set to ``"Aborted"`` and the object is
    written back through the custom-object status endpoint. The job
    object itself is not deleted.

    Args:
        app_id: job identifier in ``"<namespace>:<name>"`` form.

    Raises:
        ValueError: if ``app_id`` does not split into exactly two parts
            on ``":"``.
    """
    namespace, name = app_id.split(":")
    vcjob = self._custom_objects_api().get_namespaced_custom_object(
        group="batch.volcano.sh",
        version="v1alpha1",
        namespace=namespace,
        plural="jobs",
        name=name,
    )
    # The fetched object may lack ``status`` or ``status.state`` (or carry
    # them as null) -- presumably when the job has not been reconciled
    # yet. Build the path defensively instead of failing with
    # KeyError/TypeError, keeping any other status fields untouched.
    status = vcjob.get("status") or {}
    state = status.get("state") or {}
    state["phase"] = "Aborted"
    status["state"] = state
    vcjob["status"] = status
    self._custom_objects_api().replace_namespaced_custom_object_status(
        group="batch.volcano.sh",
        version="v1alpha1",
        namespace=namespace,
        plural="jobs",
        name=name,
        body=vcjob,
    )
|
|
851
|
+
|
|
852
|
+
def _delete_existing(self, app_id: str) -> None:
|
|
853
|
+
"""
|
|
854
|
+
Delete a Volcano job completely from the cluster.
|
|
855
|
+
"""
|
|
821
856
|
namespace, name = app_id.split(":")
|
|
822
857
|
self._custom_objects_api().delete_namespaced_custom_object(
|
|
823
858
|
group="batch.volcano.sh",
|
|
@@ -14,6 +14,7 @@ torchx/cli/argparse_util.py,sha256=kZb1ubEHDrBsmrxpySFRQCW7wmHuRHD8eAInuEZjlsI,3
|
|
|
14
14
|
torchx/cli/cmd_base.py,sha256=SdqMtqi04CEqnzcgcS35DbDbsBeMxSgEhfynfpIkMGk,790
|
|
15
15
|
torchx/cli/cmd_cancel.py,sha256=NKfOCu_44Lch9vliGSQ0Uv6BVqpUqj7Tob652TI-ua4,835
|
|
16
16
|
torchx/cli/cmd_configure.py,sha256=1kTv0qbsbV44So74plAySwWu56pQrqjhfW_kbfdC3Rw,1722
|
|
17
|
+
torchx/cli/cmd_delete.py,sha256=US1f6Jvyhz4R_0Q0a8GeNTDMrhzo8WE_ECcdOf0MjKE,835
|
|
17
18
|
torchx/cli/cmd_describe.py,sha256=E5disbHoKTsqYKp2s3DaFW9GDLCCOgdOc3pQoHKoyCs,1283
|
|
18
19
|
torchx/cli/cmd_list.py,sha256=alkS9aIaDI8lX3W8uj8Vtr3IU3G2VeCuokKSd3zOFug,1409
|
|
19
20
|
torchx/cli/cmd_log.py,sha256=v-EZYUDOcG95rEgTnrsmPJMUyxM9Mk8YFAJtUxtgViE,5475
|
|
@@ -22,7 +23,7 @@ torchx/cli/cmd_runopts.py,sha256=NWZiP8XpQjfTDJgays2c6MgL_8wxFoeDge6NstaZdKk,130
|
|
|
22
23
|
torchx/cli/cmd_status.py,sha256=22IAEmKs0qkG6kJi83u9dRX2Q-ntT7yehVx7FxtY-vQ,2114
|
|
23
24
|
torchx/cli/cmd_tracker.py,sha256=9gmOmYi-89qQRGQfSrXCTto7ve54_JKFqs_wa7oRUA8,5223
|
|
24
25
|
torchx/cli/colors.py,sha256=yLMes7e_UoLAfhxE0W6edhc58t83UHAlnCN2ANPeuXw,568
|
|
25
|
-
torchx/cli/main.py,sha256=
|
|
26
|
+
torchx/cli/main.py,sha256=1DJTmKdvPW_7hod8OUVT3Br2uwsZVEDU-2bTE0NJ0zY,3559
|
|
26
27
|
torchx/components/__init__.py,sha256=JaVte0j9Gqi6IrjZKudJ2Kr3gkdHsvlCdRTo-zYpSRo,11815
|
|
27
28
|
torchx/components/component_test_base.py,sha256=22iNSdVa_qTW3SMM30Pw5UEWlK4DZVw0C03EqYiaLOI,4150
|
|
28
29
|
torchx/components/dist.py,sha256=6DNPEvHVqEifmM8g1L7HVY169cQv_7tSfSlh3o6lTp4,14930
|
|
@@ -49,7 +50,7 @@ torchx/examples/apps/lightning/profiler.py,sha256=SSSihnwjeUTkBoz0E3qn1b-wbkfUIo
|
|
|
49
50
|
torchx/examples/apps/lightning/train.py,sha256=0wvvshGHvZowePB4LfclXwn40X7i9euM0ReETWBcPSo,6253
|
|
50
51
|
torchx/pipelines/__init__.py,sha256=2MbRVk5xwRjg-d2qPemeXpEhDsocMQumPQ53lsesZAI,606
|
|
51
52
|
torchx/runner/__init__.py,sha256=x8Sz7s_tLxPgJgvWIhK4ju9BNZU61uBFywGwDY6CqJs,315
|
|
52
|
-
torchx/runner/api.py,sha256=
|
|
53
|
+
torchx/runner/api.py,sha256=Qi12Kjkr_zpQBesbLuCtgKET8JhHnQk22MV7Czi4l1A,30832
|
|
53
54
|
torchx/runner/config.py,sha256=SaKOB50d79WaMFPWK8CC4as6UaNFaRGhrBkfajq3KC4,18311
|
|
54
55
|
torchx/runner/events/__init__.py,sha256=cMiNjnr4eUNQ2Nxxtu4nsvN5lu56b-a6nJ-ct3i7DQk,5536
|
|
55
56
|
torchx/runner/events/api.py,sha256=bvxKBAYK8LzbrBNaNLgL1x0aivtfANmWo1EMGOrSR8k,2668
|
|
@@ -58,14 +59,14 @@ torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,59
|
|
|
58
59
|
torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
|
|
59
60
|
torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
|
|
60
61
|
torchx/schedulers/__init__.py,sha256=FQN9boQM4mwOD3sK9LZ3GBgw-gJ7Vx4MFj6z6ATQIrc,2211
|
|
61
|
-
torchx/schedulers/api.py,sha256=
|
|
62
|
+
torchx/schedulers/api.py,sha256=PwXmqMDbwDlwpJsnaXcQSX6lf7YkyK6YsTSviMyflGY,16563
|
|
62
63
|
torchx/schedulers/aws_batch_scheduler.py,sha256=-HpjNVhSFBDxZo3cebK-3YEguB49dxoaud2gz30cAVM,29437
|
|
63
64
|
torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
|
|
64
65
|
torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
|
|
65
66
|
torchx/schedulers/docker_scheduler.py,sha256=x-XHCqYnrmiW0dHfVA7hz7Fp2Qgw7fvMgRm058YOngY,16880
|
|
66
67
|
torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
|
|
67
68
|
torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wyuSFKG0-ywLc3qITJo,42949
|
|
68
|
-
torchx/schedulers/kubernetes_scheduler.py,sha256=
|
|
69
|
+
torchx/schedulers/kubernetes_scheduler.py,sha256=PTCgDLshK5EUsZIGnTafjZ7LrO2YUjHmgR0mPL9VGFM,35672
|
|
69
70
|
torchx/schedulers/local_scheduler.py,sha256=ttnxFDy48_DSYDEW-no27OirFZOyfrjwJ2S1MwBUi74,41929
|
|
70
71
|
torchx/schedulers/lsf_scheduler.py,sha256=YS6Yel8tXJqLPxbcGz95lZG2nCi36AQXdNDyuBJePKg,17661
|
|
71
72
|
torchx/schedulers/slurm_scheduler.py,sha256=vypGaCZe61bkyNkqRlK4Iwmk_NaAUQi-DsspaWd6BZw,31873
|
|
@@ -103,9 +104,9 @@ torchx/workspace/__init__.py,sha256=FqN8AN4VhR1C_SBY10MggQvNZmyanbbuPuE-JCjkyUY,
|
|
|
103
104
|
torchx/workspace/api.py,sha256=UESQ4qgxXjsb6Y1wP9OGv2ixaFgaTs3SqghmNuOJIZM,10235
|
|
104
105
|
torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
|
|
105
106
|
torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
|
|
106
|
-
torchx_nightly-2025.11.
|
|
107
|
-
torchx_nightly-2025.11.
|
|
108
|
-
torchx_nightly-2025.11.
|
|
109
|
-
torchx_nightly-2025.11.
|
|
110
|
-
torchx_nightly-2025.11.
|
|
111
|
-
torchx_nightly-2025.11.
|
|
107
|
+
torchx_nightly-2025.11.20.dist-info/licenses/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
|
|
108
|
+
torchx_nightly-2025.11.20.dist-info/METADATA,sha256=yeYyvVFSNXDwzGTXtDktxEfyAHvepkZeM7uzQbSoqjk,5324
|
|
109
|
+
torchx_nightly-2025.11.20.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
110
|
+
torchx_nightly-2025.11.20.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
|
|
111
|
+
torchx_nightly-2025.11.20.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
|
|
112
|
+
torchx_nightly-2025.11.20.dist-info/RECORD,,
|
|
File without changes
|
{torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{torchx_nightly-2025.11.17.dist-info → torchx_nightly-2025.11.20.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|