torchx-nightly 2025.11.17__py3-none-any.whl → 2025.11.20__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchx-nightly might be problematic. Click here for more details.

@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
9
+
10
+ import argparse
11
+ import logging
12
+
13
+ from torchx.cli.cmd_base import SubCommand
14
+ from torchx.runner import get_runner
15
+
16
+ logger: logging.Logger = logging.getLogger(__name__)
17
+
18
+
19
+ class CmdDelete(SubCommand):
20
+ def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
21
+ subparser.add_argument(
22
+ "app_handle",
23
+ type=str,
24
+ help="torchx app handle (e.g. local://session-name/app-id)",
25
+ )
26
+
27
+ def run(self, args: argparse.Namespace) -> None:
28
+ app_handle = args.app_handle
29
+ runner = get_runner()
30
+ runner.delete(app_handle)
torchx/cli/main.py CHANGED
@@ -16,6 +16,7 @@ import torchx
16
16
  from torchx.cli.cmd_base import SubCommand
17
17
  from torchx.cli.cmd_cancel import CmdCancel
18
18
  from torchx.cli.cmd_configure import CmdConfigure
19
+ from torchx.cli.cmd_delete import CmdDelete
19
20
  from torchx.cli.cmd_describe import CmdDescribe
20
21
  from torchx.cli.cmd_list import CmdList
21
22
  from torchx.cli.cmd_log import CmdLog
@@ -37,6 +38,7 @@ def get_default_sub_cmds() -> Dict[str, SubCommand]:
37
38
  "builtins": CmdBuiltins(),
38
39
  "cancel": CmdCancel(),
39
40
  "configure": CmdConfigure(),
41
+ "delete": CmdDelete(),
40
42
  "describe": CmdDescribe(),
41
43
  "list": CmdList(),
42
44
  "log": CmdLog(),
torchx/runner/api.py CHANGED
@@ -587,6 +587,16 @@ class Runner:
587
587
  if status is not None and not status.is_terminal():
588
588
  scheduler.cancel(app_id)
589
589
 
590
+ def delete(self, app_handle: AppHandle) -> None:
591
+ """
592
+ Deletes the application from the scheduler.
593
+ """
594
+ scheduler, scheduler_backend, app_id = self._scheduler_app_id(app_handle)
595
+ with log_event("delete", scheduler_backend, app_id):
596
+ status = self.status(app_handle)
597
+ if status is not None:
598
+ scheduler.delete(app_id)
599
+
590
600
  def stop(self, app_handle: AppHandle) -> None:
591
601
  """
592
602
  See method ``cancel``.
torchx/schedulers/api.py CHANGED
@@ -264,6 +264,46 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
264
264
  # do nothing if the app does not exist
265
265
  return
266
266
 
267
+ def delete(self, app_id: str) -> None:
268
+ """
269
+ Deletes the job information for the specified ``app_id`` from the
270
+ scheduler's data-plane. Basically "deep-purging" the job from the
271
+ scheduler's data-plane. Calling this API on a "live" job (e.g in a
272
+ non-terminal status such as PENDING or RUNNING) cancels the job.
273
+
274
+ Note that this API is only relevant for schedulers for which its
275
+ data-plane persistently stores the "JobDefinition" (which is often
276
+ versioned). AWS Batch and Kubernetes are examples of such schedulers.
277
+ On these schedulers, a finished job may fall out of the data-plane
278
+ (e.g. really old finished jobs get deleted) but the JobDefinition is
279
+ typically permanently stored. In this case, calling
280
+ :py:meth:`~cancel` would not delete the job definition.
281
+
282
+ In schedulers with no such feature (e.g. SLURM)
283
+ :py:meth:`~delete` is the same as :py:meth:`~cancel`, which is the
284
+ default implementation. Hence implementors of such schedulers need not
285
+ override this method.
286
+
287
+ .. warning::
288
+ Calling :py:meth:`~delete` on an ``app_id`` that has fallen out of
289
+ the scheduler's data-plane does nothing. The user is responsible for
290
+ manually tracking down and cleaning up any dangling resources related
291
+ to the job.
292
+ """
293
+ if self.exists(app_id):
294
+ self._delete_existing(app_id)
295
+
296
+ def _delete_existing(self, app_id: str) -> None:
297
+ """
298
+ Deletes the job information for the specified ``app_id`` from the
299
+ scheduler's data-plane. This method will only be called on an
300
+ application that exists.
301
+
302
+ The default implementation calls :py:meth:`~_cancel_existing` which is
303
+ appropriate for schedulers without persistent job definitions.
304
+ """
305
+ self._cancel_existing(app_id)
306
+
267
307
  def log_iter(
268
308
  self,
269
309
  app_id: str,
@@ -622,6 +622,16 @@ class KubernetesScheduler(
622
622
  $ torchx status kubernetes://torchx_user/1234
623
623
  ...
624
624
 
625
+ **Cancellation**
626
+
627
+ Canceling a job aborts it while preserving the job spec for inspection
628
+ and cloning via kubectl apply. Use the delete command to remove the job entirely:
629
+
630
+ .. code-block:: bash
631
+
632
+ $ torchx cancel kubernetes://namespace/jobname # abort, preserves spec
633
+ $ torchx delete kubernetes://namespace/jobname # delete completely
634
+
625
635
  **Config Options**
626
636
 
627
637
  .. runopts::
@@ -818,6 +828,31 @@ class KubernetesScheduler(
818
828
  pass
819
829
 
820
830
  def _cancel_existing(self, app_id: str) -> None:
831
+ """
832
+ Abort a Volcano job while preserving the spec for inspection.
833
+ """
834
+ namespace, name = app_id.split(":")
835
+ vcjob = self._custom_objects_api().get_namespaced_custom_object(
836
+ group="batch.volcano.sh",
837
+ version="v1alpha1",
838
+ namespace=namespace,
839
+ plural="jobs",
840
+ name=name,
841
+ )
842
+ vcjob["status"]["state"]["phase"] = "Aborted"
843
+ self._custom_objects_api().replace_namespaced_custom_object_status(
844
+ group="batch.volcano.sh",
845
+ version="v1alpha1",
846
+ namespace=namespace,
847
+ plural="jobs",
848
+ name=name,
849
+ body=vcjob,
850
+ )
851
+
852
+ def _delete_existing(self, app_id: str) -> None:
853
+ """
854
+ Delete a Volcano job completely from the cluster.
855
+ """
821
856
  namespace, name = app_id.split(":")
822
857
  self._custom_objects_api().delete_namespaced_custom_object(
823
858
  group="batch.volcano.sh",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchx-nightly
3
- Version: 2025.11.17
3
+ Version: 2025.11.20
4
4
  Summary: TorchX SDK and Components
5
5
  Home-page: https://github.com/meta-pytorch/torchx
6
6
  Author: TorchX Devs
@@ -14,6 +14,7 @@ torchx/cli/argparse_util.py,sha256=kZb1ubEHDrBsmrxpySFRQCW7wmHuRHD8eAInuEZjlsI,3
14
14
  torchx/cli/cmd_base.py,sha256=SdqMtqi04CEqnzcgcS35DbDbsBeMxSgEhfynfpIkMGk,790
15
15
  torchx/cli/cmd_cancel.py,sha256=NKfOCu_44Lch9vliGSQ0Uv6BVqpUqj7Tob652TI-ua4,835
16
16
  torchx/cli/cmd_configure.py,sha256=1kTv0qbsbV44So74plAySwWu56pQrqjhfW_kbfdC3Rw,1722
17
+ torchx/cli/cmd_delete.py,sha256=US1f6Jvyhz4R_0Q0a8GeNTDMrhzo8WE_ECcdOf0MjKE,835
17
18
  torchx/cli/cmd_describe.py,sha256=E5disbHoKTsqYKp2s3DaFW9GDLCCOgdOc3pQoHKoyCs,1283
18
19
  torchx/cli/cmd_list.py,sha256=alkS9aIaDI8lX3W8uj8Vtr3IU3G2VeCuokKSd3zOFug,1409
19
20
  torchx/cli/cmd_log.py,sha256=v-EZYUDOcG95rEgTnrsmPJMUyxM9Mk8YFAJtUxtgViE,5475
@@ -22,7 +23,7 @@ torchx/cli/cmd_runopts.py,sha256=NWZiP8XpQjfTDJgays2c6MgL_8wxFoeDge6NstaZdKk,130
22
23
  torchx/cli/cmd_status.py,sha256=22IAEmKs0qkG6kJi83u9dRX2Q-ntT7yehVx7FxtY-vQ,2114
23
24
  torchx/cli/cmd_tracker.py,sha256=9gmOmYi-89qQRGQfSrXCTto7ve54_JKFqs_wa7oRUA8,5223
24
25
  torchx/cli/colors.py,sha256=yLMes7e_UoLAfhxE0W6edhc58t83UHAlnCN2ANPeuXw,568
25
- torchx/cli/main.py,sha256=1Jf2cnO6Y2W69Adt88avmNPVrL6ZR4Hkff6GVB4293k,3484
26
+ torchx/cli/main.py,sha256=1DJTmKdvPW_7hod8OUVT3Br2uwsZVEDU-2bTE0NJ0zY,3559
26
27
  torchx/components/__init__.py,sha256=JaVte0j9Gqi6IrjZKudJ2Kr3gkdHsvlCdRTo-zYpSRo,11815
27
28
  torchx/components/component_test_base.py,sha256=22iNSdVa_qTW3SMM30Pw5UEWlK4DZVw0C03EqYiaLOI,4150
28
29
  torchx/components/dist.py,sha256=6DNPEvHVqEifmM8g1L7HVY169cQv_7tSfSlh3o6lTp4,14930
@@ -49,7 +50,7 @@ torchx/examples/apps/lightning/profiler.py,sha256=SSSihnwjeUTkBoz0E3qn1b-wbkfUIo
49
50
  torchx/examples/apps/lightning/train.py,sha256=0wvvshGHvZowePB4LfclXwn40X7i9euM0ReETWBcPSo,6253
50
51
  torchx/pipelines/__init__.py,sha256=2MbRVk5xwRjg-d2qPemeXpEhDsocMQumPQ53lsesZAI,606
51
52
  torchx/runner/__init__.py,sha256=x8Sz7s_tLxPgJgvWIhK4ju9BNZU61uBFywGwDY6CqJs,315
52
- torchx/runner/api.py,sha256=xQpgiUz9jCX4zZriubbWk4tTJRe7MxNJQK64g0o7KQ8,30438
53
+ torchx/runner/api.py,sha256=Qi12Kjkr_zpQBesbLuCtgKET8JhHnQk22MV7Czi4l1A,30832
53
54
  torchx/runner/config.py,sha256=SaKOB50d79WaMFPWK8CC4as6UaNFaRGhrBkfajq3KC4,18311
54
55
  torchx/runner/events/__init__.py,sha256=cMiNjnr4eUNQ2Nxxtu4nsvN5lu56b-a6nJ-ct3i7DQk,5536
55
56
  torchx/runner/events/api.py,sha256=bvxKBAYK8LzbrBNaNLgL1x0aivtfANmWo1EMGOrSR8k,2668
@@ -58,14 +59,14 @@ torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,59
58
59
  torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
59
60
  torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
60
61
  torchx/schedulers/__init__.py,sha256=FQN9boQM4mwOD3sK9LZ3GBgw-gJ7Vx4MFj6z6ATQIrc,2211
61
- torchx/schedulers/api.py,sha256=smoUv1ocfqsBRmesXbz9i1F86zBOixZ8QHxYmI_MzgQ,14649
62
+ torchx/schedulers/api.py,sha256=PwXmqMDbwDlwpJsnaXcQSX6lf7YkyK6YsTSviMyflGY,16563
62
63
  torchx/schedulers/aws_batch_scheduler.py,sha256=-HpjNVhSFBDxZo3cebK-3YEguB49dxoaud2gz30cAVM,29437
63
64
  torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
64
65
  torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
65
66
  torchx/schedulers/docker_scheduler.py,sha256=x-XHCqYnrmiW0dHfVA7hz7Fp2Qgw7fvMgRm058YOngY,16880
66
67
  torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
67
68
  torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wyuSFKG0-ywLc3qITJo,42949
68
- torchx/schedulers/kubernetes_scheduler.py,sha256=86ny9XXt9tdeV6Y7AlVFQ6vhxlviOdNeZUz4gOzU3cc,34478
69
+ torchx/schedulers/kubernetes_scheduler.py,sha256=PTCgDLshK5EUsZIGnTafjZ7LrO2YUjHmgR0mPL9VGFM,35672
69
70
  torchx/schedulers/local_scheduler.py,sha256=ttnxFDy48_DSYDEW-no27OirFZOyfrjwJ2S1MwBUi74,41929
70
71
  torchx/schedulers/lsf_scheduler.py,sha256=YS6Yel8tXJqLPxbcGz95lZG2nCi36AQXdNDyuBJePKg,17661
71
72
  torchx/schedulers/slurm_scheduler.py,sha256=vypGaCZe61bkyNkqRlK4Iwmk_NaAUQi-DsspaWd6BZw,31873
@@ -103,9 +104,9 @@ torchx/workspace/__init__.py,sha256=FqN8AN4VhR1C_SBY10MggQvNZmyanbbuPuE-JCjkyUY,
103
104
  torchx/workspace/api.py,sha256=UESQ4qgxXjsb6Y1wP9OGv2ixaFgaTs3SqghmNuOJIZM,10235
104
105
  torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
105
106
  torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
106
- torchx_nightly-2025.11.17.dist-info/licenses/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
107
- torchx_nightly-2025.11.17.dist-info/METADATA,sha256=iim6P-wiEztRPHgcWaQCa9_f0GsU-GyxHBILL2cyVJg,5324
108
- torchx_nightly-2025.11.17.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
109
- torchx_nightly-2025.11.17.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
110
- torchx_nightly-2025.11.17.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
111
- torchx_nightly-2025.11.17.dist-info/RECORD,,
107
+ torchx_nightly-2025.11.20.dist-info/licenses/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
108
+ torchx_nightly-2025.11.20.dist-info/METADATA,sha256=yeYyvVFSNXDwzGTXtDktxEfyAHvepkZeM7uzQbSoqjk,5324
109
+ torchx_nightly-2025.11.20.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
110
+ torchx_nightly-2025.11.20.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
111
+ torchx_nightly-2025.11.20.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
112
+ torchx_nightly-2025.11.20.dist-info/RECORD,,