torchx-nightly 2025.10.16__py3-none-any.whl → 2025.11.20__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.


torchx/_version.py ADDED
@@ -0,0 +1,8 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-strict
+ BASE_VERSION = "0.8.0dev0"
torchx/cli/cmd_delete.py ADDED
@@ -0,0 +1,30 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ # pyre-strict
+
+ import argparse
+ import logging
+
+ from torchx.cli.cmd_base import SubCommand
+ from torchx.runner import get_runner
+
+ logger: logging.Logger = logging.getLogger(__name__)
+
+
+ class CmdDelete(SubCommand):
+     def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
+         subparser.add_argument(
+             "app_handle",
+             type=str,
+             help="torchx app handle (e.g. local://session-name/app-id)",
+         )
+
+     def run(self, args: argparse.Namespace) -> None:
+         app_handle = args.app_handle
+         runner = get_runner()
+         runner.delete(app_handle)
torchx/cli/main.py CHANGED
@@ -16,6 +16,7 @@ import torchx
  from torchx.cli.cmd_base import SubCommand
  from torchx.cli.cmd_cancel import CmdCancel
  from torchx.cli.cmd_configure import CmdConfigure
+ from torchx.cli.cmd_delete import CmdDelete
  from torchx.cli.cmd_describe import CmdDescribe
  from torchx.cli.cmd_list import CmdList
  from torchx.cli.cmd_log import CmdLog
@@ -37,6 +38,7 @@ def get_default_sub_cmds() -> Dict[str, SubCommand]:
          "builtins": CmdBuiltins(),
          "cancel": CmdCancel(),
          "configure": CmdConfigure(),
+         "delete": CmdDelete(),
          "describe": CmdDescribe(),
          "list": CmdList(),
          "log": CmdLog(),
torchx/runner/api.py CHANGED
@@ -420,52 +420,44 @@ class Runner:
              scheduler,
              runcfg=json.dumps(cfg) if cfg else None,
              workspace=str(workspace),
-         ):
+         ) as ctx:
              sched = self._scheduler(scheduler)
              resolved_cfg = sched.run_opts().resolve(cfg)

              sched._pre_build_validate(app, scheduler, resolved_cfg)

              if isinstance(sched, WorkspaceMixin):
-                 for i, role in enumerate(app.roles):
-                     role_workspace = role.workspace
-
-                     if i == 0 and workspace:
-                         # NOTE: torchx originally took workspace as a runner arg and only applied the workspace to role[0]
-                         # later, torchx added support for the workspace attr in Role
-                         # for BC, give precedence to the workspace argument over the workspace attr for role[0]
-                         if role_workspace:
-                             logger.info(
-                                 f"Using workspace={workspace} over role[{i}].workspace={role_workspace} for role[{i}]={role.name}."
-                                 " To use the role's workspace attr pass: --workspace='' from CLI or workspace=None programmatically."  # noqa: B950
-                             )
-                         role_workspace = workspace
-
-                     if role_workspace:
-                         old_img = role.image
+                 if workspace:
+                     # NOTE: torchx originally took workspace as a runner arg and only applied the workspace to role[0]
+                     # later, torchx added support for the workspace attr in Role
+                     # for BC, give precedence to the workspace argument over the workspace attr for role[0]
+                     if app.roles[0].workspace:
                          logger.info(
-                             f"Checking for changes in workspace `{role_workspace}` for role[{i}]={role.name}..."
-                         )
-                         # TODO kiuk@ once we deprecate the `workspace` argument in runner APIs we can simplify the signature of
-                         # build_workspace_and_update_role2() to just taking the role and resolved_cfg
-                         sched.build_workspace_and_update_role2(
-                             role, role_workspace, resolved_cfg
+                             "Overriding role[%d] (%s) workspace to `%s`."
+                             " To use the role's workspace attr pass: --workspace='' from CLI or workspace=None programmatically.",
+                             0,
+                             app.roles[0].name,
+                             str(app.roles[0].workspace),
                          )
+                     app.roles[0].workspace = (
+                         Workspace.from_str(workspace)
+                         if isinstance(workspace, str)
+                         else workspace
+                     )

-                         if old_img != role.image:
-                             logger.info(
-                                 f"Built new image `{role.image}` based on original image `{old_img}`"
-                                 f" and changes in workspace `{role_workspace}` for role[{i}]={role.name}."
-                             )
-                         else:
-                             logger.info(
-                                 f"Reusing original image `{old_img}` for role[{i}]={role.name}."
-                                 " Either a patch was built or no changes to workspace was detected."
-                             )
+                 sched.build_workspaces(app.roles, resolved_cfg)

              sched._validate(app, scheduler, resolved_cfg)
              dryrun_info = sched.submit_dryrun(app, resolved_cfg)
              dryrun_info._scheduler = scheduler
+
+             event = ctx._torchx_event
+             event.scheduler = scheduler
+             event.runcfg = json.dumps(cfg) if cfg else None
+             event.app_id = app.name
+             event.app_image = none_throws(dryrun_info._app).roles[0].image
+             event.app_metadata = app.metadata
+
              return dryrun_info

      def scheduler_run_opts(self, scheduler: str) -> runopts:
@@ -595,6 +587,16 @@ class Runner:
          if status is not None and not status.is_terminal():
              scheduler.cancel(app_id)

+     def delete(self, app_handle: AppHandle) -> None:
+         """
+         Deletes the application from the scheduler.
+         """
+         scheduler, scheduler_backend, app_id = self._scheduler_app_id(app_handle)
+         with log_event("delete", scheduler_backend, app_id):
+             status = self.status(app_handle)
+             if status is not None:
+                 scheduler.delete(app_id)
+
      def stop(self, app_handle: AppHandle) -> None:
          """
          See method ``cancel``.
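For reference, deleting a job programmatically through the new runner API might look like the following (the app handle is a hypothetical placeholder; real handles are returned by ``runner.run(...)`` or listed via ``torchx list``):

    from torchx.runner import get_runner

    # hypothetical handle of the form <scheduler>://<session-name>/<app-id>
    app_handle = "kubernetes://torchx/default:my-job"

    runner = get_runner()
    # cancels the job if it is still live, then purges it from the scheduler
    runner.delete(app_handle)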
torchx/schedulers/api.py CHANGED
@@ -131,7 +131,7 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
          self,
          app: A,
          cfg: T,
-         workspace: Optional[Union[Workspace, str]] = None,
+         workspace: str | Workspace | None = None,
      ) -> str:
          """
          Submits the application to be run by the scheduler.
@@ -145,7 +145,12 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
          resolved_cfg = self.run_opts().resolve(cfg)
          if workspace:
              assert isinstance(self, WorkspaceMixin)
-             self.build_workspace_and_update_role2(app.roles[0], workspace, resolved_cfg)
+
+             if isinstance(workspace, str):
+                 workspace = Workspace.from_str(workspace)
+
+             app.roles[0].workspace = workspace
+             self.build_workspaces(app.roles, resolved_cfg)

          # pyre-fixme: submit_dryrun takes Generic type for resolved_cfg
          dryrun_info = self.submit_dryrun(app, resolved_cfg)
@@ -259,6 +264,46 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
          # do nothing if the app does not exist
          return

+     def delete(self, app_id: str) -> None:
+         """
+         Deletes the job information for the specified ``app_id`` from the
+         scheduler's data-plane, effectively "deep-purging" the job from it.
+         Calling this API on a "live" job (e.g. one in a non-terminal status
+         such as PENDING or RUNNING) cancels the job.
+
+         Note that this API is only relevant for schedulers whose data-plane
+         persistently stores the "JobDefinition" (which is often versioned).
+         AWS Batch and Kubernetes are examples of such schedulers.
+         On these schedulers, a finished job may fall out of the data-plane
+         (e.g. really old finished jobs get deleted) but the JobDefinition is
+         typically permanently stored. In this case, calling
+         :py:meth:`~cancel` would not delete the job definition.
+
+         In schedulers with no such feature (e.g. SLURM),
+         :py:meth:`~delete` is the same as :py:meth:`~cancel`, which is the
+         default implementation. Hence implementors of such schedulers need not
+         override this method.
+
+         .. warning::
+             Calling :py:meth:`~delete` on an ``app_id`` that has fallen out of
+             the scheduler's data-plane does nothing. The user is responsible for
+             manually tracking down and cleaning up any dangling resources related
+             to the job.
+         """
+         if self.exists(app_id):
+             self._delete_existing(app_id)
+
+     def _delete_existing(self, app_id: str) -> None:
+         """
+         Deletes the job information for the specified ``app_id`` from the
+         scheduler's data-plane. This method will only be called on an
+         application that exists.
+
+         The default implementation calls :py:meth:`~_cancel_existing`, which is
+         appropriate for schedulers without persistent job definitions.
+         """
+         self._cancel_existing(app_id)
+
      def log_iter(
          self,
          app_id: str,
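To make the cancel/delete contract concrete, here is a standalone toy sketch (not torchx code; all names below are made up) of a scheduler whose data-plane keeps job definitions around after a job finishes:

    class TinyScheduler:
        # toy scheduler with a persistent job-definition store
        def __init__(self) -> None:
            self.definitions = {"job-1": {"image": "img:1"}}  # persisted JobDefinitions
            self.live = {"job-1"}  # currently running jobs

        def _cancel_existing(self, app_id: str) -> None:
            # abort the run but keep the stored job definition
            self.live.discard(app_id)

        def _delete_existing(self, app_id: str) -> None:
            # deep-purge: abort the run *and* drop the persisted definition
            self._cancel_existing(app_id)
            self.definitions.pop(app_id, None)

A SLURM-like scheduler would simply inherit the default ``_delete_existing``, which falls through to ``_cancel_existing`` as shown above.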
torchx/schedulers/kubernetes_scheduler.py CHANGED
@@ -27,10 +27,81 @@ Install Volcano:
  See the
  `Volcano Quickstart <https://github.com/volcano-sh/volcano>`_
  for more information.
+
+ Pod Overlay
+ ===========
+
+ You can overlay arbitrary Kubernetes Pod fields on generated pods by setting
+ the ``kubernetes`` metadata on your role. The value can be:
+
+ - A dict with the overlay structure
+ - A resource URI pointing to a YAML file (e.g. ``file://``, ``s3://``, ``gs://``)
+
+ Merge semantics:
+
+ - **dict**: recursive merge (upsert)
+ - **list**: append by default, replace if tuple (Python) or ``!!python/tuple`` tag (YAML)
+ - **primitives**: replace
+
+ .. code:: python
+
+     from torchx.specs import Role
+
+     # Dict overlay - lists append, tuples replace
+     role = Role(
+         name="trainer",
+         image="my-image:latest",
+         entrypoint="train.py",
+         metadata={
+             "kubernetes": {
+                 "spec": {
+                     "nodeSelector": {"gpu": "true"},
+                     "tolerations": [{"key": "nvidia.com/gpu", "operator": "Exists"}],  # appends
+                     "volumes": ({"name": "my-volume", "emptyDir": {}},)  # replaces
+                 }
+             }
+         }
+     )
+
+     # File URI overlay
+     role = Role(
+         name="trainer",
+         image="my-image:latest",
+         entrypoint="train.py",
+         metadata={
+             "kubernetes": "file:///path/to/pod_overlay.yaml"
+         }
+     )
+
+ CLI usage with builtin components:
+
+ .. code:: bash
+
+     $ torchx run --scheduler kubernetes dist.ddp \\
+         --metadata kubernetes=file:///path/to/pod_overlay.yaml \\
+         --script train.py
+
+ Example ``pod_overlay.yaml``:
+
+ .. code:: yaml
+
+     spec:
+       nodeSelector:
+         node.kubernetes.io/instance-type: p4d.24xlarge
+       tolerations:
+       - key: nvidia.com/gpu
+         operator: Exists
+         effect: NoSchedule
+       volumes: !!python/tuple
+       - name: my-volume
+         emptyDir: {}
+
+ The overlay is deep-merged with the generated pod, preserving existing fields
+ and adding or overriding specified ones.
  """

  import json
  import logging
+ import re
  import warnings
  from dataclasses import dataclass
  from datetime import datetime
@@ -45,6 +116,7 @@ from typing import (
      Tuple,
      TYPE_CHECKING,
      TypedDict,
+     Union,
  )

  import torchx
@@ -97,6 +169,40 @@ logger: logging.Logger = logging.getLogger(__name__)
  RESERVED_MILLICPU = 100
  RESERVED_MEMMB = 1024

+
+ def _apply_pod_overlay(pod: "V1Pod", overlay: Dict[str, Any]) -> None:
+     """Apply overlay dict to V1Pod object, merging nested fields.
+
+     Merge semantics:
+     - dict: upsert (recursive merge)
+     - list: append by default, replace if tuple
+     - primitives: replace
+     """
+     from kubernetes import client
+
+     api = client.ApiClient()
+     pod_dict = api.sanitize_for_serialization(pod)
+
+     def deep_merge(base: Dict[str, Any], overlay: Dict[str, Any]) -> None:
+         for key, value in overlay.items():
+             if isinstance(value, dict) and key in base and isinstance(base[key], dict):
+                 deep_merge(base[key], value)
+             elif isinstance(value, tuple):
+                 base[key] = list(value)
+             elif (
+                 isinstance(value, list) and key in base and isinstance(base[key], list)
+             ):
+                 base[key].extend(value)
+             else:
+                 base[key] = value
+
+     deep_merge(pod_dict, overlay)
+
+     merged_pod = api._ApiClient__deserialize(pod_dict, "V1Pod")
+     pod.spec = merged_pod.spec
+     pod.metadata = merged_pod.metadata
+
+
  RETRY_POLICIES: Mapping[str, Iterable[Mapping[str, str]]] = {
      RetryPolicy.REPLICA: [],
      RetryPolicy.APPLICATION: [
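The merge rules are easiest to see on plain dicts. A self-contained sketch that mirrors the ``deep_merge`` helper above (no kubernetes client involved):

    base = {
        "spec": {
            "containers": [{"name": "main"}],
            "volumes": [{"name": "logs"}],
            "restartPolicy": "Never",
        }
    }
    overlay = {
        "spec": {
            "containers": [{"name": "sidecar"}],  # list -> appends
            "volumes": ({"name": "scratch"},),    # tuple -> replaces
            "restartPolicy": "OnFailure",         # primitive -> replaces
        }
    }

    def deep_merge(base, overlay):
        for key, value in overlay.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                deep_merge(base[key], value)
            elif isinstance(value, tuple):
                base[key] = list(value)
            elif isinstance(value, list) and isinstance(base.get(key), list):
                base[key].extend(value)
            else:
                base[key] = value

    deep_merge(base, overlay)
    assert base["spec"]["containers"] == [{"name": "main"}, {"name": "sidecar"}]
    assert base["spec"]["volumes"] == [{"name": "scratch"}]
    assert base["spec"]["restartPolicy"] == "OnFailure"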
@@ -369,7 +475,7 @@ def app_to_resource(
      queue: str,
      service_account: Optional[str],
      priority_class: Optional[str] = None,
- ) -> Dict[str, object]:
+ ) -> Dict[str, Any]:
      """
      app_to_resource creates a volcano job kubernetes resource definition from
      the provided AppDef. The resource definition can be used to launch the
@@ -402,6 +508,17 @@ def app_to_resource(
          replica_role.env["TORCHX_IMAGE"] = replica_role.image

          pod = role_to_pod(name, replica_role, service_account)
+         if k8s_metadata := role.metadata.get("kubernetes"):
+             if isinstance(k8s_metadata, str):
+                 import fsspec
+
+                 with fsspec.open(k8s_metadata, "r") as f:
+                     k8s_metadata = yaml.unsafe_load(f)
+             elif not isinstance(k8s_metadata, dict):
+                 raise ValueError(
+                     f"metadata['kubernetes'] must be a dict or resource URI, got {type(k8s_metadata)}"
+                 )
+             _apply_pod_overlay(pod, k8s_metadata)
          pod.metadata.labels.update(
              pod_labels(
                  app=app,
@@ -444,7 +561,7 @@ does NOT support retries correctly. More info: https://github.com/volcano-sh/vol
      if priority_class is not None:
          job_spec["priorityClassName"] = priority_class

-     resource: Dict[str, object] = {
+     resource: Dict[str, Any] = {
          "apiVersion": "batch.volcano.sh/v1alpha1",
          "kind": "Job",
          "metadata": {"name": f"{unique_app_id}"},
@@ -456,7 +573,7 @@ does NOT support retries correctly. More info: https://github.com/volcano-sh/vol
  @dataclass
  class KubernetesJob:
      images_to_push: Dict[str, Tuple[str, str]]
-     resource: Dict[str, object]
+     resource: Dict[str, Any]

      def __str__(self) -> str:
          return yaml.dump(sanitize_for_serialization(self.resource))
@@ -471,6 +588,7 @@ class KubernetesOpts(TypedDict, total=False):
      image_repo: Optional[str]
      service_account: Optional[str]
      priority_class: Optional[str]
+     validate_spec: Optional[bool]


  class KubernetesScheduler(
@@ -504,6 +622,16 @@ class KubernetesScheduler(
          $ torchx status kubernetes://torchx_user/1234
          ...

+     **Cancellation**
+
+     Canceling a job aborts it while preserving the job spec for inspection
+     and cloning via kubectl apply. Use the delete command to remove the job entirely:
+
+     .. code-block:: bash
+
+         $ torchx cancel kubernetes://namespace/jobname  # abort, preserves spec
+         $ torchx delete kubernetes://namespace/jobname  # delete completely
+
      **Config Options**

      .. runopts::
@@ -636,7 +764,7 @@ class KubernetesScheduler(
          else:
              raise

-         return f'{namespace}:{resp["metadata"]["name"]}'
+         return f"{namespace}:{resp['metadata']['name']}"

      def _submit_dryrun(
          self, app: AppDef, cfg: KubernetesOpts
@@ -659,6 +787,36 @@ class KubernetesScheduler(
          ), "priority_class must be a str"

          resource = app_to_resource(app, queue, service_account, priority_class)
+
+         if cfg.get("validate_spec"):
+             try:
+                 self._custom_objects_api().create_namespaced_custom_object(
+                     group="batch.volcano.sh",
+                     version="v1alpha1",
+                     namespace=cfg.get("namespace") or "default",
+                     plural="jobs",
+                     body=resource,
+                     dry_run="All",
+                 )
+             except Exception as e:
+                 from kubernetes.client.rest import ApiException
+
+                 if isinstance(e, ApiException):
+                     raise ValueError(f"Invalid job spec: {e.reason}") from e
+                 raise
+
+         job_name = resource["metadata"]["name"]
+         for task in resource["spec"]["tasks"]:
+             task_name = task["name"]
+             replicas = task.get("replicas", 1)
+             max_index = replicas - 1
+             pod_name = f"{job_name}-{task_name}-{max_index}"
+             if len(pod_name) > 63:
+                 raise ValueError(
+                     f"Pod name '{pod_name}' ({len(pod_name)} chars) exceeds 63 character limit. "
+                     f"Shorten app.name or role names"
+                 )
+
          req = KubernetesJob(
              resource=resource,
              images_to_push=images_to_push,
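The pod-name check above derives the longest name a task can produce from the ``{job_name}-{task_name}-{replica_index}`` pattern used in the loop; a quick standalone sanity check with illustrative values:

    job_name = "my-training-app-abc123"  # unique app id
    task_name = "trainer"  # role name
    replicas = 4

    # the highest-indexed replica yields the longest pod name
    pod_name = f"{job_name}-{task_name}-{replicas - 1}"
    assert len(pod_name) <= 63, f"'{pod_name}' exceeds the Kubernetes 63-char name limit"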
@@ -670,6 +828,31 @@ class KubernetesScheduler(
          pass

      def _cancel_existing(self, app_id: str) -> None:
+         """
+         Abort a Volcano job while preserving the spec for inspection.
+         """
+         namespace, name = app_id.split(":")
+         vcjob = self._custom_objects_api().get_namespaced_custom_object(
+             group="batch.volcano.sh",
+             version="v1alpha1",
+             namespace=namespace,
+             plural="jobs",
+             name=name,
+         )
+         vcjob["status"]["state"]["phase"] = "Aborted"
+         self._custom_objects_api().replace_namespaced_custom_object_status(
+             group="batch.volcano.sh",
+             version="v1alpha1",
+             namespace=namespace,
+             plural="jobs",
+             name=name,
+             body=vcjob,
+         )
+
+     def _delete_existing(self, app_id: str) -> None:
+         """
+         Delete a Volcano job completely from the cluster.
+         """
          namespace, name = app_id.split(":")
          self._custom_objects_api().delete_namespaced_custom_object(
              group="batch.volcano.sh",
@@ -703,19 +886,32 @@ class KubernetesScheduler(
              type_=str,
              help="The name of the PriorityClass to set on the job specs",
          )
+         opts.add(
+             "validate_spec",
+             type_=bool,
+             help="Validate job spec using Kubernetes API dry-run before submission",
+             default=True,
+         )
          return opts

      def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
+         from kubernetes.client.rest import ApiException
+
          namespace, name = app_id.split(":")
          roles = {}
          roles_statuses = {}
-         resp = self._custom_objects_api().get_namespaced_custom_object_status(
-             group="batch.volcano.sh",
-             version="v1alpha1",
-             namespace=namespace,
-             plural="jobs",
-             name=name,
-         )
+         try:
+             resp = self._custom_objects_api().get_namespaced_custom_object_status(
+                 group="batch.volcano.sh",
+                 version="v1alpha1",
+                 namespace=namespace,
+                 plural="jobs",
+                 name=name,
+             )
+         except ApiException as e:
+             if e.status == 404:
+                 return None
+             raise
          status = resp.get("status")
          if status:
              state_str = status["state"]["phase"]
@@ -824,13 +1020,34 @@ def create_scheduler(
  def pod_labels(
      app: AppDef, role_idx: int, role: Role, replica_id: int, app_id: str
  ) -> Dict[str, str]:
+
+     def clean(label_value: str) -> str:
+         # cleans the provided `label_value` to make it compliant
+         # with pod label specs as described in
+         # https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+         #
+         # Valid label value:
+         #   must be 63 characters or less (can be empty),
+         #   unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
+         #   could contain dashes (-), underscores (_), dots (.), and alphanumerics between.
+
+         # Replace invalid characters (allow: alphanum, -, _, .) with "."
+         label_value = re.sub(r"[^A-Za-z0-9\-_.]", ".", label_value)
+         # Replace leading non-alphanumeric run with "."
+         label_value = re.sub(r"^[^A-Za-z0-9]+", ".", label_value)
+         # Replace trailing non-alphanumeric run with "."
+         label_value = re.sub(r"[^A-Za-z0-9]+$", ".", label_value)
+
+         # Trim to 63 characters
+         return label_value[:63]
+
      return {
-         LABEL_VERSION: torchx.__version__,
-         LABEL_APP_NAME: app.name,
+         LABEL_VERSION: clean(torchx.__version__),
+         LABEL_APP_NAME: clean(app.name),
          LABEL_ROLE_INDEX: str(role_idx),
-         LABEL_ROLE_NAME: role.name,
+         LABEL_ROLE_NAME: clean(role.name),
          LABEL_REPLICA_ID: str(replica_id),
-         LABEL_KUBE_APP_NAME: app.name,
+         LABEL_KUBE_APP_NAME: clean(app.name),
          LABEL_ORGANIZATION: "torchx.pytorch.org",
-         LABEL_UNIQUE_NAME: app_id,
+         LABEL_UNIQUE_NAME: clean(app_id),
      }
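For a sense of what ``clean()`` produces, a couple of illustrative inputs run through the same regexes as the helper above:

    import re

    def clean(label_value: str) -> str:
        label_value = re.sub(r"[^A-Za-z0-9\-_.]", ".", label_value)
        label_value = re.sub(r"^[^A-Za-z0-9]+", ".", label_value)
        label_value = re.sub(r"[^A-Za-z0-9]+$", ".", label_value)
        return label_value[:63]

    print(clean("main/worker@v2"))  # -> main.worker.v2
    print(len(clean("x" * 80)))     # -> 63 (trimmed to the label limit)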
torchx/specs/__init__.py CHANGED
@@ -14,7 +14,7 @@ scheduler or pipeline adapter.
  import difflib

  import os
- from typing import Callable, Dict, Mapping, Optional
+ from typing import Callable, Dict, Iterator, Mapping, Optional

  from torchx.specs.api import (
      ALL,
@@ -113,8 +113,22 @@ class _NamedResourcesLibrary:
      def __contains__(self, key: str) -> bool:
          return key in _named_resource_factories

-     def __iter__(self) -> None:
-         raise NotImplementedError("named resources doesn't support iterating")
+     def __iter__(self) -> Iterator[str]:
+         """Iterates through the names of the registered named_resources.
+
+         Usage:
+
+         .. doctest::
+
+             from torchx import specs
+
+             for resource_name in specs.named_resources:
+                 resource = specs.resource(h=resource_name)
+                 assert isinstance(resource, specs.Resource)
+
+         """
+         for key in _named_resource_factories:
+             yield key


  named_resources: _NamedResourcesLibrary = _NamedResourcesLibrary()
torchx/specs/api.py CHANGED
@@ -14,10 +14,12 @@ import logging as logger
  import os
  import pathlib
  import re
+ import shutil
  import typing
+ import warnings
  from dataclasses import asdict, dataclass, field
  from datetime import datetime
- from enum import Enum
+ from enum import Enum, IntEnum
  from json import JSONDecodeError
  from string import Template
  from typing import (
@@ -380,6 +382,16 @@ class Workspace:
          """False if no projects mapping. Lets us use workspace object in an if-statement"""
          return bool(self.projects)

+     def __eq__(self, other: object) -> bool:
+         if not isinstance(other, Workspace):
+             return False
+         return self.projects == other.projects
+
+     def __hash__(self) -> int:
+         # makes it possible to use Workspace as the key in the workspace build cache
+         # see WorkspaceMixin.caching_build_workspace_and_update_role
+         return hash(frozenset(self.projects.items()))
+
      def is_unmapped_single_project(self) -> bool:
          """
          Returns ``True`` if this workspace only has 1 project
@@ -387,6 +399,39 @@ class Workspace:
          """
          return len(self.projects) == 1 and not next(iter(self.projects.values()))

+     def merge_into(self, outdir: str | pathlib.Path) -> None:
+         """
+         Copies each project dir of this workspace into the specified ``outdir``.
+         Each project dir is copied into ``{outdir}/{target}`` where ``target`` is
+         the target mapping of the project dir.
+
+         For example:
+
+         .. code-block:: python
+
+             from os.path import expanduser
+
+             workspace = Workspace(
+                 projects={
+                     expanduser("~/workspace/torch"): "torch",
+                     expanduser("~/workspace/my_project"): "",
+                 }
+             )
+             workspace.merge_into(expanduser("~/tmp"))
+
+         Copies:
+
+         * ``~/workspace/torch/**`` into ``~/tmp/torch/**``
+         * ``~/workspace/my_project/**`` into ``~/tmp/**``
+
+         """
+
+         for src, dst in self.projects.items():
+             dst_path = pathlib.Path(outdir) / dst
+             if pathlib.Path(src).is_file():
+                 shutil.copy2(src, dst_path)
+             else:  # src is dir
+                 shutil.copytree(src, dst_path, dirs_exist_ok=True)
+
      @staticmethod
      def from_str(workspace: str | None) -> "Workspace":
          import yaml
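Because ``Workspace`` now defines ``__eq__``/``__hash__`` over its ``projects`` mapping, two workspaces with the same mapping can share a cache entry. A minimal sketch (the artifact string is a made-up placeholder):

    from torchx.specs import Workspace

    a = Workspace(projects={"~/workspace/torch": "torch"})
    b = Workspace(projects={"~/workspace/torch": "torch"})

    assert a == b and hash(a) == hash(b)

    build_cache = {a: "sha256:abc..."}  # hypothetical pointer to a built image
    assert build_cache[b] == "sha256:abc..."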
@@ -891,14 +936,12 @@ class runopt:
      Represents the metadata about the specific run option
      """

-     class alias(str):
-         pass
-
      default: CfgVal
      opt_type: Type[CfgVal]
      is_required: bool
      help: str
-     aliases: list[alias] | None = None
+     aliases: list[str] | None = None
+     deprecated_aliases: list[str] | None = None

      @property
      def is_type_list_of_str(self) -> bool:
@@ -990,7 +1033,7 @@ class runopts:

      def __init__(self) -> None:
          self._opts: Dict[str, runopt] = {}
-         self._alias_to_key: dict[runopt.alias, str] = {}
+         self._alias_to_key: dict[str, str] = {}

      def __iter__(self) -> Iterator[Tuple[str, runopt]]:
          return self._opts.items().__iter__()
@@ -1044,12 +1087,24 @@ class runopts:
          val = resolved_cfg.get(cfg_key)
          resolved_name = None
          aliases = runopt.aliases or []
+         deprecated_aliases = runopt.deprecated_aliases or []
          if val is None:
              for alias in aliases:
                  val = resolved_cfg.get(alias)
                  if alias in cfg or val is not None:
                      resolved_name = alias
                      break
+             for alias in deprecated_aliases:
+                 val = resolved_cfg.get(alias)
+                 if val is not None:
+                     resolved_name = alias
+                     use_instead = self._alias_to_key.get(alias)
+                     warnings.warn(
+                         f"Run option `{alias}` is deprecated, use `{use_instead}` instead",
+                         UserWarning,
+                         stacklevel=2,
+                     )
+                     break
          else:
              resolved_name = cfg_key
              for alias in aliases:
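Putting the new fields together, registering an option with a deprecated alias might look like this (the option names are made up):

    from torchx.specs.api import runopts

    opts = runopts()
    opts.add(
        "job_queue",
        type_=str,
        help="queue to submit the job into",
        default="default",
        deprecated_aliases=["queue"],
    )

    # resolving a cfg that uses the old name emits a UserWarning
    # pointing at `job_queue`, per the resolve() logic above
    cfg = opts.resolve({"queue": "gpu"})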
@@ -1172,49 +1227,23 @@ class runopts:
          cfg[key] = val
          return cfg

-     def _get_primary_key_and_aliases(
-         self,
-         cfg_key: list[str] | str,
-     ) -> tuple[str, list[runopt.alias]]:
-         """
-         Returns the primary key and aliases for the given cfg_key.
-         """
-         if isinstance(cfg_key, str):
-             return cfg_key, []
-
-         if len(cfg_key) == 0:
-             raise ValueError("cfg_key must be a non-empty list")
-         primary_key = None
-         aliases = list[runopt.alias]()
-         for name in cfg_key:
-             if isinstance(name, runopt.alias):
-                 aliases.append(name)
-             else:
-                 if primary_key is not None:
-                     raise ValueError(
-                         f"Given more than one primary key: {primary_key}, {name}. Please use runopt.alias type for aliases."
-                     )
-                 primary_key = name
-         if primary_key is None or primary_key == "":
-             raise ValueError(
-                 "Missing cfg_key. Please provide one other than the aliases."
-             )
-         return primary_key, aliases
-
      def add(
          self,
-         cfg_key: str | list[str],
+         cfg_key: str,
          type_: Type[CfgVal],
          help: str,
          default: CfgVal = None,
          required: bool = False,
+         aliases: Optional[list[str]] = None,
+         deprecated_aliases: Optional[list[str]] = None,
      ) -> None:
          """
          Adds the ``config`` option with the given help string and ``default``
          value (if any). If the ``default`` is not specified then this option
          is a required option.
          """
-         primary_key, aliases = self._get_primary_key_and_aliases(cfg_key)
+         aliases = aliases or []
+         deprecated_aliases = deprecated_aliases or []
          if required and default is not None:
              raise ValueError(
                  f"Required option: {cfg_key} must not specify default value. Given: {default}"
@@ -1225,10 +1254,20 @@ class runopts:
                  f"Option: {cfg_key}, must be of type: {type_}."
                  f" Given: {default} ({type(default).__name__})"
              )
-         opt = runopt(default, type_, required, help, aliases)
+
+         opt = runopt(
+             default,
+             type_,
+             required,
+             help,
+             list(set(aliases)),
+             list(set(deprecated_aliases)),
+         )
          for alias in aliases:
-             self._alias_to_key[alias] = primary_key
-         self._opts[primary_key] = opt
+             self._alias_to_key[alias] = cfg_key
+         for deprecated_alias in deprecated_aliases:
+             self._alias_to_key[deprecated_alias] = cfg_key
+         self._opts[cfg_key] = opt

      def update(self, other: "runopts") -> None:
          self._opts.update(other._opts)
torchx/version.py CHANGED
@@ -1,4 +1,3 @@
- #!/usr/bin/env python3
  # Copyright (c) Meta Platforms, Inc. and affiliates.
  # All rights reserved.
  #
@@ -7,6 +6,7 @@

  # pyre-strict

+ from torchx._version import BASE_VERSION
  from torchx.util.entrypoints import load

  # Follows PEP-0440 version scheme guidelines
@@ -18,7 +18,7 @@ from torchx.util.entrypoints import load
  # 0.1.0bN  # Beta release
  # 0.1.0rcN  # Release Candidate
  # 0.1.0  # Final release
- __version__ = "0.8.0dev0"
+ __version__: str = BASE_VERSION

  # Use the github container registry images corresponding to the current package
torchx/workspace/api.py CHANGED
@@ -8,26 +8,17 @@

  import abc
  import fnmatch
+ import logging
  import posixpath
- import shutil
  import tempfile
  import warnings
  from dataclasses import dataclass
- from pathlib import Path
- from typing import (
-     Any,
-     Dict,
-     Generic,
-     Iterable,
-     Mapping,
-     Tuple,
-     TYPE_CHECKING,
-     TypeVar,
-     Union,
- )
+ from typing import Any, Dict, Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar

  from torchx.specs import AppDef, CfgVal, Role, runopts, Workspace

+ logger: logging.Logger = logging.getLogger(__name__)
+
  if TYPE_CHECKING:
      from fsspec import AbstractFileSystem
@@ -113,45 +104,72 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
          """
          return runopts()

-     def build_workspace_and_update_role2(
+     def build_workspaces(self, roles: list[Role], cfg: Mapping[str, CfgVal]) -> None:
+         """
+         NOTE: this method MUTATES the passed roles!
+
+         Builds the workspaces (if any) for each role and updates the role to reflect the built workspace.
+         Typically ``role.image`` is updated with the newly built image that reflects the local workspace.
+         Some workspace implementations may add extra environment variables to make it easier for other
+         parts of the program to access the workspace. For example, a ``WORKSPACE_DIR`` env var may be added
+         to ``role.env`` that scripts can use to refer to the workspace directory in the container.
+         """
+
+         build_cache: dict[object, object] = {}
+
+         for i, role in enumerate(roles):
+             if role.workspace:
+                 old_img = role.image
+                 self.caching_build_workspace_and_update_role(role, cfg, build_cache)
+
+                 if old_img != role.image:
+                     logger.info(
+                         "role[%d]=%s updated with new image to include workspace changes",
+                         i,
+                         role.name,
+                     )
+
+     def caching_build_workspace_and_update_role(
          self,
          role: Role,
-         workspace: Union[Workspace, str],
          cfg: Mapping[str, CfgVal],
+         build_cache: dict[object, object],
      ) -> None:
          """
-         Same as :py:meth:`build_workspace_and_update_role` but operates
-         on :py:class:`Workspace` (supports multi-project workspaces)
-         as well as ``str`` (for backwards compatibility).
+         Same as :py:meth:`build_workspace_and_update_role` but takes
+         a ``build_cache`` that can be used to cache pointers to build artifacts
+         between building the workspace for each role.

-         If ``workspace`` is a ``str`` this method simply calls
+         This is useful when an appdef has multiple roles where the image and workspace
+         of the roles are the same but other attributes such as entrypoint or args are different.
+
+         NOTE: ``build_cache``'s lifetime is within :py:meth:`build_workspaces`
+         NOTE: the workspace implementation decides what to cache
+
+         Workspace subclasses should prefer implementing this method over
          :py:meth:`build_workspace_and_update_role`.

-         If ``workspace`` is :py:class:`Workspace` then the default
-         impl copies all the projects into a tmp directory and passes the tmp dir to
-         :py:meth:`build_workspace_and_update_role`
+         The default implementation of this method simply calls the (deprecated) non-caching
+         :py:meth:`build_workspace_and_update_role` and deals with multi-dir workspaces by
+         merging them into a single tmpdir before passing it down.

-         Subclasses can override this method to customize multi-project
-         workspace building logic.
          """
-         if isinstance(workspace, Workspace):
-             if not workspace.is_unmapped_single_project():
-                 with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
-                     for src, dst in workspace.projects.items():
-                         dst_path = Path(outdir) / dst
-                         if Path(src).is_file():
-                             shutil.copy2(src, dst_path)
-                         else:  # src is dir
-                             shutil.copytree(src, dst_path, dirs_exist_ok=True)
-
-                     self.build_workspace_and_update_role(role, outdir, cfg)
-                 return
-             else:  # single project workspace with no target mapping (treat like a str workspace)
-                 workspace = str(workspace)
-
-         self.build_workspace_and_update_role(role, workspace, cfg)

-     @abc.abstractmethod
+         workspace = role.workspace
+
+         if not workspace:
+             return
+
+         if workspace.is_unmapped_single_project():
+             # single-dir workspace with no target map; no need to copy to a tmp dir
+             self.build_workspace_and_update_role(role, str(workspace), cfg)
+         else:
+             # multi-dirs or single-dir with a target map;
+             # copy all dirs to a tmp dir and treat the tmp dir as a single-dir workspace
+             with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
+                 workspace.merge_into(outdir)
+                 self.build_workspace_and_update_role(role, outdir, cfg)
+
      def build_workspace_and_update_role(
          self,
          role: Role,
@@ -159,6 +177,9 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
          cfg: Mapping[str, CfgVal],
      ) -> None:
          """
+         .. note:: DEPRECATED: Workspace subclasses should implement
+             :py:meth:`caching_build_workspace_and_update_role` over this method.
+
          Builds the specified ``workspace`` with respect to ``img``
          and updates the ``role`` to reflect the built workspace artifacts.
          In the simplest case, this method builds a new image and updates
@@ -167,7 +188,7 @@ class WorkspaceMixin(abc.ABC, Generic[T]):

          Note: this method mutates the passed ``role``.
          """
-         ...
+         raise NotImplementedError("implement `caching_build_workspace_and_update_role`")

      def dryrun_push_images(self, app: AppDef, cfg: Mapping[str, CfgVal]) -> T:
          """
torchx_nightly-2025.10.16.dist-info/METADATA → torchx_nightly-2025.11.20.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: torchx-nightly
- Version: 2025.10.16
+ Version: 2025.11.20
  Summary: TorchX SDK and Components
  Home-page: https://github.com/meta-pytorch/torchx
  Author: TorchX Devs
@@ -23,8 +23,10 @@ Requires-Dist: docker
  Requires-Dist: filelock
  Requires-Dist: fsspec>=2023.10.0
  Requires-Dist: tabulate
- Provides-Extra: aws_batch
+ Provides-Extra: aws-batch
  Requires-Dist: boto3; extra == "aws-batch"
+ Provides-Extra: kubernetes
+ Requires-Dist: kubernetes>=11; extra == "kubernetes"
  Provides-Extra: dev
  Requires-Dist: aiobotocore==2.20.0; extra == "dev"
  Requires-Dist: ax-platform[mysql]==0.2.3; extra == "dev"
@@ -47,18 +49,29 @@ Requires-Dist: pytorch-lightning==2.5.0; extra == "dev"
  Requires-Dist: tensorboard==2.14.0; extra == "dev"
  Requires-Dist: sagemaker==2.230.0; extra == "dev"
  Requires-Dist: torch-model-archiver>=0.4.2; extra == "dev"
- Requires-Dist: torch>=2.7.0; extra == "dev"
+ Requires-Dist: torch; extra == "dev"
  Requires-Dist: torchmetrics==1.6.3; extra == "dev"
  Requires-Dist: torchserve>=0.10.0; extra == "dev"
- Requires-Dist: torchtext==0.18.0; extra == "dev"
- Requires-Dist: torchvision==0.23.0; extra == "dev"
+ Requires-Dist: torchtext; extra == "dev"
+ Requires-Dist: torchvision; extra == "dev"
  Requires-Dist: typing-extensions; extra == "dev"
  Requires-Dist: ts==0.5.1; extra == "dev"
  Requires-Dist: wheel; extra == "dev"
  Requires-Dist: lintrunner; extra == "dev"
  Requires-Dist: lintrunner-adapters; extra == "dev"
- Provides-Extra: kubernetes
- Requires-Dist: kubernetes>=11; extra == "kubernetes"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: license
+ Dynamic: license-file
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  [![PyPI](https://img.shields.io/pypi/v/torchx)](https://pypi.org/project/torchx/)
  [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/meta-pytorch/torchx/blob/main/LICENSE)
torchx_nightly-2025.10.16.dist-info/RECORD → torchx_nightly-2025.11.20.dist-info/RECORD CHANGED
@@ -1,6 +1,7 @@
  torchx/__init__.py,sha256=QFDTdJacncWYWHL-2QyWdY5MUck3jVfSPRRGdvedcKc,355
+ torchx/_version.py,sha256=TzDuXIviDldFbXAhGe33redQcoP33jIsVR_hMyqSgdc,250
  torchx/notebook.py,sha256=Rc6XUMzSq7NXtsYdtVluE6T89LpEhcba-3ANxuaLCCU,1008
- torchx/version.py,sha256=d28ccaZP21nlF8jEmSLjJiidyquMJo02tDpeVD36inc,951
+ torchx/version.py,sha256=YcE66UkBxYHMQMtjVts4jF3l6Qeaj1gK_LzxU77l8Bo,975
  torchx/apps/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
  torchx/apps/serve/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
  torchx/apps/serve/serve.py,sha256=u_h8agld1TwIPq5GRosHL3uxhkljNfS65McLB77O0OE,4386
@@ -13,6 +14,7 @@ torchx/cli/argparse_util.py,sha256=kZb1ubEHDrBsmrxpySFRQCW7wmHuRHD8eAInuEZjlsI,3
  torchx/cli/cmd_base.py,sha256=SdqMtqi04CEqnzcgcS35DbDbsBeMxSgEhfynfpIkMGk,790
  torchx/cli/cmd_cancel.py,sha256=NKfOCu_44Lch9vliGSQ0Uv6BVqpUqj7Tob652TI-ua4,835
  torchx/cli/cmd_configure.py,sha256=1kTv0qbsbV44So74plAySwWu56pQrqjhfW_kbfdC3Rw,1722
+ torchx/cli/cmd_delete.py,sha256=US1f6Jvyhz4R_0Q0a8GeNTDMrhzo8WE_ECcdOf0MjKE,835
  torchx/cli/cmd_describe.py,sha256=E5disbHoKTsqYKp2s3DaFW9GDLCCOgdOc3pQoHKoyCs,1283
  torchx/cli/cmd_list.py,sha256=alkS9aIaDI8lX3W8uj8Vtr3IU3G2VeCuokKSd3zOFug,1409
  torchx/cli/cmd_log.py,sha256=v-EZYUDOcG95rEgTnrsmPJMUyxM9Mk8YFAJtUxtgViE,5475
@@ -21,7 +23,7 @@ torchx/cli/cmd_runopts.py,sha256=NWZiP8XpQjfTDJgays2c6MgL_8wxFoeDge6NstaZdKk,130
  torchx/cli/cmd_status.py,sha256=22IAEmKs0qkG6kJi83u9dRX2Q-ntT7yehVx7FxtY-vQ,2114
  torchx/cli/cmd_tracker.py,sha256=9gmOmYi-89qQRGQfSrXCTto7ve54_JKFqs_wa7oRUA8,5223
  torchx/cli/colors.py,sha256=yLMes7e_UoLAfhxE0W6edhc58t83UHAlnCN2ANPeuXw,568
- torchx/cli/main.py,sha256=1Jf2cnO6Y2W69Adt88avmNPVrL6ZR4Hkff6GVB4293k,3484
+ torchx/cli/main.py,sha256=1DJTmKdvPW_7hod8OUVT3Br2uwsZVEDU-2bTE0NJ0zY,3559
  torchx/components/__init__.py,sha256=JaVte0j9Gqi6IrjZKudJ2Kr3gkdHsvlCdRTo-zYpSRo,11815
  torchx/components/component_test_base.py,sha256=22iNSdVa_qTW3SMM30Pw5UEWlK4DZVw0C03EqYiaLOI,4150
  torchx/components/dist.py,sha256=6DNPEvHVqEifmM8g1L7HVY169cQv_7tSfSlh3o6lTp4,14930
@@ -48,7 +50,7 @@ torchx/examples/apps/lightning/profiler.py,sha256=SSSihnwjeUTkBoz0E3qn1b-wbkfUIo
  torchx/examples/apps/lightning/train.py,sha256=0wvvshGHvZowePB4LfclXwn40X7i9euM0ReETWBcPSo,6253
  torchx/pipelines/__init__.py,sha256=2MbRVk5xwRjg-d2qPemeXpEhDsocMQumPQ53lsesZAI,606
  torchx/runner/__init__.py,sha256=x8Sz7s_tLxPgJgvWIhK4ju9BNZU61uBFywGwDY6CqJs,315
- torchx/runner/api.py,sha256=jxtgOl7nNOqpzG-sjUJngXhIOachqaVfKu9rF8YqHWI,31271
+ torchx/runner/api.py,sha256=Qi12Kjkr_zpQBesbLuCtgKET8JhHnQk22MV7Czi4l1A,30832
  torchx/runner/config.py,sha256=SaKOB50d79WaMFPWK8CC4as6UaNFaRGhrBkfajq3KC4,18311
  torchx/runner/events/__init__.py,sha256=cMiNjnr4eUNQ2Nxxtu4nsvN5lu56b-a6nJ-ct3i7DQk,5536
  torchx/runner/events/api.py,sha256=bvxKBAYK8LzbrBNaNLgL1x0aivtfANmWo1EMGOrSR8k,2668
@@ -57,20 +59,20 @@ torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,59
  torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
  torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
  torchx/schedulers/__init__.py,sha256=FQN9boQM4mwOD3sK9LZ3GBgw-gJ7Vx4MFj6z6ATQIrc,2211
- torchx/schedulers/api.py,sha256=5Amli1httEl82XebAqd8vl3dM8zMKwYfRgfd0mEq3is,14538
+ torchx/schedulers/api.py,sha256=PwXmqMDbwDlwpJsnaXcQSX6lf7YkyK6YsTSviMyflGY,16563
  torchx/schedulers/aws_batch_scheduler.py,sha256=-HpjNVhSFBDxZo3cebK-3YEguB49dxoaud2gz30cAVM,29437
  torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
  torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
  torchx/schedulers/docker_scheduler.py,sha256=x-XHCqYnrmiW0dHfVA7hz7Fp2Qgw7fvMgRm058YOngY,16880
  torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
  torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wyuSFKG0-ywLc3qITJo,42949
- torchx/schedulers/kubernetes_scheduler.py,sha256=Wb6XDzwcvp3-NqBhKrjtgDC4L6GVOmcyP6fuoPFByBE,28288
+ torchx/schedulers/kubernetes_scheduler.py,sha256=PTCgDLshK5EUsZIGnTafjZ7LrO2YUjHmgR0mPL9VGFM,35672
  torchx/schedulers/local_scheduler.py,sha256=ttnxFDy48_DSYDEW-no27OirFZOyfrjwJ2S1MwBUi74,41929
  torchx/schedulers/lsf_scheduler.py,sha256=YS6Yel8tXJqLPxbcGz95lZG2nCi36AQXdNDyuBJePKg,17661
  torchx/schedulers/slurm_scheduler.py,sha256=vypGaCZe61bkyNkqRlK4Iwmk_NaAUQi-DsspaWd6BZw,31873
  torchx/schedulers/streams.py,sha256=8_SLezgnWgfv_zXUsJCUM34-h2dtv25NmZuxEwkzmxw,2007
- torchx/specs/__init__.py,sha256=SXS4r_roOkbbAL-p7EY5fl5ou-AG7S9Ck-zKtRBdHOk,6760
- torchx/specs/api.py,sha256=ICKsTWxEats9IwWXUm-D1NJy4jyONMV2zdrWfUrpKNg,47827
+ torchx/specs/__init__.py,sha256=TaC0AveTebkCMo5hmdY1wGpo09vFDqzWnsT166ionTw,7108
+ torchx/specs/api.py,sha256=OrLX4gGa97qtjUbl3x_YnOKCdP0rQkVEruPIbNjo7fk,49230
  torchx/specs/builders.py,sha256=Ye3of4MupJ-da8vLaX6_-nzGo_FRw1BFpYsX6dAZCNk,13730
  torchx/specs/file_linter.py,sha256=z0c4mKJv47BWiPaWCdUM0A8kHwnj4b1s7oTmESuD9Tc,14407
  torchx/specs/finder.py,sha256=gWQNEFrLYqrZoI0gMMhQ70YAC4sxqS0ZFpoWAmcVi44,17438
@@ -99,12 +101,12 @@ torchx/util/shlex.py,sha256=eXEKu8KC3zIcd8tEy9_s8Ds5oma8BORr-0VGWNpG2dk,463
  torchx/util/strings.py,sha256=7Ef1loz2IYMrzeJ6Lewywi5cBIc3X3g7lSPbT1Tn_z4,664
  torchx/util/types.py,sha256=E9dxAWQnsJkIDuHtg-poeOJ4etucSI_xP_Z5kNJX8uI,9229
  torchx/workspace/__init__.py,sha256=FqN8AN4VhR1C_SBY10MggQvNZmyanbbuPuE-JCjkyUY,798
- torchx/workspace/api.py,sha256=h2SaC-pYPBLuo3XtkXJ0APMoro-C-ry7KucI7r3EUf4,8753
+ torchx/workspace/api.py,sha256=UESQ4qgxXjsb6Y1wP9OGv2ixaFgaTs3SqghmNuOJIZM,10235
  torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
  torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
- torchx_nightly-2025.10.16.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
- torchx_nightly-2025.10.16.dist-info/METADATA,sha256=LdONpXnVGtW8end6ZL0EIZ1W4TwP6sJx1TypIYVg8z8,5069
- torchx_nightly-2025.10.16.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- torchx_nightly-2025.10.16.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
- torchx_nightly-2025.10.16.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
- torchx_nightly-2025.10.16.dist-info/RECORD,,
+ torchx_nightly-2025.11.20.dist-info/licenses/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
+ torchx_nightly-2025.11.20.dist-info/METADATA,sha256=yeYyvVFSNXDwzGTXtDktxEfyAHvepkZeM7uzQbSoqjk,5324
+ torchx_nightly-2025.11.20.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+ torchx_nightly-2025.11.20.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
+ torchx_nightly-2025.11.20.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
+ torchx_nightly-2025.11.20.dist-info/RECORD,,
torchx_nightly-2025.10.16.dist-info/WHEEL → torchx_nightly-2025.11.20.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.45.1)
+ Generator: setuptools (79.0.1)
  Root-Is-Purelib: true
  Tag: py3-none-any