torchx-nightly 2025.10.16__py3-none-any.whl → 2025.11.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/_version.py +8 -0
- torchx/runner/api.py +25 -33
- torchx/schedulers/api.py +7 -2
- torchx/schedulers/kubernetes_scheduler.py +198 -16
- torchx/specs/__init__.py +17 -3
- torchx/specs/api.py +79 -40
- torchx/version.py +2 -2
- torchx/workspace/api.py +63 -42
- {torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info}/METADATA +21 -8
- {torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info}/RECORD +14 -13
- {torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info}/WHEEL +1 -1
- {torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info/licenses}/LICENSE +0 -0
- {torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info}/top_level.txt +0 -0
torchx/_version.py
ADDED
torchx/runner/api.py
CHANGED
|
@@ -420,52 +420,44 @@ class Runner:
|
|
|
420
420
|
scheduler,
|
|
421
421
|
runcfg=json.dumps(cfg) if cfg else None,
|
|
422
422
|
workspace=str(workspace),
|
|
423
|
-
):
|
|
423
|
+
) as ctx:
|
|
424
424
|
sched = self._scheduler(scheduler)
|
|
425
425
|
resolved_cfg = sched.run_opts().resolve(cfg)
|
|
426
426
|
|
|
427
427
|
sched._pre_build_validate(app, scheduler, resolved_cfg)
|
|
428
428
|
|
|
429
429
|
if isinstance(sched, WorkspaceMixin):
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
# later, torchx added support for the workspace attr in Role
|
|
436
|
-
# for BC, give precedence to the workspace argument over the workspace attr for role[0]
|
|
437
|
-
if role_workspace:
|
|
438
|
-
logger.info(
|
|
439
|
-
f"Using workspace={workspace} over role[{i}].workspace={role_workspace} for role[{i}]={role.name}."
|
|
440
|
-
" To use the role's workspace attr pass: --workspace='' from CLI or workspace=None programmatically." # noqa: B950
|
|
441
|
-
)
|
|
442
|
-
role_workspace = workspace
|
|
443
|
-
|
|
444
|
-
if role_workspace:
|
|
445
|
-
old_img = role.image
|
|
430
|
+
if workspace:
|
|
431
|
+
# NOTE: torchx originally took workspace as a runner arg and only applied the workspace to role[0]
|
|
432
|
+
# later, torchx added support for the workspace attr in Role
|
|
433
|
+
# for BC, give precedence to the workspace argument over the workspace attr for role[0]
|
|
434
|
+
if app.roles[0].workspace:
|
|
446
435
|
logger.info(
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
role, role_workspace, resolved_cfg
|
|
436
|
+
"Overriding role[%d] (%s) workspace to `%s`"
|
|
437
|
+
"To use the role's workspace attr pass: --workspace='' from CLI or workspace=None programmatically.",
|
|
438
|
+
0,
|
|
439
|
+
role.name,
|
|
440
|
+
str(app.roles[0].workspace),
|
|
453
441
|
)
|
|
442
|
+
app.roles[0].workspace = (
|
|
443
|
+
Workspace.from_str(workspace)
|
|
444
|
+
if isinstance(workspace, str)
|
|
445
|
+
else workspace
|
|
446
|
+
)
|
|
454
447
|
|
|
455
|
-
|
|
456
|
-
logger.info(
|
|
457
|
-
f"Built new image `{role.image}` based on original image `{old_img}`"
|
|
458
|
-
f" and changes in workspace `{role_workspace}` for role[{i}]={role.name}."
|
|
459
|
-
)
|
|
460
|
-
else:
|
|
461
|
-
logger.info(
|
|
462
|
-
f"Reusing original image `{old_img}` for role[{i}]={role.name}."
|
|
463
|
-
" Either a patch was built or no changes to workspace was detected."
|
|
464
|
-
)
|
|
448
|
+
sched.build_workspaces(app.roles, resolved_cfg)
|
|
465
449
|
|
|
466
450
|
sched._validate(app, scheduler, resolved_cfg)
|
|
467
451
|
dryrun_info = sched.submit_dryrun(app, resolved_cfg)
|
|
468
452
|
dryrun_info._scheduler = scheduler
|
|
453
|
+
|
|
454
|
+
event = ctx._torchx_event
|
|
455
|
+
event.scheduler = scheduler
|
|
456
|
+
event.runcfg = json.dumps(cfg) if cfg else None
|
|
457
|
+
event.app_id = app.name
|
|
458
|
+
event.app_image = none_throws(dryrun_info._app).roles[0].image
|
|
459
|
+
event.app_metadata = app.metadata
|
|
460
|
+
|
|
469
461
|
return dryrun_info
|
|
470
462
|
|
|
471
463
|
def scheduler_run_opts(self, scheduler: str) -> runopts:
|
torchx/schedulers/api.py
CHANGED
|
@@ -131,7 +131,7 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
|
|
|
131
131
|
self,
|
|
132
132
|
app: A,
|
|
133
133
|
cfg: T,
|
|
134
|
-
workspace:
|
|
134
|
+
workspace: str | Workspace | None = None,
|
|
135
135
|
) -> str:
|
|
136
136
|
"""
|
|
137
137
|
Submits the application to be run by the scheduler.
|
|
@@ -145,7 +145,12 @@ class Scheduler(abc.ABC, Generic[T, A, D]):
|
|
|
145
145
|
resolved_cfg = self.run_opts().resolve(cfg)
|
|
146
146
|
if workspace:
|
|
147
147
|
assert isinstance(self, WorkspaceMixin)
|
|
148
|
-
|
|
148
|
+
|
|
149
|
+
if isinstance(workspace, str):
|
|
150
|
+
workspace = Workspace.from_str(workspace)
|
|
151
|
+
|
|
152
|
+
app.roles[0].workspace = workspace
|
|
153
|
+
self.build_workspaces(app.roles, resolved_cfg)
|
|
149
154
|
|
|
150
155
|
# pyre-fixme: submit_dryrun takes Generic type for resolved_cfg
|
|
151
156
|
dryrun_info = self.submit_dryrun(app, resolved_cfg)
|
|
@@ -27,10 +27,81 @@ Install Volcano:
|
|
|
27
27
|
See the
|
|
28
28
|
`Volcano Quickstart <https://github.com/volcano-sh/volcano>`_
|
|
29
29
|
for more information.
|
|
30
|
+
|
|
31
|
+
Pod Overlay
|
|
32
|
+
===========
|
|
33
|
+
|
|
34
|
+
You can overlay arbitrary Kubernetes Pod fields on generated pods by setting
|
|
35
|
+
the ``kubernetes`` metadata on your role. The value can be:
|
|
36
|
+
|
|
37
|
+
- A dict with the overlay structure
|
|
38
|
+
- A resource URI pointing to a YAML file (e.g. ``file://``, ``s3://``, ``gs://``)
|
|
39
|
+
|
|
40
|
+
Merge semantics:
|
|
41
|
+
- **dict**: recursive merge (upsert)
|
|
42
|
+
- **list**: append by default, replace if tuple (Python) or ``!!python/tuple`` tag (YAML)
|
|
43
|
+
- **primitives**: replace
|
|
44
|
+
|
|
45
|
+
.. code:: python
|
|
46
|
+
|
|
47
|
+
from torchx.specs import Role
|
|
48
|
+
|
|
49
|
+
# Dict overlay - lists append, tuples replace
|
|
50
|
+
role = Role(
|
|
51
|
+
name="trainer",
|
|
52
|
+
image="my-image:latest",
|
|
53
|
+
entrypoint="train.py",
|
|
54
|
+
metadata={
|
|
55
|
+
"kubernetes": {
|
|
56
|
+
"spec": {
|
|
57
|
+
"nodeSelector": {"gpu": "true"},
|
|
58
|
+
"tolerations": [{"key": "nvidia.com/gpu", "operator": "Exists"}], # appends
|
|
59
|
+
"volumes": ({"name": "my-volume", "emptyDir": {}},) # replaces
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# File URI overlay
|
|
66
|
+
role = Role(
|
|
67
|
+
name="trainer",
|
|
68
|
+
image="my-image:latest",
|
|
69
|
+
entrypoint="train.py",
|
|
70
|
+
metadata={
|
|
71
|
+
"kubernetes": "file:///path/to/pod_overlay.yaml"
|
|
72
|
+
}
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
CLI usage with builtin components:
|
|
76
|
+
|
|
77
|
+
.. code:: bash
|
|
78
|
+
|
|
79
|
+
$ torchx run --scheduler kubernetes dist.ddp \\
|
|
80
|
+
--metadata kubernetes=file:///path/to/pod_overlay.yaml \\
|
|
81
|
+
--script train.py
|
|
82
|
+
|
|
83
|
+
Example ``pod_overlay.yaml``:
|
|
84
|
+
|
|
85
|
+
.. code:: yaml
|
|
86
|
+
|
|
87
|
+
spec:
|
|
88
|
+
nodeSelector:
|
|
89
|
+
node.kubernetes.io/instance-type: p4d.24xlarge
|
|
90
|
+
tolerations:
|
|
91
|
+
- key: nvidia.com/gpu
|
|
92
|
+
operator: Exists
|
|
93
|
+
effect: NoSchedule
|
|
94
|
+
volumes: !!python/tuple
|
|
95
|
+
- name: my-volume
|
|
96
|
+
emptyDir: {}
|
|
97
|
+
|
|
98
|
+
The overlay is deep-merged with the generated pod, preserving existing fields
|
|
99
|
+
and adding or overriding specified ones.
|
|
30
100
|
"""
|
|
31
101
|
|
|
32
102
|
import json
|
|
33
103
|
import logging
|
|
104
|
+
import re
|
|
34
105
|
import warnings
|
|
35
106
|
from dataclasses import dataclass
|
|
36
107
|
from datetime import datetime
|
|
@@ -45,6 +116,7 @@ from typing import (
|
|
|
45
116
|
Tuple,
|
|
46
117
|
TYPE_CHECKING,
|
|
47
118
|
TypedDict,
|
|
119
|
+
Union,
|
|
48
120
|
)
|
|
49
121
|
|
|
50
122
|
import torchx
|
|
@@ -97,6 +169,40 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
97
169
|
RESERVED_MILLICPU = 100
|
|
98
170
|
RESERVED_MEMMB = 1024
|
|
99
171
|
|
|
172
|
+
|
|
173
|
+
def _apply_pod_overlay(pod: "V1Pod", overlay: Dict[str, Any]) -> None:
|
|
174
|
+
"""Apply overlay dict to V1Pod object, merging nested fields.
|
|
175
|
+
|
|
176
|
+
Merge semantics:
|
|
177
|
+
- dict: upsert (recursive merge)
|
|
178
|
+
- list: append by default, replace if tuple
|
|
179
|
+
- primitives: replace
|
|
180
|
+
"""
|
|
181
|
+
from kubernetes import client
|
|
182
|
+
|
|
183
|
+
api = client.ApiClient()
|
|
184
|
+
pod_dict = api.sanitize_for_serialization(pod)
|
|
185
|
+
|
|
186
|
+
def deep_merge(base: Dict[str, Any], overlay: Dict[str, Any]) -> None:
|
|
187
|
+
for key, value in overlay.items():
|
|
188
|
+
if isinstance(value, dict) and key in base and isinstance(base[key], dict):
|
|
189
|
+
deep_merge(base[key], value)
|
|
190
|
+
elif isinstance(value, tuple):
|
|
191
|
+
base[key] = list(value)
|
|
192
|
+
elif (
|
|
193
|
+
isinstance(value, list) and key in base and isinstance(base[key], list)
|
|
194
|
+
):
|
|
195
|
+
base[key].extend(value)
|
|
196
|
+
else:
|
|
197
|
+
base[key] = value
|
|
198
|
+
|
|
199
|
+
deep_merge(pod_dict, overlay)
|
|
200
|
+
|
|
201
|
+
merged_pod = api._ApiClient__deserialize(pod_dict, "V1Pod")
|
|
202
|
+
pod.spec = merged_pod.spec
|
|
203
|
+
pod.metadata = merged_pod.metadata
|
|
204
|
+
|
|
205
|
+
|
|
100
206
|
RETRY_POLICIES: Mapping[str, Iterable[Mapping[str, str]]] = {
|
|
101
207
|
RetryPolicy.REPLICA: [],
|
|
102
208
|
RetryPolicy.APPLICATION: [
|
|
@@ -369,7 +475,7 @@ def app_to_resource(
|
|
|
369
475
|
queue: str,
|
|
370
476
|
service_account: Optional[str],
|
|
371
477
|
priority_class: Optional[str] = None,
|
|
372
|
-
) -> Dict[str,
|
|
478
|
+
) -> Dict[str, Any]:
|
|
373
479
|
"""
|
|
374
480
|
app_to_resource creates a volcano job kubernetes resource definition from
|
|
375
481
|
the provided AppDef. The resource definition can be used to launch the
|
|
@@ -402,6 +508,17 @@ def app_to_resource(
|
|
|
402
508
|
replica_role.env["TORCHX_IMAGE"] = replica_role.image
|
|
403
509
|
|
|
404
510
|
pod = role_to_pod(name, replica_role, service_account)
|
|
511
|
+
if k8s_metadata := role.metadata.get("kubernetes"):
|
|
512
|
+
if isinstance(k8s_metadata, str):
|
|
513
|
+
import fsspec
|
|
514
|
+
|
|
515
|
+
with fsspec.open(k8s_metadata, "r") as f:
|
|
516
|
+
k8s_metadata = yaml.unsafe_load(f)
|
|
517
|
+
elif not isinstance(k8s_metadata, dict):
|
|
518
|
+
raise ValueError(
|
|
519
|
+
f"metadata['kubernetes'] must be a dict or resource URI, got {type(k8s_metadata)}"
|
|
520
|
+
)
|
|
521
|
+
_apply_pod_overlay(pod, k8s_metadata)
|
|
405
522
|
pod.metadata.labels.update(
|
|
406
523
|
pod_labels(
|
|
407
524
|
app=app,
|
|
@@ -444,7 +561,7 @@ does NOT support retries correctly. More info: https://github.com/volcano-sh/vol
|
|
|
444
561
|
if priority_class is not None:
|
|
445
562
|
job_spec["priorityClassName"] = priority_class
|
|
446
563
|
|
|
447
|
-
resource: Dict[str,
|
|
564
|
+
resource: Dict[str, Any] = {
|
|
448
565
|
"apiVersion": "batch.volcano.sh/v1alpha1",
|
|
449
566
|
"kind": "Job",
|
|
450
567
|
"metadata": {"name": f"{unique_app_id}"},
|
|
@@ -456,7 +573,7 @@ does NOT support retries correctly. More info: https://github.com/volcano-sh/vol
|
|
|
456
573
|
@dataclass
|
|
457
574
|
class KubernetesJob:
|
|
458
575
|
images_to_push: Dict[str, Tuple[str, str]]
|
|
459
|
-
resource: Dict[str,
|
|
576
|
+
resource: Dict[str, Any]
|
|
460
577
|
|
|
461
578
|
def __str__(self) -> str:
|
|
462
579
|
return yaml.dump(sanitize_for_serialization(self.resource))
|
|
@@ -471,6 +588,7 @@ class KubernetesOpts(TypedDict, total=False):
|
|
|
471
588
|
image_repo: Optional[str]
|
|
472
589
|
service_account: Optional[str]
|
|
473
590
|
priority_class: Optional[str]
|
|
591
|
+
validate_spec: Optional[bool]
|
|
474
592
|
|
|
475
593
|
|
|
476
594
|
class KubernetesScheduler(
|
|
@@ -636,7 +754,7 @@ class KubernetesScheduler(
|
|
|
636
754
|
else:
|
|
637
755
|
raise
|
|
638
756
|
|
|
639
|
-
return f
|
|
757
|
+
return f"{namespace}:{resp['metadata']['name']}"
|
|
640
758
|
|
|
641
759
|
def _submit_dryrun(
|
|
642
760
|
self, app: AppDef, cfg: KubernetesOpts
|
|
@@ -659,6 +777,36 @@ class KubernetesScheduler(
|
|
|
659
777
|
), "priority_class must be a str"
|
|
660
778
|
|
|
661
779
|
resource = app_to_resource(app, queue, service_account, priority_class)
|
|
780
|
+
|
|
781
|
+
if cfg.get("validate_spec"):
|
|
782
|
+
try:
|
|
783
|
+
self._custom_objects_api().create_namespaced_custom_object(
|
|
784
|
+
group="batch.volcano.sh",
|
|
785
|
+
version="v1alpha1",
|
|
786
|
+
namespace=cfg.get("namespace") or "default",
|
|
787
|
+
plural="jobs",
|
|
788
|
+
body=resource,
|
|
789
|
+
dry_run="All",
|
|
790
|
+
)
|
|
791
|
+
except Exception as e:
|
|
792
|
+
from kubernetes.client.rest import ApiException
|
|
793
|
+
|
|
794
|
+
if isinstance(e, ApiException):
|
|
795
|
+
raise ValueError(f"Invalid job spec: {e.reason}") from e
|
|
796
|
+
raise
|
|
797
|
+
|
|
798
|
+
job_name = resource["metadata"]["name"]
|
|
799
|
+
for task in resource["spec"]["tasks"]:
|
|
800
|
+
task_name = task["name"]
|
|
801
|
+
replicas = task.get("replicas", 1)
|
|
802
|
+
max_index = replicas - 1
|
|
803
|
+
pod_name = f"{job_name}-{task_name}-{max_index}"
|
|
804
|
+
if len(pod_name) > 63:
|
|
805
|
+
raise ValueError(
|
|
806
|
+
f"Pod name '{pod_name}' ({len(pod_name)} chars) exceeds 63 character limit. "
|
|
807
|
+
f"Shorten app.name or role names"
|
|
808
|
+
)
|
|
809
|
+
|
|
662
810
|
req = KubernetesJob(
|
|
663
811
|
resource=resource,
|
|
664
812
|
images_to_push=images_to_push,
|
|
@@ -703,19 +851,32 @@ class KubernetesScheduler(
|
|
|
703
851
|
type_=str,
|
|
704
852
|
help="The name of the PriorityClass to set on the job specs",
|
|
705
853
|
)
|
|
854
|
+
opts.add(
|
|
855
|
+
"validate_spec",
|
|
856
|
+
type_=bool,
|
|
857
|
+
help="Validate job spec using Kubernetes API dry-run before submission",
|
|
858
|
+
default=True,
|
|
859
|
+
)
|
|
706
860
|
return opts
|
|
707
861
|
|
|
708
862
|
def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
|
|
863
|
+
from kubernetes.client.rest import ApiException
|
|
864
|
+
|
|
709
865
|
namespace, name = app_id.split(":")
|
|
710
866
|
roles = {}
|
|
711
867
|
roles_statuses = {}
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
868
|
+
try:
|
|
869
|
+
resp = self._custom_objects_api().get_namespaced_custom_object_status(
|
|
870
|
+
group="batch.volcano.sh",
|
|
871
|
+
version="v1alpha1",
|
|
872
|
+
namespace=namespace,
|
|
873
|
+
plural="jobs",
|
|
874
|
+
name=name,
|
|
875
|
+
)
|
|
876
|
+
except ApiException as e:
|
|
877
|
+
if e.status == 404:
|
|
878
|
+
return None
|
|
879
|
+
raise
|
|
719
880
|
status = resp.get("status")
|
|
720
881
|
if status:
|
|
721
882
|
state_str = status["state"]["phase"]
|
|
@@ -824,13 +985,34 @@ def create_scheduler(
|
|
|
824
985
|
def pod_labels(
|
|
825
986
|
app: AppDef, role_idx: int, role: Role, replica_id: int, app_id: str
|
|
826
987
|
) -> Dict[str, str]:
|
|
988
|
+
|
|
989
|
+
def clean(label_value: str) -> str:
|
|
990
|
+
# cleans the provided `label_value` to make it compliant
|
|
991
|
+
# to pod label specs as described in
|
|
992
|
+
# https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
|
|
993
|
+
#
|
|
994
|
+
# Valid label value:
|
|
995
|
+
# must be 63 characters or less (can be empty),
|
|
996
|
+
# unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
|
|
997
|
+
# could contain dashes (-), underscores (_), dots (.), and alphanumerics between.
|
|
998
|
+
|
|
999
|
+
# Replace invalid characters (allow: alphanum, -, _, .) with "."
|
|
1000
|
+
label_value = re.sub(r"[^A-Za-z0-9\-_.]", ".", label_value)
|
|
1001
|
+
# Replace leading non-alphanumeric with "."
|
|
1002
|
+
label_value = re.sub(r"^[^A-Za-z0-9]+", ".", label_value)
|
|
1003
|
+
# Replace trailing non-alphanumeric with "."
|
|
1004
|
+
label_value = re.sub(r"[^A-Za-z0-9]+$", ".", label_value)
|
|
1005
|
+
|
|
1006
|
+
# Trim to 63 characters
|
|
1007
|
+
return label_value[:63]
|
|
1008
|
+
|
|
827
1009
|
return {
|
|
828
|
-
LABEL_VERSION: torchx.__version__,
|
|
829
|
-
LABEL_APP_NAME: app.name,
|
|
1010
|
+
LABEL_VERSION: clean(torchx.__version__),
|
|
1011
|
+
LABEL_APP_NAME: clean(app.name),
|
|
830
1012
|
LABEL_ROLE_INDEX: str(role_idx),
|
|
831
|
-
LABEL_ROLE_NAME: role.name,
|
|
1013
|
+
LABEL_ROLE_NAME: clean(role.name),
|
|
832
1014
|
LABEL_REPLICA_ID: str(replica_id),
|
|
833
|
-
LABEL_KUBE_APP_NAME: app.name,
|
|
1015
|
+
LABEL_KUBE_APP_NAME: clean(app.name),
|
|
834
1016
|
LABEL_ORGANIZATION: "torchx.pytorch.org",
|
|
835
|
-
LABEL_UNIQUE_NAME: app_id,
|
|
1017
|
+
LABEL_UNIQUE_NAME: clean(app_id),
|
|
836
1018
|
}
|
torchx/specs/__init__.py
CHANGED
|
@@ -14,7 +14,7 @@ scheduler or pipeline adapter.
|
|
|
14
14
|
import difflib
|
|
15
15
|
|
|
16
16
|
import os
|
|
17
|
-
from typing import Callable, Dict, Mapping, Optional
|
|
17
|
+
from typing import Callable, Dict, Iterator, Mapping, Optional
|
|
18
18
|
|
|
19
19
|
from torchx.specs.api import (
|
|
20
20
|
ALL,
|
|
@@ -113,8 +113,22 @@ class _NamedResourcesLibrary:
|
|
|
113
113
|
def __contains__(self, key: str) -> bool:
|
|
114
114
|
return key in _named_resource_factories
|
|
115
115
|
|
|
116
|
-
def __iter__(self) ->
|
|
117
|
-
|
|
116
|
+
def __iter__(self) -> Iterator[str]:
|
|
117
|
+
"""Iterates through the names of the registered named_resources.
|
|
118
|
+
|
|
119
|
+
Usage:
|
|
120
|
+
|
|
121
|
+
.. doctest::
|
|
122
|
+
|
|
123
|
+
from torchx import specs
|
|
124
|
+
|
|
125
|
+
for resource_name in specs.named_resources:
|
|
126
|
+
resource = specs.resource(h=resource_name)
|
|
127
|
+
assert isinstance(resource, specs.Resource)
|
|
128
|
+
|
|
129
|
+
"""
|
|
130
|
+
for key in _named_resource_factories:
|
|
131
|
+
yield (key)
|
|
118
132
|
|
|
119
133
|
|
|
120
134
|
named_resources: _NamedResourcesLibrary = _NamedResourcesLibrary()
|
torchx/specs/api.py
CHANGED
|
@@ -14,10 +14,12 @@ import logging as logger
|
|
|
14
14
|
import os
|
|
15
15
|
import pathlib
|
|
16
16
|
import re
|
|
17
|
+
import shutil
|
|
17
18
|
import typing
|
|
19
|
+
import warnings
|
|
18
20
|
from dataclasses import asdict, dataclass, field
|
|
19
21
|
from datetime import datetime
|
|
20
|
-
from enum import Enum
|
|
22
|
+
from enum import Enum, IntEnum
|
|
21
23
|
from json import JSONDecodeError
|
|
22
24
|
from string import Template
|
|
23
25
|
from typing import (
|
|
@@ -380,6 +382,16 @@ class Workspace:
|
|
|
380
382
|
"""False if no projects mapping. Lets us use workspace object in an if-statement"""
|
|
381
383
|
return bool(self.projects)
|
|
382
384
|
|
|
385
|
+
def __eq__(self, other: object) -> bool:
|
|
386
|
+
if not isinstance(other, Workspace):
|
|
387
|
+
return False
|
|
388
|
+
return self.projects == other.projects
|
|
389
|
+
|
|
390
|
+
def __hash__(self) -> int:
|
|
391
|
+
# makes it possible to use Workspace as the key in the workspace build cache
|
|
392
|
+
# see WorkspaceMixin.caching_build_workspace_and_update_role
|
|
393
|
+
return hash(frozenset(self.projects.items()))
|
|
394
|
+
|
|
383
395
|
def is_unmapped_single_project(self) -> bool:
|
|
384
396
|
"""
|
|
385
397
|
Returns ``True`` if this workspace only has 1 project
|
|
@@ -387,6 +399,39 @@ class Workspace:
|
|
|
387
399
|
"""
|
|
388
400
|
return len(self.projects) == 1 and not next(iter(self.projects.values()))
|
|
389
401
|
|
|
402
|
+
def merge_into(self, outdir: str | pathlib.Path) -> None:
|
|
403
|
+
"""
|
|
404
|
+
Copies each project dir of this workspace into the specified ``outdir``.
|
|
405
|
+
Each project dir is copied into ``{outdir}/{target}`` where ``target`` is
|
|
406
|
+
the target mapping of the project dir.
|
|
407
|
+
|
|
408
|
+
For example:
|
|
409
|
+
|
|
410
|
+
.. code-block:: python
|
|
411
|
+
from os.path import expanduser
|
|
412
|
+
|
|
413
|
+
workspace = Workspace(
|
|
414
|
+
projects={
|
|
415
|
+
expanduser("~/workspace/torch"): "torch",
|
|
416
|
+
expanduser("~/workspace/my_project"): ""
|
|
417
|
+
}
|
|
418
|
+
)
|
|
419
|
+
workspace.merge_into(expanduser("~/tmp"))
|
|
420
|
+
|
|
421
|
+
Copies:
|
|
422
|
+
|
|
423
|
+
* ``~/workspace/torch/**`` into ``~/tmp/torch/**``
|
|
424
|
+
* ``~/workspace/my_project/**`` into ``~/tmp/**``
|
|
425
|
+
|
|
426
|
+
"""
|
|
427
|
+
|
|
428
|
+
for src, dst in self.projects.items():
|
|
429
|
+
dst_path = pathlib.Path(outdir) / dst
|
|
430
|
+
if pathlib.Path(src).is_file():
|
|
431
|
+
shutil.copy2(src, dst_path)
|
|
432
|
+
else: # src is dir
|
|
433
|
+
shutil.copytree(src, dst_path, dirs_exist_ok=True)
|
|
434
|
+
|
|
390
435
|
@staticmethod
|
|
391
436
|
def from_str(workspace: str | None) -> "Workspace":
|
|
392
437
|
import yaml
|
|
@@ -891,14 +936,12 @@ class runopt:
|
|
|
891
936
|
Represents the metadata about the specific run option
|
|
892
937
|
"""
|
|
893
938
|
|
|
894
|
-
class alias(str):
|
|
895
|
-
pass
|
|
896
|
-
|
|
897
939
|
default: CfgVal
|
|
898
940
|
opt_type: Type[CfgVal]
|
|
899
941
|
is_required: bool
|
|
900
942
|
help: str
|
|
901
|
-
aliases: list[
|
|
943
|
+
aliases: list[str] | None = None
|
|
944
|
+
deprecated_aliases: list[str] | None = None
|
|
902
945
|
|
|
903
946
|
@property
|
|
904
947
|
def is_type_list_of_str(self) -> bool:
|
|
@@ -990,7 +1033,7 @@ class runopts:
|
|
|
990
1033
|
|
|
991
1034
|
def __init__(self) -> None:
|
|
992
1035
|
self._opts: Dict[str, runopt] = {}
|
|
993
|
-
self._alias_to_key: dict[
|
|
1036
|
+
self._alias_to_key: dict[str, str] = {}
|
|
994
1037
|
|
|
995
1038
|
def __iter__(self) -> Iterator[Tuple[str, runopt]]:
|
|
996
1039
|
return self._opts.items().__iter__()
|
|
@@ -1044,12 +1087,24 @@ class runopts:
|
|
|
1044
1087
|
val = resolved_cfg.get(cfg_key)
|
|
1045
1088
|
resolved_name = None
|
|
1046
1089
|
aliases = runopt.aliases or []
|
|
1090
|
+
deprecated_aliases = runopt.deprecated_aliases or []
|
|
1047
1091
|
if val is None:
|
|
1048
1092
|
for alias in aliases:
|
|
1049
1093
|
val = resolved_cfg.get(alias)
|
|
1050
1094
|
if alias in cfg or val is not None:
|
|
1051
1095
|
resolved_name = alias
|
|
1052
1096
|
break
|
|
1097
|
+
for alias in deprecated_aliases:
|
|
1098
|
+
val = resolved_cfg.get(alias)
|
|
1099
|
+
if val is not None:
|
|
1100
|
+
resolved_name = alias
|
|
1101
|
+
use_instead = self._alias_to_key.get(alias)
|
|
1102
|
+
warnings.warn(
|
|
1103
|
+
f"Run option `{alias}` is deprecated, use `{use_instead}` instead",
|
|
1104
|
+
UserWarning,
|
|
1105
|
+
stacklevel=2,
|
|
1106
|
+
)
|
|
1107
|
+
break
|
|
1053
1108
|
else:
|
|
1054
1109
|
resolved_name = cfg_key
|
|
1055
1110
|
for alias in aliases:
|
|
@@ -1172,49 +1227,23 @@ class runopts:
|
|
|
1172
1227
|
cfg[key] = val
|
|
1173
1228
|
return cfg
|
|
1174
1229
|
|
|
1175
|
-
def _get_primary_key_and_aliases(
|
|
1176
|
-
self,
|
|
1177
|
-
cfg_key: list[str] | str,
|
|
1178
|
-
) -> tuple[str, list[runopt.alias]]:
|
|
1179
|
-
"""
|
|
1180
|
-
Returns the primary key and aliases for the given cfg_key.
|
|
1181
|
-
"""
|
|
1182
|
-
if isinstance(cfg_key, str):
|
|
1183
|
-
return cfg_key, []
|
|
1184
|
-
|
|
1185
|
-
if len(cfg_key) == 0:
|
|
1186
|
-
raise ValueError("cfg_key must be a non-empty list")
|
|
1187
|
-
primary_key = None
|
|
1188
|
-
aliases = list[runopt.alias]()
|
|
1189
|
-
for name in cfg_key:
|
|
1190
|
-
if isinstance(name, runopt.alias):
|
|
1191
|
-
aliases.append(name)
|
|
1192
|
-
else:
|
|
1193
|
-
if primary_key is not None:
|
|
1194
|
-
raise ValueError(
|
|
1195
|
-
f" Given more than one primary key: {primary_key}, {name}. Please use runopt.alias type for aliases. "
|
|
1196
|
-
)
|
|
1197
|
-
primary_key = name
|
|
1198
|
-
if primary_key is None or primary_key == "":
|
|
1199
|
-
raise ValueError(
|
|
1200
|
-
"Missing cfg_key. Please provide one other than the aliases."
|
|
1201
|
-
)
|
|
1202
|
-
return primary_key, aliases
|
|
1203
|
-
|
|
1204
1230
|
def add(
|
|
1205
1231
|
self,
|
|
1206
|
-
cfg_key: str
|
|
1232
|
+
cfg_key: str,
|
|
1207
1233
|
type_: Type[CfgVal],
|
|
1208
1234
|
help: str,
|
|
1209
1235
|
default: CfgVal = None,
|
|
1210
1236
|
required: bool = False,
|
|
1237
|
+
aliases: Optional[list[str]] = None,
|
|
1238
|
+
deprecated_aliases: Optional[list[str]] = None,
|
|
1211
1239
|
) -> None:
|
|
1212
1240
|
"""
|
|
1213
1241
|
Adds the ``config`` option with the given help string and ``default``
|
|
1214
1242
|
value (if any). If the ``default`` is not specified then this option
|
|
1215
1243
|
is a required option.
|
|
1216
1244
|
"""
|
|
1217
|
-
|
|
1245
|
+
aliases = aliases or []
|
|
1246
|
+
deprecated_aliases = deprecated_aliases or []
|
|
1218
1247
|
if required and default is not None:
|
|
1219
1248
|
raise ValueError(
|
|
1220
1249
|
f"Required option: {cfg_key} must not specify default value. Given: {default}"
|
|
@@ -1225,10 +1254,20 @@ class runopts:
|
|
|
1225
1254
|
f"Option: {cfg_key}, must be of type: {type_}."
|
|
1226
1255
|
f" Given: {default} ({type(default).__name__})"
|
|
1227
1256
|
)
|
|
1228
|
-
|
|
1257
|
+
|
|
1258
|
+
opt = runopt(
|
|
1259
|
+
default,
|
|
1260
|
+
type_,
|
|
1261
|
+
required,
|
|
1262
|
+
help,
|
|
1263
|
+
list(set(aliases)),
|
|
1264
|
+
list(set(deprecated_aliases)),
|
|
1265
|
+
)
|
|
1229
1266
|
for alias in aliases:
|
|
1230
|
-
self._alias_to_key[alias] =
|
|
1231
|
-
|
|
1267
|
+
self._alias_to_key[alias] = cfg_key
|
|
1268
|
+
for deprecated_alias in deprecated_aliases:
|
|
1269
|
+
self._alias_to_key[deprecated_alias] = cfg_key
|
|
1270
|
+
self._opts[cfg_key] = opt
|
|
1232
1271
|
|
|
1233
1272
|
def update(self, other: "runopts") -> None:
|
|
1234
1273
|
self._opts.update(other._opts)
|
torchx/version.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
1
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
2
|
# All rights reserved.
|
|
4
3
|
#
|
|
@@ -7,6 +6,7 @@
|
|
|
7
6
|
|
|
8
7
|
# pyre-strict
|
|
9
8
|
|
|
9
|
+
from torchx._version import BASE_VERSION
|
|
10
10
|
from torchx.util.entrypoints import load
|
|
11
11
|
|
|
12
12
|
# Follows PEP-0440 version scheme guidelines
|
|
@@ -18,7 +18,7 @@ from torchx.util.entrypoints import load
|
|
|
18
18
|
# 0.1.0bN # Beta release
|
|
19
19
|
# 0.1.0rcN # Release Candidate
|
|
20
20
|
# 0.1.0 # Final release
|
|
21
|
-
__version__ =
|
|
21
|
+
__version__: str = BASE_VERSION
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
# Use the github container registry images corresponding to the current package
|
torchx/workspace/api.py
CHANGED
|
@@ -8,26 +8,17 @@
|
|
|
8
8
|
|
|
9
9
|
import abc
|
|
10
10
|
import fnmatch
|
|
11
|
+
import logging
|
|
11
12
|
import posixpath
|
|
12
|
-
import shutil
|
|
13
13
|
import tempfile
|
|
14
14
|
import warnings
|
|
15
15
|
from dataclasses import dataclass
|
|
16
|
-
from
|
|
17
|
-
from typing import (
|
|
18
|
-
Any,
|
|
19
|
-
Dict,
|
|
20
|
-
Generic,
|
|
21
|
-
Iterable,
|
|
22
|
-
Mapping,
|
|
23
|
-
Tuple,
|
|
24
|
-
TYPE_CHECKING,
|
|
25
|
-
TypeVar,
|
|
26
|
-
Union,
|
|
27
|
-
)
|
|
16
|
+
from typing import Any, Dict, Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar
|
|
28
17
|
|
|
29
18
|
from torchx.specs import AppDef, CfgVal, Role, runopts, Workspace
|
|
30
19
|
|
|
20
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
31
22
|
if TYPE_CHECKING:
|
|
32
23
|
from fsspec import AbstractFileSystem
|
|
33
24
|
|
|
@@ -113,45 +104,72 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
|
|
|
113
104
|
"""
|
|
114
105
|
return runopts()
|
|
115
106
|
|
|
116
|
-
def
|
|
107
|
+
def build_workspaces(self, roles: list[Role], cfg: Mapping[str, CfgVal]) -> None:
|
|
108
|
+
"""
|
|
109
|
+
NOTE: this method MUTATES the passed roles!
|
|
110
|
+
|
|
111
|
+
Builds the workspaces (if any) for each role and updates the role to reflect the built workspace.
|
|
112
|
+
Typically ``role.image`` is updated with the newly built image that reflects the local workspace.
|
|
113
|
+
Some workspace implementations may add extra environment variables to make it easier for other
|
|
114
|
+
parts of the program to access the workspace. For example a ``WORKSPACE_DIR`` env var may be added
|
|
115
|
+
to ``role.env`` that scripts can use to refer to the workspace directory in the container.
|
|
116
|
+
"""
|
|
117
|
+
|
|
118
|
+
build_cache: dict[object, object] = {}
|
|
119
|
+
|
|
120
|
+
for i, role in enumerate(roles):
|
|
121
|
+
if role.workspace:
|
|
122
|
+
old_img = role.image
|
|
123
|
+
self.caching_build_workspace_and_update_role(role, cfg, build_cache)
|
|
124
|
+
|
|
125
|
+
if old_img != role.image:
|
|
126
|
+
logger.info(
|
|
127
|
+
"role[%d]=%s updated with new image to include workspace changes",
|
|
128
|
+
i,
|
|
129
|
+
role.name,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
def caching_build_workspace_and_update_role(
|
|
117
133
|
self,
|
|
118
134
|
role: Role,
|
|
119
|
-
workspace: Union[Workspace, str],
|
|
120
135
|
cfg: Mapping[str, CfgVal],
|
|
136
|
+
build_cache: dict[object, object],
|
|
121
137
|
) -> None:
|
|
122
138
|
"""
|
|
123
|
-
Same as :py:meth:`build_workspace_and_update_role` but
|
|
124
|
-
|
|
125
|
-
|
|
139
|
+
Same as :py:meth:`build_workspace_and_update_role` but takes
|
|
140
|
+
a ``build_cache`` that can be used to cache pointers to build artifacts
|
|
141
|
+
between building workspace for each role.
|
|
126
142
|
|
|
127
|
-
|
|
143
|
+
This is useful when an appdef has multiple roles where the image and workspace
|
|
144
|
+
of the roles are the same but other attributes such as entrypoint or args are different.
|
|
145
|
+
|
|
146
|
+
NOTE: ``build_cache``'s lifetime is within :py:meth:`build_workspaces`
|
|
147
|
+
NOTE: the workspace implementation decides what to cache
|
|
148
|
+
|
|
149
|
+
Workspace subclasses should prefer implementing this method over
|
|
128
150
|
:py:meth:`build_workspace_and_update_role`.
|
|
129
151
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
152
|
+
The default implementation of this method simply calls the (deprecated) non-caching
|
|
153
|
+
:py:meth:`build_workspace_and_update_role` and deals with multi-dir workspaces by
|
|
154
|
+
merging them into a single tmpdir before passing it down.
|
|
133
155
|
|
|
134
|
-
Subclasses can override this method to customize multi-project
|
|
135
|
-
workspace building logic.
|
|
136
156
|
"""
|
|
137
|
-
if isinstance(workspace, Workspace):
|
|
138
|
-
if not workspace.is_unmapped_single_project():
|
|
139
|
-
with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
|
|
140
|
-
for src, dst in workspace.projects.items():
|
|
141
|
-
dst_path = Path(outdir) / dst
|
|
142
|
-
if Path(src).is_file():
|
|
143
|
-
shutil.copy2(src, dst_path)
|
|
144
|
-
else: # src is dir
|
|
145
|
-
shutil.copytree(src, dst_path, dirs_exist_ok=True)
|
|
146
|
-
|
|
147
|
-
self.build_workspace_and_update_role(role, outdir, cfg)
|
|
148
|
-
return
|
|
149
|
-
else: # single project workspace with no target mapping (treat like a str workspace)
|
|
150
|
-
workspace = str(workspace)
|
|
151
|
-
|
|
152
|
-
self.build_workspace_and_update_role(role, workspace, cfg)
|
|
153
157
|
|
|
154
|
-
|
|
158
|
+
workspace = role.workspace
|
|
159
|
+
|
|
160
|
+
if not workspace:
|
|
161
|
+
return
|
|
162
|
+
|
|
163
|
+
if workspace.is_unmapped_single_project():
|
|
164
|
+
# single-dir workspace with no target map; no need to copy to a tmp dir
|
|
165
|
+
self.build_workspace_and_update_role(role, str(workspace), cfg)
|
|
166
|
+
else:
|
|
167
|
+
# multi-dirs or single-dir with a target map;
|
|
168
|
+
# copy all dirs to a tmp dir and treat the tmp dir as a single-dir workspace
|
|
169
|
+
with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
|
|
170
|
+
workspace.merge_into(outdir)
|
|
171
|
+
self.build_workspace_and_update_role(role, outdir, cfg)
|
|
172
|
+
|
|
155
173
|
def build_workspace_and_update_role(
|
|
156
174
|
self,
|
|
157
175
|
role: Role,
|
|
@@ -159,6 +177,9 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
|
|
|
159
177
|
cfg: Mapping[str, CfgVal],
|
|
160
178
|
) -> None:
|
|
161
179
|
"""
|
|
180
|
+
.. note:: DEPRECATED: Workspace subclasses should implement
|
|
181
|
+
:py:meth:`caching_build_workspace_and_update_role` over this method.
|
|
182
|
+
|
|
162
183
|
Builds the specified ``workspace`` with respect to ``img``
|
|
163
184
|
and updates the ``role`` to reflect the built workspace artifacts.
|
|
164
185
|
In the simplest case, this method builds a new image and updates
|
|
@@ -167,7 +188,7 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
|
|
|
167
188
|
|
|
168
189
|
Note: this method mutates the passed ``role``.
|
|
169
190
|
"""
|
|
170
|
-
|
|
191
|
+
raise NotImplementedError("implement `caching_build_workspace_and_update_role`")
|
|
171
192
|
|
|
172
193
|
def dryrun_push_images(self, app: AppDef, cfg: Mapping[str, CfgVal]) -> T:
|
|
173
194
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: torchx-nightly
|
|
3
|
-
Version: 2025.
|
|
3
|
+
Version: 2025.11.17
|
|
4
4
|
Summary: TorchX SDK and Components
|
|
5
5
|
Home-page: https://github.com/meta-pytorch/torchx
|
|
6
6
|
Author: TorchX Devs
|
|
@@ -23,8 +23,10 @@ Requires-Dist: docker
|
|
|
23
23
|
Requires-Dist: filelock
|
|
24
24
|
Requires-Dist: fsspec>=2023.10.0
|
|
25
25
|
Requires-Dist: tabulate
|
|
26
|
-
Provides-Extra:
|
|
26
|
+
Provides-Extra: aws-batch
|
|
27
27
|
Requires-Dist: boto3; extra == "aws-batch"
|
|
28
|
+
Provides-Extra: kubernetes
|
|
29
|
+
Requires-Dist: kubernetes>=11; extra == "kubernetes"
|
|
28
30
|
Provides-Extra: dev
|
|
29
31
|
Requires-Dist: aiobotocore==2.20.0; extra == "dev"
|
|
30
32
|
Requires-Dist: ax-platform[mysql]==0.2.3; extra == "dev"
|
|
@@ -47,18 +49,29 @@ Requires-Dist: pytorch-lightning==2.5.0; extra == "dev"
|
|
|
47
49
|
Requires-Dist: tensorboard==2.14.0; extra == "dev"
|
|
48
50
|
Requires-Dist: sagemaker==2.230.0; extra == "dev"
|
|
49
51
|
Requires-Dist: torch-model-archiver>=0.4.2; extra == "dev"
|
|
50
|
-
Requires-Dist: torch
|
|
52
|
+
Requires-Dist: torch; extra == "dev"
|
|
51
53
|
Requires-Dist: torchmetrics==1.6.3; extra == "dev"
|
|
52
54
|
Requires-Dist: torchserve>=0.10.0; extra == "dev"
|
|
53
|
-
Requires-Dist: torchtext
|
|
54
|
-
Requires-Dist: torchvision
|
|
55
|
+
Requires-Dist: torchtext; extra == "dev"
|
|
56
|
+
Requires-Dist: torchvision; extra == "dev"
|
|
55
57
|
Requires-Dist: typing-extensions; extra == "dev"
|
|
56
58
|
Requires-Dist: ts==0.5.1; extra == "dev"
|
|
57
59
|
Requires-Dist: wheel; extra == "dev"
|
|
58
60
|
Requires-Dist: lintrunner; extra == "dev"
|
|
59
61
|
Requires-Dist: lintrunner-adapters; extra == "dev"
|
|
60
|
-
|
|
61
|
-
|
|
62
|
+
Dynamic: author
|
|
63
|
+
Dynamic: author-email
|
|
64
|
+
Dynamic: classifier
|
|
65
|
+
Dynamic: description
|
|
66
|
+
Dynamic: description-content-type
|
|
67
|
+
Dynamic: home-page
|
|
68
|
+
Dynamic: keywords
|
|
69
|
+
Dynamic: license
|
|
70
|
+
Dynamic: license-file
|
|
71
|
+
Dynamic: provides-extra
|
|
72
|
+
Dynamic: requires-dist
|
|
73
|
+
Dynamic: requires-python
|
|
74
|
+
Dynamic: summary
|
|
62
75
|
|
|
63
76
|
[](https://pypi.org/project/torchx/)
|
|
64
77
|
[](https://github.com/meta-pytorch/torchx/blob/main/LICENSE)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
torchx/__init__.py,sha256=QFDTdJacncWYWHL-2QyWdY5MUck3jVfSPRRGdvedcKc,355
|
|
2
|
+
torchx/_version.py,sha256=TzDuXIviDldFbXAhGe33redQcoP33jIsVR_hMyqSgdc,250
|
|
2
3
|
torchx/notebook.py,sha256=Rc6XUMzSq7NXtsYdtVluE6T89LpEhcba-3ANxuaLCCU,1008
|
|
3
|
-
torchx/version.py,sha256=
|
|
4
|
+
torchx/version.py,sha256=YcE66UkBxYHMQMtjVts4jF3l6Qeaj1gK_LzxU77l8Bo,975
|
|
4
5
|
torchx/apps/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
|
|
5
6
|
torchx/apps/serve/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
|
|
6
7
|
torchx/apps/serve/serve.py,sha256=u_h8agld1TwIPq5GRosHL3uxhkljNfS65McLB77O0OE,4386
|
|
@@ -48,7 +49,7 @@ torchx/examples/apps/lightning/profiler.py,sha256=SSSihnwjeUTkBoz0E3qn1b-wbkfUIo
|
|
|
48
49
|
torchx/examples/apps/lightning/train.py,sha256=0wvvshGHvZowePB4LfclXwn40X7i9euM0ReETWBcPSo,6253
|
|
49
50
|
torchx/pipelines/__init__.py,sha256=2MbRVk5xwRjg-d2qPemeXpEhDsocMQumPQ53lsesZAI,606
|
|
50
51
|
torchx/runner/__init__.py,sha256=x8Sz7s_tLxPgJgvWIhK4ju9BNZU61uBFywGwDY6CqJs,315
|
|
51
|
-
torchx/runner/api.py,sha256=
|
|
52
|
+
torchx/runner/api.py,sha256=xQpgiUz9jCX4zZriubbWk4tTJRe7MxNJQK64g0o7KQ8,30438
|
|
52
53
|
torchx/runner/config.py,sha256=SaKOB50d79WaMFPWK8CC4as6UaNFaRGhrBkfajq3KC4,18311
|
|
53
54
|
torchx/runner/events/__init__.py,sha256=cMiNjnr4eUNQ2Nxxtu4nsvN5lu56b-a6nJ-ct3i7DQk,5536
|
|
54
55
|
torchx/runner/events/api.py,sha256=bvxKBAYK8LzbrBNaNLgL1x0aivtfANmWo1EMGOrSR8k,2668
|
|
@@ -57,20 +58,20 @@ torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,59
|
|
|
57
58
|
torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
|
|
58
59
|
torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
|
|
59
60
|
torchx/schedulers/__init__.py,sha256=FQN9boQM4mwOD3sK9LZ3GBgw-gJ7Vx4MFj6z6ATQIrc,2211
|
|
60
|
-
torchx/schedulers/api.py,sha256=
|
|
61
|
+
torchx/schedulers/api.py,sha256=smoUv1ocfqsBRmesXbz9i1F86zBOixZ8QHxYmI_MzgQ,14649
|
|
61
62
|
torchx/schedulers/aws_batch_scheduler.py,sha256=-HpjNVhSFBDxZo3cebK-3YEguB49dxoaud2gz30cAVM,29437
|
|
62
63
|
torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
|
|
63
64
|
torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
|
|
64
65
|
torchx/schedulers/docker_scheduler.py,sha256=x-XHCqYnrmiW0dHfVA7hz7Fp2Qgw7fvMgRm058YOngY,16880
|
|
65
66
|
torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
|
|
66
67
|
torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wyuSFKG0-ywLc3qITJo,42949
|
|
67
|
-
torchx/schedulers/kubernetes_scheduler.py,sha256=
|
|
68
|
+
torchx/schedulers/kubernetes_scheduler.py,sha256=86ny9XXt9tdeV6Y7AlVFQ6vhxlviOdNeZUz4gOzU3cc,34478
|
|
68
69
|
torchx/schedulers/local_scheduler.py,sha256=ttnxFDy48_DSYDEW-no27OirFZOyfrjwJ2S1MwBUi74,41929
|
|
69
70
|
torchx/schedulers/lsf_scheduler.py,sha256=YS6Yel8tXJqLPxbcGz95lZG2nCi36AQXdNDyuBJePKg,17661
|
|
70
71
|
torchx/schedulers/slurm_scheduler.py,sha256=vypGaCZe61bkyNkqRlK4Iwmk_NaAUQi-DsspaWd6BZw,31873
|
|
71
72
|
torchx/schedulers/streams.py,sha256=8_SLezgnWgfv_zXUsJCUM34-h2dtv25NmZuxEwkzmxw,2007
|
|
72
|
-
torchx/specs/__init__.py,sha256=
|
|
73
|
-
torchx/specs/api.py,sha256=
|
|
73
|
+
torchx/specs/__init__.py,sha256=TaC0AveTebkCMo5hmdY1wGpo09vFDqzWnsT166ionTw,7108
|
|
74
|
+
torchx/specs/api.py,sha256=OrLX4gGa97qtjUbl3x_YnOKCdP0rQkVEruPIbNjo7fk,49230
|
|
74
75
|
torchx/specs/builders.py,sha256=Ye3of4MupJ-da8vLaX6_-nzGo_FRw1BFpYsX6dAZCNk,13730
|
|
75
76
|
torchx/specs/file_linter.py,sha256=z0c4mKJv47BWiPaWCdUM0A8kHwnj4b1s7oTmESuD9Tc,14407
|
|
76
77
|
torchx/specs/finder.py,sha256=gWQNEFrLYqrZoI0gMMhQ70YAC4sxqS0ZFpoWAmcVi44,17438
|
|
@@ -99,12 +100,12 @@ torchx/util/shlex.py,sha256=eXEKu8KC3zIcd8tEy9_s8Ds5oma8BORr-0VGWNpG2dk,463
|
|
|
99
100
|
torchx/util/strings.py,sha256=7Ef1loz2IYMrzeJ6Lewywi5cBIc3X3g7lSPbT1Tn_z4,664
|
|
100
101
|
torchx/util/types.py,sha256=E9dxAWQnsJkIDuHtg-poeOJ4etucSI_xP_Z5kNJX8uI,9229
|
|
101
102
|
torchx/workspace/__init__.py,sha256=FqN8AN4VhR1C_SBY10MggQvNZmyanbbuPuE-JCjkyUY,798
|
|
102
|
-
torchx/workspace/api.py,sha256=
|
|
103
|
+
torchx/workspace/api.py,sha256=UESQ4qgxXjsb6Y1wP9OGv2ixaFgaTs3SqghmNuOJIZM,10235
|
|
103
104
|
torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
|
|
104
105
|
torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
|
|
105
|
-
torchx_nightly-2025.
|
|
106
|
-
torchx_nightly-2025.
|
|
107
|
-
torchx_nightly-2025.
|
|
108
|
-
torchx_nightly-2025.
|
|
109
|
-
torchx_nightly-2025.
|
|
110
|
-
torchx_nightly-2025.
|
|
106
|
+
torchx_nightly-2025.11.17.dist-info/licenses/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
|
|
107
|
+
torchx_nightly-2025.11.17.dist-info/METADATA,sha256=iim6P-wiEztRPHgcWaQCa9_f0GsU-GyxHBILL2cyVJg,5324
|
|
108
|
+
torchx_nightly-2025.11.17.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
109
|
+
torchx_nightly-2025.11.17.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
|
|
110
|
+
torchx_nightly-2025.11.17.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
|
|
111
|
+
torchx_nightly-2025.11.17.dist-info/RECORD,,
|
{torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{torchx_nightly-2025.10.16.dist-info → torchx_nightly-2025.11.17.dist-info/licenses}/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|