torchx-nightly 2025.9.30__py3-none-any.whl → 2025.10.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of torchx-nightly might be problematic.
- torchx/cli/cmd_run.py +1 -1
- torchx/cli/cmd_tracker.py +1 -1
- torchx/components/__init__.py +1 -1
- torchx/components/dist.py +1 -1
- torchx/components/utils.py +1 -1
- torchx/distributed/__init__.py +1 -1
- torchx/schedulers/aws_batch_scheduler.py +44 -1
- torchx/schedulers/docker_scheduler.py +3 -0
- torchx/schedulers/kubernetes_scheduler.py +2 -1
- torchx/schedulers/slurm_scheduler.py +11 -2
- torchx/specs/__init__.py +3 -1
- torchx/specs/api.py +29 -1
- torchx/specs/file_linter.py +1 -1
- torchx/specs/finder.py +1 -1
- torchx/specs/named_resources_aws.py +2 -2
- torchx/tracker/__init__.py +2 -2
- torchx/tracker/api.py +1 -1
- torchx/workspace/api.py +0 -1
- {torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/METADATA +14 -14
- {torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/RECORD +24 -24
- {torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/LICENSE +0 -0
- {torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/WHEEL +0 -0
- {torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/top_level.txt +0 -0
torchx/cli/cmd_run.py
CHANGED
@@ -344,7 +344,7 @@ class CmdRun(SubCommand):
 "Invalid scheduler configuration: %s\n"
 "To configure scheduler options, either:\n"
 " 1. Use the `-cfg` command-line argument, e.g., `-cfg key1=value1,key2=value2`\n"
-" 2. Set up a `.torchxconfig` file. For more details, visit: https://pytorch.org/torchx/main/runner.config.html\n"
+" 2. Set up a `.torchxconfig` file. For more details, visit: https://meta-pytorch.org/torchx/main/runner.config.html\n"
 "Run `torchx runopts %s` to check all available configuration options for the "
 "`%s` scheduler."
 )

torchx/cli/cmd_tracker.py
CHANGED
@@ -45,7 +45,7 @@ class CmdTracker(SubCommand):
 else:
 raise RuntimeError(
 "No trackers configured."
-" See: https://pytorch.org/torchx/latest/runtime/tracking.html"
+" See: https://meta-pytorch.org/torchx/latest/runtime/tracking.html"
 )

 def add_list_job_arguments(self, subparser: argparse.ArgumentParser) -> None:

torchx/components/__init__.py
CHANGED
@@ -181,7 +181,7 @@ To validate that you've defined your component correctly you can either:

 1. (easiest) Dryrun your component's ``--help`` with the cli: ``torchx run --dryrun ~/component.py:train --help``
 2. Use the component :ref:`linter<specs:Component Linter>`
-   (see `dist_test.py <https://github.com/pytorch/torchx/blob/main/torchx/components/test/dist_test.py>`_ as an example)
+   (see `dist_test.py <https://github.com/meta-pytorch/torchx/blob/main/torchx/components/test/dist_test.py>`_ as an example)


 Running as a Job

torchx/components/dist.py
CHANGED
@@ -186,7 +186,7 @@ def ddp(

 Note: (cpu, gpu, memMB) parameters are mutually exclusive with ``h`` (named resource) where
 ``h`` takes precedence if specified for setting resource requirements.
-See `registering named resources <https://pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.
+See `registering named resources <https://meta-pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.

 Args:
 script_args: arguments to the main module

torchx/components/utils.py
CHANGED
@@ -154,7 +154,7 @@ def python(

 Note: (cpu, gpu, memMB) parameters are mutually exclusive with ``h`` (named resource) where
 ``h`` takes precedence if specified for setting resource requirements.
-See `registering named resources <https://pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.
+See `registering named resources <https://meta-pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.

 Args:
 args: arguments passed to the program in sys.argv[1:] (ignored with `--c`)

torchx/distributed/__init__.py
CHANGED
@@ -48,7 +48,7 @@ def local_rank() -> int:
 " but the `LOCAL_RANK` environment variable is not set. Will trivially return 0 for local_rank.\n"
 " It is recommended to use torchrun/torchx to run your script or set the `LOCAL_RANK` manually.\n"
 " For additional details see:\n"
-" 1) https://pytorch.org/torchx/latest/components/distributed.html\n"
+" 1) https://meta-pytorch.org/torchx/latest/components/distributed.html\n"
 " 2) https://pytorch.org/docs/stable/elastic/run.html\n"
 "=============================================================================================="
 )

torchx/schedulers/aws_batch_scheduler.py
CHANGED
@@ -92,6 +92,8 @@ ENV_TORCHX_ROLE_IDX = "TORCHX_ROLE_IDX"

 ENV_TORCHX_ROLE_NAME = "TORCHX_ROLE_NAME"

+ENV_TORCHX_IMAGE = "TORCHX_IMAGE"
+
 DEFAULT_ROLE_NAME = "node"

 TAG_TORCHX_VER = "torchx.pytorch.org/version"
@@ -99,6 +101,37 @@ TAG_TORCHX_APPNAME = "torchx.pytorch.org/app-name"
 TAG_TORCHX_USER = "torchx.pytorch.org/user"


+def parse_ulimits(ulimits_list: list[str]) -> List[Dict[str, Any]]:
+    """
+    Parse ulimit string in format: name:softLimit:hardLimit
+    Multiple ulimits separated by commas.
+    """
+    if not ulimits_list:
+        return []
+
+    ulimits = []
+    for ulimit_str in ulimits_list:
+        if not ulimit_str.strip():
+            continue
+
+        parts = ulimit_str.strip().split(":")
+        if len(parts) != 3:
+            raise ValueError(
+                f"ulimit must be in format name:softLimit:hardLimit, got: {ulimit_str}"
+            )
+
+        name, soft_limit, hard_limit = parts
+        ulimits.append(
+            {
+                "name": name,
+                "softLimit": int(soft_limit) if soft_limit != "-1" else -1,
+                "hardLimit": int(hard_limit) if hard_limit != "-1" else -1,
+            }
+        )
+
+    return ulimits
+
+
 if TYPE_CHECKING:
 from docker import DockerClient

@@ -177,7 +210,8 @@ def _role_to_node_properties(
 privileged: bool = False,
 job_role_arn: Optional[str] = None,
 execution_role_arn: Optional[str] = None,
-
+ulimits: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
 role.mounts += get_device_mounts(role.resource.devices)

 mount_points = []
@@ -239,6 +273,7 @@ def _role_to_node_properties(
 "environment": [{"name": k, "value": v} for k, v in role.env.items()],
 "privileged": privileged,
 "resourceRequirements": resource_requirements_from_resource(role.resource),
+**({"ulimits": ulimits} if ulimits else {}),
 "linuxParameters": {
 # To support PyTorch dataloaders we need to set /dev/shm to larger
 # than the 64M default.
@@ -361,6 +396,7 @@ class AWSBatchOpts(TypedDict, total=False):
 priority: int
 job_role_arn: Optional[str]
 execution_role_arn: Optional[str]
+ulimits: Optional[list[str]]


 class AWSBatchScheduler(
@@ -506,6 +542,7 @@ class AWSBatchScheduler(
 role = values.apply(role)
 role.env[ENV_TORCHX_ROLE_IDX] = str(role_idx)
 role.env[ENV_TORCHX_ROLE_NAME] = str(role.name)
+role.env[ENV_TORCHX_IMAGE] = role.image

 nodes.append(
 _role_to_node_properties(
@@ -514,6 +551,7 @@ class AWSBatchScheduler(
 privileged=cfg["privileged"],
 job_role_arn=cfg.get("job_role_arn"),
 execution_role_arn=cfg.get("execution_role_arn"),
+ulimits=parse_ulimits(cfg.get("ulimits") or []),
 )
 )
 node_idx += role.num_replicas
@@ -599,6 +637,11 @@ class AWSBatchScheduler(
 type_=str,
 help="The Amazon Resource Name (ARN) of the IAM role that the ECS agent can assume for AWS permissions.",
 )
+opts.add(
+    "ulimits",
+    type_=List[str],
+    help="Ulimit settings in format: name:softLimit:hardLimit (multiple separated by commas)",
+)
 return opts

 def _get_job_id(self, app_id: str) -> Optional[str]:

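The new `ulimits` run option is parsed by `parse_ulimits` above and merged into the container properties built by `_role_to_node_properties`. A minimal sketch of what the parser produces; the limit names and values are illustrative, and the comma-separated `-cfg ulimits=...` form is an assumption based on the runopt help text:

# Sketch: exercising parse_ulimits() as added in this release.
# On the CLI the List[str] runopt would presumably be supplied as something
# like `-cfg ulimits=nofile:65536:65536,core:0:-1` (see the help text above).
from torchx.schedulers.aws_batch_scheduler import parse_ulimits

limits = parse_ulimits(["nofile:65536:65536", "core:0:-1"])
assert limits == [
    {"name": "nofile", "softLimit": 65536, "hardLimit": 65536},
    {"name": "core", "softLimit": 0, "hardLimit": -1},
]
# Malformed entries such as "nofile:65536" raise ValueError.
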
torchx/schedulers/docker_scheduler.py
CHANGED
@@ -84,6 +84,8 @@ LABEL_APP_ID: str = "torchx.pytorch.org/app-id"
 LABEL_ROLE_NAME: str = "torchx.pytorch.org/role-name"
 LABEL_REPLICA_ID: str = "torchx.pytorch.org/replica-id"

+ENV_TORCHX_IMAGE: str = "TORCHX_IMAGE"
+
 NETWORK = "torchx"


@@ -279,6 +281,7 @@ class DockerScheduler(

 # configure distributed host envs
 env["TORCHX_RANK0_HOST"] = rank0_name
+env[ENV_TORCHX_IMAGE] = replica_role.image

 c = DockerContainer(
 image=replica_role.image,

torchx/schedulers/kubernetes_scheduler.py
CHANGED
@@ -399,6 +399,7 @@ def app_to_resource(
 replica_role = values.apply(role)
 if role_idx == 0 and replica_id == 0:
 replica_role.env["TORCHX_RANK0_HOST"] = "localhost"
+replica_role.env["TORCHX_IMAGE"] = replica_role.image

 pod = role_to_pod(name, replica_role, service_account)
 pod.metadata.labels.update(
@@ -485,7 +486,7 @@ class KubernetesScheduler(
 For installation instructions see: https://github.com/volcano-sh/volcano

 This has been confirmed to work with Volcano v1.3.0 and Kubernetes versions
-v1.18-1.21. See https://github.com/pytorch/torchx/issues/120 which is
+v1.18-1.21. See https://github.com/meta-pytorch/torchx/issues/120 which is
 tracking Volcano support for Kubernetes v1.22.

 .. note::

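Together with the AWS Batch change above, the Docker and Kubernetes schedulers now export the replica's container image as the `TORCHX_IMAGE` environment variable. A purely illustrative sketch of how application code could pick it up (not part of the diff):

# Illustrative only: read the image name injected by the schedulers above.
import os

image = os.environ.get("TORCHX_IMAGE")
if image is not None:
    print(f"this replica was launched from image: {image}")
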
torchx/schedulers/slurm_scheduler.py
CHANGED
@@ -73,6 +73,15 @@ def appstate_from_slurm_state(slurm_state: str) -> AppState:
 return SLURM_STATES.get(slurm_state, AppState.UNKNOWN)


+def get_appstate_from_job(job: dict[str, object]) -> AppState:
+    # Prior to slurm-23.11, job_state was a string and not a list
+    job_state = job.get("job_state", None)
+    if isinstance(job_state, list):
+        return appstate_from_slurm_state(job_state[0])
+    else:
+        return appstate_from_slurm_state(str(job_state))
+
+
 def version() -> Tuple[int, int]:
 """
 Uses ``sinfo --version`` to get the slurm version. If the command fails, it
@@ -666,7 +675,7 @@ class SlurmScheduler(

 entrypoint = job["command"]
 image = job["current_working_directory"]
-state =
+state = get_appstate_from_job(job)

 job_resources = job["job_resources"]

@@ -881,7 +890,7 @@ class SlurmScheduler(
 out.append(
 ListAppResponse(
 app_id=str(job["job_id"]),
-state=
+state=get_appstate_from_job(job),
 name=job["name"],
 )
 )

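`get_appstate_from_job` above absorbs the format change noted in its comment: since slurm-23.11, `job_state` in the job dict is a list rather than a string. A minimal sketch with both shapes (the job fields are illustrative):

# Sketch: pre- and post-23.11 payload shapes map to the same AppState.
from torchx.schedulers.slurm_scheduler import get_appstate_from_job

pre_23_11 = {"job_id": 1234, "job_state": "RUNNING"}      # string (pre slurm-23.11)
post_23_11 = {"job_id": 1234, "job_state": ["RUNNING"]}   # list (slurm-23.11+)

assert get_appstate_from_job(pre_23_11) == get_appstate_from_job(post_23_11)
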
torchx/specs/__init__.py
CHANGED
@@ -41,6 +41,7 @@ from torchx.specs.api import (
 RoleStatus,
 runopt,
 runopts,
+TORCHX_HOME,
 UnknownAppException,
 UnknownSchedulerException,
 VolumeMount,
@@ -53,6 +54,7 @@ from torchx.util.modules import import_attr

 GiB: int = 1024

+
 ResourceFactory = Callable[[], Resource]

 AWS_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
@@ -127,7 +129,7 @@ def resource(

 If ``h`` is specified then it is used to look up the
 resource specs from the list of registered named resources.
-See `registering named resource <https://pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.
+See `registering named resource <https://meta-pytorch.org/torchx/latest/advanced.html#registering-named-resources>`_.

 Otherwise a ``Resource`` object is created from the raw resource specs.


torchx/specs/api.py
CHANGED
@@ -11,6 +11,8 @@ import copy
 import inspect
 import json
 import logging as logger
+import os
+import pathlib
 import re
 import typing
 from dataclasses import asdict, dataclass, field
@@ -66,6 +68,32 @@ YELLOW_BOLD = "\033[1;33m"
 RESET = "\033[0m"


+def TORCHX_HOME(*subdir_paths: str) -> pathlib.Path:
+    """
+    Path to the "dot-directory" for torchx.
+    Defaults to `~/.torchx` and is overridable via the `TORCHX_HOME` environment variable.
+
+    Usage:
+
+    .. doc-test::
+
+        from pathlib import Path
+        from torchx.specs import TORCHX_HOME
+
+        assert TORCHX_HOME() == Path.home() / ".torchx"
+        assert TORCHX_HOME("conda-pack-out") == Path.home() / ".torchx" / "conda-pack-out"
+    ```
+    """
+
+    default_dir = str(pathlib.Path.home() / ".torchx")
+    torchx_home = pathlib.Path(os.getenv("TORCHX_HOME", default_dir))
+
+    torchx_home = torchx_home / os.path.sep.join(subdir_paths)
+    torchx_home.mkdir(parents=True, exist_ok=True)
+
+    return torchx_home
+
+
 # ========================================
 # ==== Distributed AppDef API =======
 # ========================================
@@ -826,7 +854,7 @@ class runopt:

 NOTE: dict parsing uses ":" as the kv separator (rather than the standard "=") because "=" is used
 at the top-level cfg to parse runopts (notice the plural) from the CLI. Originally torchx only supported
-primitives and list[str] as CfgVal but dict[str,str] was added in https://github.com/pytorch/torchx/pull/855
+primitives and list[str] as CfgVal but dict[str,str] was added in https://github.com/meta-pytorch/torchx/pull/855
 """

 if self.opt_type is None:

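`TORCHX_HOME` is also re-exported from `torchx.specs` (see the `torchx/specs/__init__.py` hunk earlier). A small usage sketch based on the function body above; note the call creates the directory as a side effect, the assertions assume the `TORCHX_HOME` environment variable is initially unset, and the `/tmp` override path is illustrative:

import os
from pathlib import Path

from torchx.specs import TORCHX_HOME

# default location under the user's home directory
assert TORCHX_HOME() == Path.home() / ".torchx"
assert TORCHX_HOME("conda-pack-out") == Path.home() / ".torchx" / "conda-pack-out"

# the base directory follows the TORCHX_HOME environment variable when set
os.environ["TORCHX_HOME"] = "/tmp/torchx-home"  # illustrative override
assert TORCHX_HOME("cache") == Path("/tmp/torchx-home") / "cache"
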
torchx/specs/file_linter.py
CHANGED
@@ -75,7 +75,7 @@ def get_fn_docstring(fn: Callable[..., object]) -> Tuple[str, Dict[str, str]]:
 if the description
 """
 default_fn_desc = f"""{fn.__name__} TIP: improve this help string by adding a docstring
-to your component (see: https://pytorch.org/torchx/latest/component_best_practices.html)"""
+to your component (see: https://meta-pytorch.org/torchx/latest/component_best_practices.html)"""
 args_description = _get_default_arguments_descriptions(fn)
 func_description = inspect.getdoc(fn)
 if not func_description:

torchx/specs/finder.py
CHANGED
@@ -452,7 +452,7 @@ def get_component(
 raise ComponentNotFoundException(
 f"Component `{name}` not found. Please make sure it is one of the "
 "builtins: `torchx builtins`. Or registered via `[torchx.components]` "
-"entry point (see: https://pytorch.org/torchx/latest/configure.html)"
+"entry point (see: https://meta-pytorch.org/torchx/latest/configure.html)"
 )

 component = components[name]

torchx/specs/named_resources_aws.py
CHANGED
@@ -16,7 +16,7 @@ the equvalent resource in mem, cpu and gpu numbers.

 .. note::
 These resource definitions may change in future. It is expected for each user to
-manage their own resources. Follow https://pytorch.org/torchx/latest/specs.html#torchx.specs.get_named_resources
+manage their own resources. Follow https://meta-pytorch.org/torchx/latest/specs.html#torchx.specs.get_named_resources
 to set up named resources.

 Usage:
@@ -47,7 +47,7 @@ NEURON_DEVICE = "aws.amazon.com/neurondevice"
 MEM_TAX = 0.96

 # determines instance type for non-honogeneous CEs
-# see https://github.com/pytorch/torchx/issues/780
+# see https://github.com/meta-pytorch/torchx/issues/780
 K8S_ITYPE = "node.kubernetes.io/instance-type"
 GiB: int = int(1024 * MEM_TAX)


torchx/tracker/__init__.py
CHANGED
@@ -32,7 +32,7 @@ implementation.

 Example usage
 -------------
-Sample `code <https://github.com/pytorch/torchx/blob/main/torchx/examples/apps/tracker/main.py>`__ using tracker API.
+Sample `code <https://github.com/meta-pytorch/torchx/blob/main/torchx/examples/apps/tracker/main.py>`__ using tracker API.


 Tracker Setup
@@ -111,7 +111,7 @@ Use :py:meth:`~torchx.tracker.app_run_from_env`:
 Reference :py:class:`~torchx.tracker.api.TrackerBase` implementation
 --------------------------------------------------------------------
 :py:class:`~torchx.tracker.backend.fsspec.FsspecTracker` provides reference implementation of a tracker backend.
-GitHub example `directory <https://github.com/pytorch/torchx/blob/main/torchx/examples/apps/tracker/>`__ provides example on how to
+GitHub example `directory <https://github.com/meta-pytorch/torchx/blob/main/torchx/examples/apps/tracker/>`__ provides example on how to
 configure and use it in user application.



torchx/tracker/api.py
CHANGED
@@ -191,7 +191,7 @@ def build_trackers(
 factory = entrypoint_factories.get(factory_name) or load_module(factory_name)
 if not factory:
 logger.warning(
-f"No tracker factory `{factory_name}` found in entry_points or modules. See https://pytorch.org/torchx/main/tracker.html#module-torchx.tracker"
+f"No tracker factory `{factory_name}` found in entry_points or modules. See https://meta-pytorch.org/torchx/main/tracker.html#module-torchx.tracker"
 )
 continue
 if config:

torchx/workspace/api.py
CHANGED
@@ -275,7 +275,6 @@ def walk_workspace(
 walk_workspace walks the filesystem path and applies the ignore rules
 specified via ``ignore_name``.
 This follows the rules for ``.dockerignore``.
-https://docs.docker.com/engine/reference/builder/#dockerignore-file
 """
 ignore_patterns = []
 ignore_path = posixpath.join(path, ignore_name)

{torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: torchx-nightly
-Version: 2025.9.30
+Version: 2025.10.2
 Summary: TorchX SDK and Components
-Home-page: https://github.com/pytorch/torchx
+Home-page: https://github.com/meta-pytorch/torchx
 Author: TorchX Devs
 Author-email: torchx@fb.com
 License: BSD-3
@@ -61,9 +61,9 @@ Provides-Extra: kubernetes
 Requires-Dist: kubernetes>=11; extra == "kubernetes"

 [](https://pypi.org/project/torchx/)
-[](https://github.com/pytorch/torchx/blob/main/LICENSE)
-
-
+[](https://github.com/meta-pytorch/torchx/blob/main/LICENSE)
+
+
 [](https://codecov.io/gh/pytorch/torchx)


@@ -82,16 +82,16 @@ TorchX currently supports:
 * Docker
 * Local

-Need a scheduler not listed? [Let us know!](https://github.com/pytorch/torchx/issues?q=is%3Aopen+is%3Aissue+label%3Ascheduler-request)
+Need a scheduler not listed? [Let us know!](https://github.com/meta-pytorch/torchx/issues?q=is%3Aopen+is%3Aissue+label%3Ascheduler-request)

 ## Quickstart

-See the [quickstart guide](https://pytorch.org/torchx/latest/quickstart.html).
+See the [quickstart guide](https://meta-pytorch.org/torchx/latest/quickstart.html).

 ## Documentation

-* [Stable Documentation](https://pytorch.org/torchx/latest/)
-* [Nightly Documentation](https://pytorch.org/torchx/main/)
+* [Stable Documentation](https://meta-pytorch.org/torchx/latest/)
+* [Nightly Documentation](https://meta-pytorch.org/torchx/main/)

 ## Requirements

@@ -133,22 +133,22 @@ pip install torchx-nightly[dev]

 ```bash
 # install torchx sdk and CLI from source
-$ pip install -e git+https://github.com/pytorch/torchx.git#egg=torchx
+$ pip install -e git+https://github.com/meta-pytorch/torchx.git#egg=torchx

 # install extra dependencies
-$ pip install -e git+https://github.com/pytorch/torchx.git#egg=torchx[dev]
+$ pip install -e git+https://github.com/meta-pytorch/torchx.git#egg=torchx[dev]
 ```

 ### Docker

 TorchX provides a docker container for using as as part of a TorchX role.

-See: https://github.com/pytorch/torchx/pkgs/container/torchx
+See: https://github.com/meta-pytorch/torchx/pkgs/container/torchx

 ## Contributing

-We welcome PRs! See the [CONTRIBUTING](https://github.com/pytorch/torchx/blob/main/CONTRIBUTING.md) file.
+We welcome PRs! See the [CONTRIBUTING](https://github.com/meta-pytorch/torchx/blob/main/CONTRIBUTING.md) file.

 ## License

-TorchX is BSD licensed, as found in the [LICENSE](https://github.com/pytorch/torchx/blob/main/LICENSE) file.
+TorchX is BSD licensed, as found in the [LICENSE](https://github.com/meta-pytorch/torchx/blob/main/LICENSE) file.

{torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/RECORD
CHANGED
@@ -16,25 +16,25 @@ torchx/cli/cmd_configure.py,sha256=1kTv0qbsbV44So74plAySwWu56pQrqjhfW_kbfdC3Rw,1
 torchx/cli/cmd_describe.py,sha256=E5disbHoKTsqYKp2s3DaFW9GDLCCOgdOc3pQoHKoyCs,1283
 torchx/cli/cmd_list.py,sha256=alkS9aIaDI8lX3W8uj8Vtr3IU3G2VeCuokKSd3zOFug,1409
 torchx/cli/cmd_log.py,sha256=v-EZYUDOcG95rEgTnrsmPJMUyxM9Mk8YFAJtUxtgViE,5475
-torchx/cli/cmd_run.py,sha256=
+torchx/cli/cmd_run.py,sha256=Rhs7kQc7qI3lY3479D3EKR2gca29jsKbZbJHW8SI_DA,18581
 torchx/cli/cmd_runopts.py,sha256=NWZiP8XpQjfTDJgays2c6MgL_8wxFoeDge6NstaZdKk,1302
 torchx/cli/cmd_status.py,sha256=22IAEmKs0qkG6kJi83u9dRX2Q-ntT7yehVx7FxtY-vQ,2114
-torchx/cli/cmd_tracker.py,sha256=
+torchx/cli/cmd_tracker.py,sha256=9gmOmYi-89qQRGQfSrXCTto7ve54_JKFqs_wa7oRUA8,5223
 torchx/cli/colors.py,sha256=yLMes7e_UoLAfhxE0W6edhc58t83UHAlnCN2ANPeuXw,568
 torchx/cli/main.py,sha256=1Jf2cnO6Y2W69Adt88avmNPVrL6ZR4Hkff6GVB4293k,3484
-torchx/components/__init__.py,sha256=
+torchx/components/__init__.py,sha256=JaVte0j9Gqi6IrjZKudJ2Kr3gkdHsvlCdRTo-zYpSRo,11815
 torchx/components/component_test_base.py,sha256=22iNSdVa_qTW3SMM30Pw5UEWlK4DZVw0C03EqYiaLOI,4150
-torchx/components/dist.py,sha256=
+torchx/components/dist.py,sha256=xmWf7nNuadcwPkbNHowd2bgiaPyZ9QDHv_5tSov11N0,14593
 torchx/components/interpret.py,sha256=g8gkKdDJvsBfX1ZrpVT7n2bMEtmwRV_1AqDyAnnQ_aA,697
 torchx/components/metrics.py,sha256=1gbp8BfzZWGa7PD1db5vRADlONzmae4qSBUUdCWayr0,2814
 torchx/components/serve.py,sha256=uxIC5gU2ecg0EJIPX_oEPzNNOXRAre4j2eXusrgwGAI,2156
 torchx/components/structured_arg.py,sha256=8jMcd0rtUmzCKEQKJ_JYzxSkMMK9q0fYjkwAs6wo78E,9595
 torchx/components/train.py,sha256=vtrQXRcD7bIcbb3lSeyD9BBlIe1mv1WNW6rnLK9R0Mw,1259
-torchx/components/utils.py,sha256=
+torchx/components/utils.py,sha256=IMjihhgs7nO67YtTetUBjN_CRpyIyyQsaJBkp7mpHfk,9368
 torchx/components/integration_tests/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
 torchx/components/integration_tests/component_provider.py,sha256=g-4ig1vtd5Vzgug0VAKRAFUt6KAV3TgQrBCrwRSJ7ZY,3981
 torchx/components/integration_tests/integ_tests.py,sha256=O8jd8Jq5O0mns7xzIFsHexBDHkIIAIfELQkWCzNPzRw,5165
-torchx/distributed/__init__.py,sha256=
+torchx/distributed/__init__.py,sha256=kh9YzDwWX7zFJJ8StR9qhMM2V3-66INs9i3ztDF-1ho,10252
 torchx/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 torchx/examples/torchx_out_of_sync_training.py,sha256=sXiI1G8aGsfuvxRdBszDgM8pSplqhgfXjRnAcgRwNGM,397
 torchx/examples/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -58,23 +58,23 @@ torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-
 torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
 torchx/schedulers/__init__.py,sha256=_Wx6-X3FNh8RJR82UGgUwKg7V_VQYsAkrveDoSSk2xU,2195
 torchx/schedulers/api.py,sha256=lfxNhrEO6eYYqVuQzzj9sTXrZShuZkyYxJ1jPE-Lvpo,14561
-torchx/schedulers/aws_batch_scheduler.py,sha256
+torchx/schedulers/aws_batch_scheduler.py,sha256=-HpjNVhSFBDxZo3cebK-3YEguB49dxoaud2gz30cAVM,29437
 torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
 torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
-torchx/schedulers/docker_scheduler.py,sha256=
+torchx/schedulers/docker_scheduler.py,sha256=x-XHCqYnrmiW0dHfVA7hz7Fp2Qgw7fvMgRm058YOngY,16880
 torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
 torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wyuSFKG0-ywLc3qITJo,42949
-torchx/schedulers/kubernetes_scheduler.py,sha256=
+torchx/schedulers/kubernetes_scheduler.py,sha256=Wb6XDzwcvp3-NqBhKrjtgDC4L6GVOmcyP6fuoPFByBE,28288
 torchx/schedulers/local_scheduler.py,sha256=ttnxFDy48_DSYDEW-no27OirFZOyfrjwJ2S1MwBUi74,41929
 torchx/schedulers/lsf_scheduler.py,sha256=YS6Yel8tXJqLPxbcGz95lZG2nCi36AQXdNDyuBJePKg,17661
-torchx/schedulers/slurm_scheduler.py,sha256=
+torchx/schedulers/slurm_scheduler.py,sha256=vypGaCZe61bkyNkqRlK4Iwmk_NaAUQi-DsspaWd6BZw,31873
 torchx/schedulers/streams.py,sha256=8_SLezgnWgfv_zXUsJCUM34-h2dtv25NmZuxEwkzmxw,2007
-torchx/specs/__init__.py,sha256=
-torchx/specs/api.py,sha256=
+torchx/specs/__init__.py,sha256=RNjj4cV64AXP-2XowHLJJpzub1zYuyS17-2SU-dCcN0,6632
+torchx/specs/api.py,sha256=ZJEqBnEFG2jMMfQuIrBFHiX-Thr_wz2mAMiYeGf-fWo,42311
 torchx/specs/builders.py,sha256=Ye3of4MupJ-da8vLaX6_-nzGo_FRw1BFpYsX6dAZCNk,13730
-torchx/specs/file_linter.py,sha256=
-torchx/specs/finder.py,sha256=
-torchx/specs/named_resources_aws.py,sha256=
+torchx/specs/file_linter.py,sha256=z0c4mKJv47BWiPaWCdUM0A8kHwnj4b1s7oTmESuD9Tc,14407
+torchx/specs/finder.py,sha256=gWQNEFrLYqrZoI0gMMhQ70YAC4sxqS0ZFpoWAmcVi44,17438
+torchx/specs/named_resources_aws.py,sha256=28B1e_sSJbqmzApBMMJvOUKaIFPm1jj04-Nqt0Z30Xw,11425
 torchx/specs/named_resources_generic.py,sha256=Sg4tAdqiiWDrDz2Lj_pnfsjzGIXKTou73wPseh6j55w,2646
 torchx/specs/test/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
 torchx/specs/test/components/a/__init__.py,sha256=kdxEgnI8QBSBiuTjaB4qDD7JX84hWowyPWU4B2Cqe9A,561
@@ -82,8 +82,8 @@ torchx/specs/test/components/a/b/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxT
 torchx/specs/test/components/a/b/c.py,sha256=FhixafzNqpS5zvggtWIWLxRd6HIxsOmct-d1Hs-rDoc,554
 torchx/specs/test/components/c/__init__.py,sha256=5CBMckkpqJUdxBQBYHGSsItqq1gj2V0UiCw02Qfq6MM,246
 torchx/specs/test/components/c/d.py,sha256=2AjE-FmQXJTw3hws66O83ToQPmjOEZLDf-jDAKrrUkQ,546
-torchx/tracker/__init__.py,sha256=
-torchx/tracker/api.py,sha256=
+torchx/tracker/__init__.py,sha256=qo39aOa0Dz9zt4TtFkqPeIaH7MNqdAkFlGaOFiDLXTI,4375
+torchx/tracker/api.py,sha256=WZ7TYdbSVx_5h5MlX9EwQLRpxmIf0oKdiQwQ0zvkO3o,11262
 torchx/tracker/mlflow.py,sha256=poeoIXVPzr2sxgi515fMGRH83KAFNL6XFILMh0EQ2Dw,14487
 torchx/tracker/backend/__init__.py,sha256=fE0IHi1JJpxsNVBNzWNee2thrNXFFRhY94c80RxNSIE,231
 torchx/tracker/backend/fsspec.py,sha256=528xKryBE27Rm_OHD7r2R6fmVAclknBtoy1s034Ny6c,10440
@@ -99,12 +99,12 @@ torchx/util/shlex.py,sha256=eXEKu8KC3zIcd8tEy9_s8Ds5oma8BORr-0VGWNpG2dk,463
 torchx/util/strings.py,sha256=7Ef1loz2IYMrzeJ6Lewywi5cBIc3X3g7lSPbT1Tn_z4,664
 torchx/util/types.py,sha256=E9dxAWQnsJkIDuHtg-poeOJ4etucSI_xP_Z5kNJX8uI,9229
 torchx/workspace/__init__.py,sha256=cZsKVvUWwDYcGhe6SCXQGBQfbk_yTnKEImOkI6xmu30,809
-torchx/workspace/api.py,sha256=
+torchx/workspace/api.py,sha256=MGBQauBoH7wZdvXHXOx7JqefCF41rK0AHWF68IUwr4k,11276
 torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
 torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
-torchx_nightly-2025.
-torchx_nightly-2025.
-torchx_nightly-2025.
-torchx_nightly-2025.
-torchx_nightly-2025.
-torchx_nightly-2025.
+torchx_nightly-2025.10.2.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
+torchx_nightly-2025.10.2.dist-info/METADATA,sha256=X5eR-tfdt5wWYtmM_S-i7NgJm49DxolTkcP4dbs7KfY,5068
+torchx_nightly-2025.10.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+torchx_nightly-2025.10.2.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
+torchx_nightly-2025.10.2.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
+torchx_nightly-2025.10.2.dist-info/RECORD,,

{torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/LICENSE
File without changes
{torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/WHEEL
File without changes
{torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/entry_points.txt
File without changes
{torchx_nightly-2025.9.30.dist-info → torchx_nightly-2025.10.2.dist-info}/top_level.txt
File without changes