torchx-nightly 2025.9.28__py3-none-any.whl → 2025.11.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic.
- torchx/_version.py +8 -0
- torchx/cli/cmd_run.py +10 -5
- torchx/cli/cmd_tracker.py +1 -1
- torchx/components/__init__.py +1 -1
- torchx/components/dist.py +9 -3
- torchx/components/utils.py +1 -1
- torchx/distributed/__init__.py +1 -1
- torchx/runner/api.py +30 -22
- torchx/runner/config.py +2 -0
- torchx/schedulers/__init__.py +8 -9
- torchx/schedulers/api.py +9 -4
- torchx/schedulers/aws_batch_scheduler.py +44 -1
- torchx/schedulers/docker_scheduler.py +3 -0
- torchx/schedulers/kubernetes_scheduler.py +200 -17
- torchx/schedulers/slurm_scheduler.py +11 -2
- torchx/specs/__init__.py +30 -7
- torchx/specs/api.py +215 -10
- torchx/specs/file_linter.py +1 -1
- torchx/specs/finder.py +1 -1
- torchx/specs/named_resources_aws.py +13 -2
- torchx/tracker/__init__.py +2 -2
- torchx/tracker/api.py +1 -1
- torchx/util/entrypoints.py +1 -6
- torchx/version.py +2 -2
- torchx/workspace/__init__.py +1 -1
- torchx/workspace/api.py +65 -110
- {torchx_nightly-2025.9.28.dist-info → torchx_nightly-2025.11.17.dist-info}/METADATA +34 -21
- {torchx_nightly-2025.9.28.dist-info → torchx_nightly-2025.11.17.dist-info}/RECORD +32 -31
- {torchx_nightly-2025.9.28.dist-info → torchx_nightly-2025.11.17.dist-info}/WHEEL +1 -1
- {torchx_nightly-2025.9.28.dist-info → torchx_nightly-2025.11.17.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2025.9.28.dist-info → torchx_nightly-2025.11.17.dist-info/licenses}/LICENSE +0 -0
- {torchx_nightly-2025.9.28.dist-info → torchx_nightly-2025.11.17.dist-info}/top_level.txt +0 -0
torchx/specs/api.py
CHANGED
@@ -11,11 +11,15 @@ import copy
 import inspect
 import json
 import logging as logger
+import os
+import pathlib
 import re
+import shutil
 import typing
+import warnings
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
-from enum import Enum
+from enum import Enum, IntEnum
 from json import JSONDecodeError
 from string import Template
 from typing import (
@@ -66,6 +70,32 @@ YELLOW_BOLD = "\033[1;33m"
 RESET = "\033[0m"
 
 
+def TORCHX_HOME(*subdir_paths: str) -> pathlib.Path:
+    """
+    Path to the "dot-directory" for torchx.
+    Defaults to `~/.torchx` and is overridable via the `TORCHX_HOME` environment variable.
+
+    Usage:
+
+    .. doc-test::
+
+        from pathlib import Path
+        from torchx.specs import TORCHX_HOME
+
+        assert TORCHX_HOME() == Path.home() / ".torchx"
+        assert TORCHX_HOME("conda-pack-out") == Path.home() / ".torchx" / "conda-pack-out"
+    """
+
+    default_dir = str(pathlib.Path.home() / ".torchx")
+    torchx_home = pathlib.Path(os.getenv("TORCHX_HOME", default_dir))
+
+    torchx_home = torchx_home / os.path.sep.join(subdir_paths)
+    torchx_home.mkdir(parents=True, exist_ok=True)
+
+    return torchx_home
+
+
 # ========================================
 # ==== Distributed AppDef API =======
 # ========================================
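As an aside, here is a minimal sketch (not part of the diff) of the env-var override behavior of the `TORCHX_HOME` helper added above; the `torchx-home`/`cache` paths are made up for illustration, and the directory is created on first access:

    import os
    import pathlib
    import tempfile

    from torchx.specs.api import TORCHX_HOME

    # point the dot-directory somewhere writable via the TORCHX_HOME env var
    os.environ["TORCHX_HOME"] = os.path.join(tempfile.gettempdir(), "torchx-home")
    assert TORCHX_HOME("cache") == pathlib.Path(tempfile.gettempdir()) / "torchx-home" / "cache"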
@@ -322,6 +352,121 @@ class DeviceMount:
     permissions: str = "rwm"
 
 
+@dataclass
+class Workspace:
+    """
+    Specifies a local "workspace" (a set of directories). Workspaces are ad-hoc built
+    into an (usually ephemeral) image. This effectively mirrors the local code changes
+    at job submission time.
+
+    For example:
+
+    1. ``projects={"~/github/torch": "torch"}`` copies ``~/github/torch/**`` into ``$REMOTE_WORKSPACE_ROOT/torch/**``
+    2. ``projects={"~/github/torch": ""}`` copies ``~/github/torch/**`` into ``$REMOTE_WORKSPACE_ROOT/**``
+
+    The exact location of ``$REMOTE_WORKSPACE_ROOT`` is implementation dependent and varies between
+    different implementations of :py:class:`~torchx.workspace.api.WorkspaceMixin`.
+    Check the scheduler documentation for details on which workspace it supports.
+
+    Note: ``projects`` maps the location of the local project to a sub-directory in the remote workspace root directory.
+    Typically the local project location is a directory path (e.g. ``/home/foo/github/torch``).
+
+
+    Attributes:
+        projects: mapping of local project to the sub-dir in the remote workspace dir.
+    """
+
+    projects: dict[str, str]
+
+    def __bool__(self) -> bool:
+        """False if no projects mapping. Lets us use workspace object in an if-statement"""
+        return bool(self.projects)
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Workspace):
+            return False
+        return self.projects == other.projects
+
+    def __hash__(self) -> int:
+        # makes it possible to use Workspace as the key in the workspace build cache
+        # see WorkspaceMixin.caching_build_workspace_and_update_role
+        return hash(frozenset(self.projects.items()))
+
+    def is_unmapped_single_project(self) -> bool:
+        """
+        Returns ``True`` if this workspace only has 1 project
+        and its target mapping is an empty string.
+        """
+        return len(self.projects) == 1 and not next(iter(self.projects.values()))
+
+    def merge_into(self, outdir: str | pathlib.Path) -> None:
+        """
+        Copies each project dir of this workspace into the specified ``outdir``.
+        Each project dir is copied into ``{outdir}/{target}`` where ``target`` is
+        the target mapping of the project dir.
+
+        For example:
+
+        .. code-block:: python
+            from os.path import expanduser
+
+            workspace = Workspace(
+                projects={
+                    expanduser("~/workspace/torch"): "torch",
+                    expanduser("~/workspace/my_project"): ""
+                }
+            )
+            workspace.merge_into(expanduser("~/tmp"))
+
+        Copies:
+
+        * ``~/workspace/torch/**`` into ``~/tmp/torch/**``
+        * ``~/workspace/my_project/**`` into ``~/tmp/**``
+
+        """
+
+        for src, dst in self.projects.items():
+            dst_path = pathlib.Path(outdir) / dst
+            if pathlib.Path(src).is_file():
+                shutil.copy2(src, dst_path)
+            else:  # src is dir
+                shutil.copytree(src, dst_path, dirs_exist_ok=True)
+
+    @staticmethod
+    def from_str(workspace: str | None) -> "Workspace":
+        import yaml
+
+        if not workspace:
+            return Workspace({})
+
+        projects = yaml.safe_load(workspace)
+        if isinstance(projects, str):  # single project workspace
+            projects = {projects: ""}
+        else:  # multi-project workspace
+            # Replace None mappings with "" (empty string)
+            projects = {k: ("" if v is None else v) for k, v in projects.items()}
+
+        return Workspace(projects)
+
+    def __str__(self) -> str:
+        """
+        Returns a string representation of the Workspace by concatenating
+        the project mappings using ';' as a delimiter and ':' between key and value.
+        If the single-project workspace with no target mapping, then simply
+        returns the src (local project dir)
+
+        NOTE: meant to be used for logging purposes not serde.
+        Therefore not symmetric with :py:func:`Workspace.from_str`.
+
+        """
+        if self.is_unmapped_single_project():
+            return next(iter(self.projects))
+        else:
+            return ";".join(
+                k if not v else f"{k}:{v}" for k, v in self.projects.items()
+            )
+
+
 @dataclass
 class Role:
     """
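As a side note (not part of the diff), a minimal sketch of how the `Workspace` dataclass added above could be exercised, assuming this nightly build is installed; the file names and temp directories are made up for illustration:

    import pathlib
    import tempfile

    from torchx.specs.api import Workspace

    # single-project form: the project maps to the remote workspace root
    ws = Workspace.from_str("~/github/torch")
    assert ws.is_unmapped_single_project() and str(ws) == "~/github/torch"

    # multi-project form: each local dir is copied under its target sub-directory
    with tempfile.TemporaryDirectory() as proj_a, tempfile.TemporaryDirectory() as proj_b, tempfile.TemporaryDirectory() as out:
        (pathlib.Path(proj_a) / "main.py").write_text("print('hi')\n")
        (pathlib.Path(proj_b) / "util.py").write_text("X = 1\n")
        Workspace(projects={proj_a: "torch", proj_b: ""}).merge_into(out)
        assert (pathlib.Path(out) / "torch" / "main.py").exists()
        assert (pathlib.Path(out) / "util.py").exists()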
@@ -374,12 +519,15 @@ class Role:
         metadata: Free form information that is associated with the role, for example
             scheduler specific data. The key should follow the pattern: ``$scheduler.$key``
         mounts: a list of mounts on the machine
+        workspace: local project directories to be mirrored on the remote job.
+            NOTE: The workspace argument provided to the :py:class:`~torchx.runner.api.Runner` APIs
+            only takes effect on ``appdef.role[0]`` and overrides this attribute.
+
     """
 
     name: str
     image: str
     min_replicas: Optional[int] = None
-    base_image: Optional[str] = None  # DEPRECATED DO NOT SET, WILL BE REMOVED SOON
     entrypoint: str = MISSING
     args: List[str] = field(default_factory=list)
     env: Dict[str, str] = field(default_factory=dict)
@@ -389,9 +537,10 @@ class Role:
     resource: Resource = field(default_factory=_null_resource)
     port_map: Dict[str, int] = field(default_factory=dict)
     metadata: Dict[str, Any] = field(default_factory=dict)
-    mounts: List[
-        Union[BindMount, VolumeMount, DeviceMount]
-    ] = field(default_factory=list)
+    mounts: List[BindMount | VolumeMount | DeviceMount] = field(default_factory=list)
+    workspace: Workspace | None = None
+
+    # DEPRECATED DO NOT SET, WILL BE REMOVED SOON
     overrides: Dict[str, Any] = field(default_factory=dict)
 
     # pyre-ignore
@@ -791,6 +940,8 @@ class runopt:
     opt_type: Type[CfgVal]
     is_required: bool
     help: str
+    aliases: list[str] | None = None
+    deprecated_aliases: list[str] | None = None
 
     @property
     def is_type_list_of_str(self) -> bool:
@@ -826,7 +977,7 @@ class runopt:
 
         NOTE: dict parsing uses ":" as the kv separator (rather than the standard "=") because "=" is used
         at the top-level cfg to parse runopts (notice the plural) from the CLI. Originally torchx only supported
-        primitives and list[str] as CfgVal but dict[str,str] was added in https://github.com/pytorch/torchx/pull/855
+        primitives and list[str] as CfgVal but dict[str,str] was added in https://github.com/meta-pytorch/torchx/pull/855
         """
 
         if self.opt_type is None:
@@ -882,6 +1033,7 @@ class runopts:
 
     def __init__(self) -> None:
        self._opts: Dict[str, runopt] = {}
+       self._alias_to_key: dict[str, str] = {}
 
    def __iter__(self) -> Iterator[Tuple[str, runopt]]:
        return self._opts.items().__iter__()
@@ -909,9 +1061,16 @@ class runopts:
 
     def get(self, name: str) -> Optional[runopt]:
         """
-        Returns option if any was registered, or None otherwise
+        Returns option if any was registered, or None otherwise.
+        First searches for the option by ``name``, then falls-back to matching ``name`` with any
+        registered aliases.
+
         """
-        return self._opts.get(name)
+        if name in self._opts:
+            return self._opts[name]
+        if name in self._alias_to_key:
+            return self._opts[self._alias_to_key[name]]
+        return None
 
     def resolve(self, cfg: Mapping[str, CfgVal]) -> Dict[str, CfgVal]:
         """
@@ -926,6 +1085,36 @@ class runopts:
 
         for cfg_key, runopt in self._opts.items():
             val = resolved_cfg.get(cfg_key)
+            resolved_name = None
+            aliases = runopt.aliases or []
+            deprecated_aliases = runopt.deprecated_aliases or []
+            if val is None:
+                for alias in aliases:
+                    val = resolved_cfg.get(alias)
+                    if alias in cfg or val is not None:
+                        resolved_name = alias
+                        break
+                for alias in deprecated_aliases:
+                    val = resolved_cfg.get(alias)
+                    if val is not None:
+                        resolved_name = alias
+                        use_instead = self._alias_to_key.get(alias)
+                        warnings.warn(
+                            f"Run option `{alias}` is deprecated, use `{use_instead}` instead",
+                            UserWarning,
+                            stacklevel=2,
+                        )
+                        break
+            else:
+                resolved_name = cfg_key
+                for alias in aliases:
+                    duplicate_val = resolved_cfg.get(alias)
+                    if alias in cfg or duplicate_val is not None:
+                        raise InvalidRunConfigException(
+                            f"Duplicate opt name. runopt: `{resolved_name}``, is an alias of runopt: `{alias}`",
+                            resolved_name,
+                            cfg,
+                        )
 
             # check required opt
             if runopt.is_required and val is None:
@@ -945,7 +1134,7 @@ class runopts:
                 )
 
             # not required and not set, set to default
-            if val is None:
+            if val is None and resolved_name is None:
                 resolved_cfg[cfg_key] = runopt.default
         return resolved_cfg
 
@@ -1045,12 +1234,16 @@ class runopts:
         help: str,
         default: CfgVal = None,
         required: bool = False,
+        aliases: Optional[list[str]] = None,
+        deprecated_aliases: Optional[list[str]] = None,
     ) -> None:
         """
         Adds the ``config`` option with the given help string and ``default``
         value (if any). If the ``default`` is not specified then this option
         is a required option.
         """
+        aliases = aliases or []
+        deprecated_aliases = deprecated_aliases or []
         if required and default is not None:
             raise ValueError(
                 f"Required option: {cfg_key} must not specify default value. Given: {default}"
@@ -1062,7 +1255,19 @@ class runopts:
                 f" Given: {default} ({type(default).__name__})"
             )
 
-        self._opts[cfg_key] = runopt(default, type_, required, help)
+        opt = runopt(
+            default,
+            type_,
+            required,
+            help,
+            list(set(aliases)),
+            list(set(deprecated_aliases)),
+        )
+        for alias in aliases:
+            self._alias_to_key[alias] = cfg_key
+        for deprecated_alias in deprecated_aliases:
+            self._alias_to_key[deprecated_alias] = cfg_key
+        self._opts[cfg_key] = opt
 
     def update(self, other: "runopts") -> None:
         self._opts.update(other._opts)
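For context, a rough sketch (not from the diff) of how the alias support added to `runopts` above might be used; the option names `remote_dir`, `staging_dir`, and `old_dir` are hypothetical:

    from torchx.specs.api import runopts

    opts = runopts()
    opts.add(
        "remote_dir",
        type_=str,
        help="remote staging directory",
        default="/tmp/stage",
        aliases=["staging_dir"],
        deprecated_aliases=["old_dir"],
    )

    # the canonical key and its alias resolve to the same registered runopt
    assert opts.get("staging_dir") is opts.get("remote_dir")

    # a cfg keyed by the deprecated alias still resolves, but emits a UserWarning
    # suggesting `remote_dir`; passing both the key and an alias raises
    # InvalidRunConfigException per the duplicate check above
    resolved = opts.resolve({"old_dir": "/data/stage"})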
torchx/specs/file_linter.py
CHANGED
@@ -75,7 +75,7 @@ def get_fn_docstring(fn: Callable[..., object]) -> Tuple[str, Dict[str, str]]:
         if the description
     """
     default_fn_desc = f"""{fn.__name__} TIP: improve this help string by adding a docstring
-to your component (see: https://pytorch.org/torchx/latest/component_best_practices.html)"""
+to your component (see: https://meta-pytorch.org/torchx/latest/component_best_practices.html)"""
     args_description = _get_default_arguments_descriptions(fn)
     func_description = inspect.getdoc(fn)
     if not func_description:
torchx/specs/finder.py
CHANGED
@@ -452,7 +452,7 @@ def get_component(
         raise ComponentNotFoundException(
             f"Component `{name}` not found. Please make sure it is one of the "
             "builtins: `torchx builtins`. Or registered via `[torchx.components]` "
-            "entry point (see: https://pytorch.org/torchx/latest/configure.html)"
+            "entry point (see: https://meta-pytorch.org/torchx/latest/configure.html)"
         )
 
     component = components[name]
torchx/specs/named_resources_aws.py
CHANGED
@@ -16,7 +16,7 @@ the equvalent resource in mem, cpu and gpu numbers.
 
 .. note::
     These resource definitions may change in future. It is expected for each user to
-    manage their own resources. Follow https://pytorch.org/torchx/latest/specs.html#torchx.specs.get_named_resources
+    manage their own resources. Follow https://meta-pytorch.org/torchx/latest/specs.html#torchx.specs.get_named_resources
     to set up named resources.
 
 Usage:
@@ -47,7 +47,7 @@ NEURON_DEVICE = "aws.amazon.com/neurondevice"
 MEM_TAX = 0.96
 
 # determines instance type for non-honogeneous CEs
-# see https://github.com/pytorch/torchx/issues/780
+# see https://github.com/meta-pytorch/torchx/issues/780
 K8S_ITYPE = "node.kubernetes.io/instance-type"
 GiB: int = int(1024 * MEM_TAX)
 
@@ -120,6 +120,16 @@ def aws_p5_48xlarge() -> Resource:
     )
 
 
+def aws_p5e_48xlarge() -> Resource:
+    return Resource(
+        cpu=192,
+        gpu=8,
+        memMB=2048 * GiB,
+        capabilities={K8S_ITYPE: "p5e.48xlarge"},
+        devices={EFA_DEVICE: 32},
+    )
+
+
 def aws_p5en_48xlarge() -> Resource:
     return Resource(
         cpu=192,
@@ -419,6 +429,7 @@ NAMED_RESOURCES: Mapping[str, Callable[[], Resource]] = {
     "aws_p4d.24xlarge": aws_p4d_24xlarge,
     "aws_p4de.24xlarge": aws_p4de_24xlarge,
     "aws_p5.48xlarge": aws_p5_48xlarge,
+    "aws_p5e.48xlarge": aws_p5e_48xlarge,
     "aws_p5en.48xlarge": aws_p5en_48xlarge,
     "aws_g4dn.xlarge": aws_g4dn_xlarge,
     "aws_g4dn.2xlarge": aws_g4dn_2xlarge,
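For reference, a small sketch (not part of the diff) showing how the newly registered p5e resource could be looked up directly from the module changed above:

    from torchx.specs.named_resources_aws import NAMED_RESOURCES

    # "aws_p5e.48xlarge" is the key added to NAMED_RESOURCES in this release
    p5e = NAMED_RESOURCES["aws_p5e.48xlarge"]()
    print(p5e.cpu, p5e.gpu, p5e.memMB)  # 192 CPUs, 8 GPUs; memMB reflects the 4% MEM_TAX discount

The documented `torchx.specs.get_named_resources` helper (linked in the hunk above) is presumably the higher-level way to resolve these names by string.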
torchx/tracker/__init__.py
CHANGED
@@ -32,7 +32,7 @@ implementation.
 
 Example usage
 -------------
-Sample `code <https://github.com/pytorch/torchx/blob/main/torchx/examples/apps/tracker/main.py>`__ using tracker API.
+Sample `code <https://github.com/meta-pytorch/torchx/blob/main/torchx/examples/apps/tracker/main.py>`__ using tracker API.
 
 
 Tracker Setup
@@ -111,7 +111,7 @@ Use :py:meth:`~torchx.tracker.app_run_from_env`:
 Reference :py:class:`~torchx.tracker.api.TrackerBase` implementation
 --------------------------------------------------------------------
 :py:class:`~torchx.tracker.backend.fsspec.FsspecTracker` provides reference implementation of a tracker backend.
-GitHub example `directory <https://github.com/pytorch/torchx/blob/main/torchx/examples/apps/tracker/>`__ provides example on how to
+GitHub example `directory <https://github.com/meta-pytorch/torchx/blob/main/torchx/examples/apps/tracker/>`__ provides example on how to
 configure and use it in user application.
 
 
torchx/tracker/api.py
CHANGED
@@ -191,7 +191,7 @@ def build_trackers(
         factory = entrypoint_factories.get(factory_name) or load_module(factory_name)
         if not factory:
             logger.warning(
-                f"No tracker factory `{factory_name}` found in entry_points or modules. See https://pytorch.org/torchx/main/tracker.html#module-torchx.tracker"
+                f"No tracker factory `{factory_name}` found in entry_points or modules. See https://meta-pytorch.org/torchx/main/tracker.html#module-torchx.tracker"
             )
             continue
         if config:
torchx/util/entrypoints.py
CHANGED
@@ -69,9 +69,7 @@ def _defer_load_ep(ep: EntryPoint) -> object:
     return run
 
 
-def load_group(
-    group: str, default: Optional[Dict[str, Any]] = None, skip_defaults: bool = False
-):
+def load_group(group: str, default: Optional[Dict[str, Any]] = None):
     """
     Loads all the entry points specified by ``group`` and returns
     the entry points as a map of ``name (str) -> deferred_load_fn``.
@@ -90,7 +88,6 @@ def load_group(
     1. ``load_group("foo")["bar"]("baz")`` -> equivalent to calling ``this.is.a_fn("baz")``
     1. ``load_group("food")`` -> ``None``
     1. ``load_group("food", default={"hello": this.is.c_fn})["hello"]("world")`` -> equivalent to calling ``this.is.c_fn("world")``
-    1. ``load_group("food", default={"hello": this.is.c_fn}, skip_defaults=True)`` -> ``None``
 
 
     If the entrypoint is a module (versus a function as shown above), then calling the ``deferred_load_fn``
@@ -115,8 +112,6 @@ def load_group(
     entrypoints = metadata.entry_points().get(group, ())
 
     if len(entrypoints) == 0:
-        if skip_defaults:
-            return None
         return default
 
     eps = {}
torchx/version.py
CHANGED
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
@@ -7,6 +6,7 @@
 
 # pyre-strict
 
+from torchx._version import BASE_VERSION
 from torchx.util.entrypoints import load
 
 # Follows PEP-0440 version scheme guidelines
@@ -18,7 +18,7 @@ from torchx.util.entrypoints import load
 # 0.1.0bN  # Beta release
 # 0.1.0rcN  # Release Candidate
 # 0.1.0  # Final release
-__version__ =
+__version__: str = BASE_VERSION
 
 
 # Use the github container registry images corresponding to the current package
torchx/workspace/__init__.py
CHANGED
@@ -22,4 +22,4 @@ Example workspace paths:
 * ``memory://foo-bar/`` an in-memory workspace for notebook/programmatic usage
 """
 
-from torchx.workspace.api import walk_workspace,
+from torchx.workspace.api import walk_workspace, WorkspaceMixin  # noqa: F401