torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/__init__.py +2 -0
- torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
- torchx/apps/serve/serve.py +2 -0
- torchx/apps/utils/booth_main.py +2 -0
- torchx/apps/utils/copy_main.py +2 -0
- torchx/apps/utils/process_monitor.py +2 -0
- torchx/cli/__init__.py +2 -0
- torchx/cli/argparse_util.py +38 -3
- torchx/cli/cmd_base.py +2 -0
- torchx/cli/cmd_cancel.py +2 -0
- torchx/cli/cmd_configure.py +2 -0
- torchx/cli/cmd_delete.py +30 -0
- torchx/cli/cmd_describe.py +2 -0
- torchx/cli/cmd_list.py +8 -4
- torchx/cli/cmd_log.py +6 -24
- torchx/cli/cmd_run.py +269 -45
- torchx/cli/cmd_runopts.py +2 -0
- torchx/cli/cmd_status.py +12 -1
- torchx/cli/cmd_tracker.py +3 -1
- torchx/cli/colors.py +2 -0
- torchx/cli/main.py +4 -0
- torchx/components/__init__.py +3 -8
- torchx/components/component_test_base.py +2 -0
- torchx/components/dist.py +18 -7
- torchx/components/integration_tests/component_provider.py +4 -2
- torchx/components/integration_tests/integ_tests.py +2 -0
- torchx/components/serve.py +2 -0
- torchx/components/structured_arg.py +7 -6
- torchx/components/utils.py +15 -4
- torchx/distributed/__init__.py +2 -4
- torchx/examples/apps/datapreproc/datapreproc.py +2 -0
- torchx/examples/apps/lightning/data.py +5 -3
- torchx/examples/apps/lightning/model.py +7 -6
- torchx/examples/apps/lightning/profiler.py +7 -4
- torchx/examples/apps/lightning/train.py +11 -2
- torchx/examples/torchx_out_of_sync_training.py +11 -0
- torchx/notebook.py +2 -0
- torchx/runner/__init__.py +2 -0
- torchx/runner/api.py +167 -60
- torchx/runner/config.py +43 -10
- torchx/runner/events/__init__.py +57 -13
- torchx/runner/events/api.py +14 -3
- torchx/runner/events/handlers.py +2 -0
- torchx/runtime/tracking/__init__.py +2 -0
- torchx/runtime/tracking/api.py +2 -0
- torchx/schedulers/__init__.py +16 -15
- torchx/schedulers/api.py +70 -14
- torchx/schedulers/aws_batch_scheduler.py +79 -5
- torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
- torchx/schedulers/devices.py +17 -4
- torchx/schedulers/docker_scheduler.py +43 -11
- torchx/schedulers/ids.py +29 -23
- torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
- torchx/schedulers/kubernetes_scheduler.py +383 -38
- torchx/schedulers/local_scheduler.py +100 -27
- torchx/schedulers/lsf_scheduler.py +5 -4
- torchx/schedulers/slurm_scheduler.py +336 -20
- torchx/schedulers/streams.py +2 -0
- torchx/specs/__init__.py +89 -12
- torchx/specs/api.py +431 -32
- torchx/specs/builders.py +176 -38
- torchx/specs/file_linter.py +143 -57
- torchx/specs/finder.py +68 -28
- torchx/specs/named_resources_aws.py +254 -22
- torchx/specs/named_resources_generic.py +2 -0
- torchx/specs/overlays.py +106 -0
- torchx/specs/test/components/__init__.py +2 -0
- torchx/specs/test/components/a/__init__.py +2 -0
- torchx/specs/test/components/a/b/__init__.py +2 -0
- torchx/specs/test/components/a/b/c.py +2 -0
- torchx/specs/test/components/c/__init__.py +2 -0
- torchx/specs/test/components/c/d.py +2 -0
- torchx/tracker/__init__.py +12 -6
- torchx/tracker/api.py +15 -18
- torchx/tracker/backend/fsspec.py +2 -0
- torchx/util/cuda.py +2 -0
- torchx/util/datetime.py +2 -0
- torchx/util/entrypoints.py +39 -15
- torchx/util/io.py +2 -0
- torchx/util/log_tee_helpers.py +210 -0
- torchx/util/modules.py +65 -0
- torchx/util/session.py +42 -0
- torchx/util/shlex.py +2 -0
- torchx/util/strings.py +3 -1
- torchx/util/types.py +90 -29
- torchx/version.py +4 -2
- torchx/workspace/__init__.py +2 -0
- torchx/workspace/api.py +136 -6
- torchx/workspace/dir_workspace.py +2 -0
- torchx/workspace/docker_workspace.py +30 -2
- torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
- torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
- torchx/examples/pipelines/__init__.py +0 -0
- torchx/examples/pipelines/kfp/__init__.py +0 -0
- torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
- torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
- torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
- torchx/pipelines/kfp/__init__.py +0 -28
- torchx/pipelines/kfp/adapter.py +0 -271
- torchx/pipelines/kfp/version.py +0 -17
- torchx/schedulers/gcp_batch_scheduler.py +0 -487
- torchx/schedulers/ray/ray_common.py +0 -22
- torchx/schedulers/ray/ray_driver.py +0 -307
- torchx/schedulers/ray_scheduler.py +0 -453
- torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
- torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
torchx/__init__.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
1
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
2
|
# All rights reserved.
|
|
4
3
|
#
|
|
5
4
|
# This source code is licensed under the BSD-style license found in the
|
|
6
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
# pyre-strict
|
|
8
|
+
BASE_VERSION = "0.8.0dev0"
|
torchx/apps/serve/serve.py
CHANGED
torchx/apps/utils/booth_main.py
CHANGED
torchx/apps/utils/copy_main.py
CHANGED
torchx/cli/__init__.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
The ``torchx`` CLI is a commandline tool around :py:class:`torchx.runner.Runner`.
|
|
9
11
|
It allows users to launch :py:class:`torchx.specs.AppDef` directly onto
|
torchx/cli/argparse_util.py
CHANGED
|
@@ -4,19 +4,27 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import sys
|
|
7
11
|
from argparse import Action, ArgumentParser, Namespace
|
|
8
|
-
from typing import Any, Dict, Optional, Sequence, Text
|
|
12
|
+
from typing import Any, Dict, List, Optional, Sequence, Set, Text
|
|
9
13
|
|
|
10
14
|
from torchx.runner import config
|
|
11
15
|
|
|
16
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
12
18
|
|
|
13
|
-
class _torchxconfig(Action):
|
|
19
|
+
class torchxconfig(Action):
|
|
14
20
|
"""
|
|
15
21
|
Custom argparse action that loads default torchx CLI options
|
|
16
22
|
from .torchxconfig file.
|
|
17
23
|
|
|
18
24
|
"""
|
|
19
25
|
|
|
26
|
+
called_args: Set[str] = set()
|
|
27
|
+
|
|
20
28
|
# since this action is used for each argparse argument
|
|
21
29
|
# load the config section for the subcmd once
|
|
22
30
|
_subcmd_configs: Dict[str, Dict[str, str]] = {}
|
|
@@ -64,13 +72,18 @@ class _torchxconfig(Action):
|
|
|
64
72
|
values: Any, # pyre-ignore[2] declared as Any in superclass Action
|
|
65
73
|
option_string: Optional[str] = None,
|
|
66
74
|
) -> None:
|
|
75
|
+
if option_string is not None:
|
|
76
|
+
if option_string in self.called_args:
|
|
77
|
+
logger.error(f"{option_string} is specified more than once")
|
|
78
|
+
sys.exit(1)
|
|
79
|
+
self.called_args.add(option_string)
|
|
67
80
|
setattr(namespace, self.dest, values)
|
|
68
81
|
|
|
69
82
|
|
|
70
83
|
# argparse takes the action as a Type[Action] so we can't have custom constructors
|
|
71
84
|
# hence for each subcommand we need to subclass the base _torchxconfig Action
|
|
72
85
|
# this is also how store_true and store_false builtin actions are implemented in argparse
|
|
73
|
-
class torchxconfig_run(_torchxconfig):
|
|
86
|
+
class torchxconfig_run(torchxconfig):
|
|
74
87
|
"""
|
|
75
88
|
Custom action that gets the default argument from .torchxconfig.
|
|
76
89
|
"""
|
|
@@ -92,3 +105,25 @@ class torchxconfig_run(_torchxconfig):
|
|
|
92
105
|
option_strings=option_strings,
|
|
93
106
|
**kwargs,
|
|
94
107
|
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class ArgOnceAction(Action):
|
|
111
|
+
"""
|
|
112
|
+
Custom argparse action only allows argument to be specified once
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
called_args: Set[str] = set()
|
|
116
|
+
|
|
117
|
+
def __call__(
|
|
118
|
+
self,
|
|
119
|
+
parser: ArgumentParser,
|
|
120
|
+
namespace: Namespace,
|
|
121
|
+
values: List[str],
|
|
122
|
+
option_string: Optional[str] = None,
|
|
123
|
+
) -> None:
|
|
124
|
+
if option_string is not None:
|
|
125
|
+
if option_string in self.called_args:
|
|
126
|
+
logger.error(f"{option_string} is specified more than once")
|
|
127
|
+
sys.exit(1)
|
|
128
|
+
self.called_args.add(option_string)
|
|
129
|
+
setattr(namespace, self.dest, values)
|
torchx/cli/cmd_base.py
CHANGED
torchx/cli/cmd_cancel.py
CHANGED
torchx/cli/cmd_configure.py
CHANGED
torchx/cli/cmd_delete.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This source code is licensed under the BSD-style license found in the
|
|
6
|
+
# LICENSE file in the root directory of this source tree.
|
|
7
|
+
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import logging
|
|
12
|
+
|
|
13
|
+
from torchx.cli.cmd_base import SubCommand
|
|
14
|
+
from torchx.runner import get_runner
|
|
15
|
+
|
|
16
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CmdDelete(SubCommand):
|
|
20
|
+
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
|
|
21
|
+
subparser.add_argument(
|
|
22
|
+
"app_handle",
|
|
23
|
+
type=str,
|
|
24
|
+
help="torchx app handle (e.g. local://session-name/app-id)",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
def run(self, args: argparse.Namespace) -> None:
|
|
28
|
+
app_handle = args.app_handle
|
|
29
|
+
runner = get_runner()
|
|
30
|
+
runner.delete(app_handle)
|
torchx/cli/cmd_describe.py
CHANGED
torchx/cli/cmd_list.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
import argparse
|
|
9
11
|
import logging
|
|
10
12
|
|
|
@@ -19,6 +21,7 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
19
21
|
|
|
20
22
|
HANDLE_HEADER = "APP HANDLE"
|
|
21
23
|
STATUS_HEADER = "APP STATUS"
|
|
24
|
+
NAME_HEADER = "APP NAME"
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
class CmdList(SubCommand):
|
|
@@ -30,12 +33,13 @@ class CmdList(SubCommand):
|
|
|
30
33
|
type=str,
|
|
31
34
|
default=get_default_scheduler_name(),
|
|
32
35
|
choices=list(scheduler_names),
|
|
33
|
-
help=f"Name of the scheduler to use. One of: [{','.join(scheduler_names)}]."
|
|
34
|
-
" For listing app handles for ray scheduler, RAY_ADDRESS env variable should be set.",
|
|
36
|
+
help=f"Name of the scheduler to use. One of: [{','.join(scheduler_names)}].",
|
|
35
37
|
)
|
|
36
38
|
|
|
37
39
|
def run(self, args: argparse.Namespace) -> None:
|
|
38
40
|
with get_runner() as runner:
|
|
39
41
|
apps = runner.list(args.scheduler)
|
|
40
|
-
apps_data = [[app.app_handle, str(app.state)] for app in apps]
|
|
41
|
-
print(tabulate(apps_data, headers=[HANDLE_HEADER, STATUS_HEADER]))
|
|
42
|
+
apps_data = [[app.app_handle, app.name, str(app.state)] for app in apps]
|
|
43
|
+
print(
|
|
44
|
+
tabulate(apps_data, headers=[HANDLE_HEADER, NAME_HEADER, STATUS_HEADER])
|
|
45
|
+
)
|
torchx/cli/cmd_log.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
import argparse
|
|
9
11
|
import logging
|
|
10
12
|
import re
|
|
@@ -21,6 +23,10 @@ from torchx.runner import get_runner, Runner
|
|
|
21
23
|
from torchx.schedulers.api import Stream
|
|
22
24
|
from torchx.specs.api import is_started
|
|
23
25
|
from torchx.specs.builders import make_app_handle
|
|
26
|
+
from torchx.util.log_tee_helpers import (
|
|
27
|
+
_find_role_replicas as find_role_replicas,
|
|
28
|
+
_prefix_line,
|
|
29
|
+
)
|
|
24
30
|
|
|
25
31
|
from torchx.util.types import none_throws
|
|
26
32
|
|
|
@@ -37,19 +43,6 @@ def validate(job_identifier: str) -> None:
|
|
|
37
43
|
sys.exit(1)
|
|
38
44
|
|
|
39
45
|
|
|
40
|
-
def _prefix_line(prefix: str, line: str) -> str:
|
|
41
|
-
"""
|
|
42
|
-
_prefix_line ensure the prefix is still present even when dealing with return characters
|
|
43
|
-
"""
|
|
44
|
-
if "\r" in line:
|
|
45
|
-
line = line.replace("\r", f"\r{prefix}")
|
|
46
|
-
if "\n" in line[:-1]:
|
|
47
|
-
line = line[:-1].replace("\n", f"\n{prefix}") + line[-1:]
|
|
48
|
-
if not line.startswith("\r"):
|
|
49
|
-
line = f"{prefix}{line}"
|
|
50
|
-
return line
|
|
51
|
-
|
|
52
|
-
|
|
53
46
|
def print_log_lines(
|
|
54
47
|
file: TextIO,
|
|
55
48
|
runner: Runner,
|
|
@@ -165,17 +158,6 @@ def get_logs(
|
|
|
165
158
|
raise threads_exceptions[0]
|
|
166
159
|
|
|
167
160
|
|
|
168
|
-
def find_role_replicas(
|
|
169
|
-
app: specs.AppDef, role_name: Optional[str]
|
|
170
|
-
) -> List[Tuple[str, int]]:
|
|
171
|
-
role_replicas = []
|
|
172
|
-
for role in app.roles:
|
|
173
|
-
if role_name is None or role_name == role.name:
|
|
174
|
-
for i in range(role.num_replicas):
|
|
175
|
-
role_replicas.append((role.name, i))
|
|
176
|
-
return role_replicas
|
|
177
|
-
|
|
178
|
-
|
|
179
161
|
class CmdLog(SubCommand):
|
|
180
162
|
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
|
|
181
163
|
subparser.add_argument(
|