torchx-nightly 2024.2.11__py3-none-any.whl → 2024.4.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/__init__.py +2 -0
- torchx/apps/serve/serve.py +2 -0
- torchx/apps/utils/booth_main.py +2 -0
- torchx/apps/utils/copy_main.py +2 -0
- torchx/apps/utils/process_monitor.py +2 -0
- torchx/cli/__init__.py +2 -0
- torchx/cli/argparse_util.py +38 -3
- torchx/cli/cmd_base.py +2 -0
- torchx/cli/cmd_cancel.py +2 -0
- torchx/cli/cmd_configure.py +2 -0
- torchx/cli/cmd_describe.py +2 -0
- torchx/cli/cmd_list.py +2 -0
- torchx/cli/cmd_log.py +2 -0
- torchx/cli/cmd_run.py +5 -1
- torchx/cli/cmd_runopts.py +2 -0
- torchx/cli/cmd_status.py +2 -0
- torchx/cli/cmd_tracker.py +2 -0
- torchx/cli/colors.py +2 -0
- torchx/cli/main.py +2 -0
- torchx/components/__init__.py +2 -0
- torchx/components/component_test_base.py +2 -0
- torchx/components/dist.py +2 -0
- torchx/components/integration_tests/component_provider.py +2 -0
- torchx/components/integration_tests/integ_tests.py +2 -0
- torchx/components/serve.py +2 -0
- torchx/components/structured_arg.py +2 -0
- torchx/components/utils.py +2 -0
- torchx/examples/apps/datapreproc/datapreproc.py +2 -0
- torchx/examples/apps/lightning/data.py +2 -0
- torchx/examples/apps/lightning/model.py +2 -0
- torchx/examples/apps/lightning/profiler.py +2 -0
- torchx/examples/apps/lightning/train.py +2 -0
- torchx/examples/pipelines/kfp/advanced_pipeline.py +2 -0
- torchx/examples/pipelines/kfp/dist_pipeline.py +2 -0
- torchx/examples/pipelines/kfp/intro_pipeline.py +2 -0
- torchx/notebook.py +2 -0
- torchx/pipelines/kfp/__init__.py +2 -0
- torchx/pipelines/kfp/adapter.py +3 -2
- torchx/pipelines/kfp/version.py +2 -0
- torchx/runner/__init__.py +2 -0
- torchx/runner/api.py +4 -2
- torchx/runner/config.py +14 -0
- torchx/runner/events/__init__.py +2 -0
- torchx/runner/events/api.py +2 -0
- torchx/runner/events/handlers.py +2 -0
- torchx/runtime/tracking/__init__.py +2 -0
- torchx/runtime/tracking/api.py +2 -0
- torchx/schedulers/__init__.py +4 -2
- torchx/schedulers/api.py +2 -0
- torchx/schedulers/aws_batch_scheduler.py +2 -0
- torchx/schedulers/aws_sagemaker_scheduler.py +590 -0
- torchx/schedulers/devices.py +2 -0
- torchx/schedulers/docker_scheduler.py +18 -3
- torchx/schedulers/gcp_batch_scheduler.py +7 -8
- torchx/schedulers/ids.py +2 -0
- torchx/schedulers/kubernetes_scheduler.py +3 -1
- torchx/schedulers/local_scheduler.py +24 -2
- torchx/schedulers/lsf_scheduler.py +2 -0
- torchx/schedulers/ray/ray_driver.py +6 -6
- torchx/schedulers/slurm_scheduler.py +2 -0
- torchx/schedulers/streams.py +2 -0
- torchx/specs/__init__.py +2 -0
- torchx/specs/api.py +14 -3
- torchx/specs/builders.py +68 -19
- torchx/specs/file_linter.py +8 -2
- torchx/specs/finder.py +2 -0
- torchx/specs/named_resources_aws.py +2 -0
- torchx/specs/named_resources_generic.py +2 -0
- torchx/specs/test/components/__init__.py +2 -0
- torchx/specs/test/components/a/__init__.py +2 -0
- torchx/specs/test/components/a/b/__init__.py +2 -0
- torchx/specs/test/components/a/b/c.py +2 -0
- torchx/specs/test/components/c/__init__.py +2 -0
- torchx/specs/test/components/c/d.py +2 -0
- torchx/tracker/__init__.py +2 -0
- torchx/tracker/api.py +4 -4
- torchx/tracker/backend/fsspec.py +2 -0
- torchx/util/cuda.py +2 -0
- torchx/util/datetime.py +2 -0
- torchx/util/entrypoints.py +2 -0
- torchx/util/io.py +2 -0
- torchx/util/modules.py +2 -0
- torchx/util/shlex.py +2 -0
- torchx/util/strings.py +2 -0
- torchx/util/types.py +12 -0
- torchx/version.py +2 -0
- torchx/workspace/__init__.py +2 -0
- torchx/workspace/api.py +2 -0
- torchx/workspace/dir_workspace.py +2 -0
- torchx/workspace/docker_workspace.py +2 -0
- {torchx_nightly-2024.2.11.dist-info → torchx_nightly-2024.4.10.dist-info}/METADATA +25 -17
- torchx_nightly-2024.4.10.dist-info/RECORD +120 -0
- {torchx_nightly-2024.2.11.dist-info → torchx_nightly-2024.4.10.dist-info}/WHEEL +1 -1
- torchx_nightly-2024.2.11.dist-info/RECORD +0 -119
- {torchx_nightly-2024.2.11.dist-info → torchx_nightly-2024.4.10.dist-info}/LICENSE +0 -0
- {torchx_nightly-2024.2.11.dist-info → torchx_nightly-2024.4.10.dist-info}/entry_points.txt +0 -0
- {torchx_nightly-2024.2.11.dist-info → torchx_nightly-2024.4.10.dist-info}/top_level.txt +0 -0
torchx/__init__.py
CHANGED
torchx/apps/serve/serve.py
CHANGED
torchx/apps/utils/booth_main.py
CHANGED
torchx/apps/utils/copy_main.py
CHANGED
torchx/cli/__init__.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
The ``torchx`` CLI is a commandline tool around :py:class:`torchx.runner.Runner`.
|
|
9
11
|
It allows users to launch :py:class:`torchx.specs.AppDef` directly onto
|
torchx/cli/argparse_util.py
CHANGED
|
@@ -4,19 +4,27 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import sys
|
|
7
11
|
from argparse import Action, ArgumentParser, Namespace
|
|
8
|
-
from typing import Any, Dict, Optional, Sequence, Text
|
|
12
|
+
from typing import Any, Dict, List, Optional, Sequence, Set, Text
|
|
9
13
|
|
|
10
14
|
from torchx.runner import config
|
|
11
15
|
|
|
16
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
12
18
|
|
|
13
|
-
class _torchxconfig(Action):
|
|
19
|
+
class torchxconfig(Action):
|
|
14
20
|
"""
|
|
15
21
|
Custom argparse action that loads default torchx CLI options
|
|
16
22
|
from .torchxconfig file.
|
|
17
23
|
|
|
18
24
|
"""
|
|
19
25
|
|
|
26
|
+
called_args: Set[str] = set()
|
|
27
|
+
|
|
20
28
|
# since this action is used for each argparse argument
|
|
21
29
|
# load the config section for the subcmd once
|
|
22
30
|
_subcmd_configs: Dict[str, Dict[str, str]] = {}
|
|
@@ -64,13 +72,18 @@ class _torchxconfig(Action):
|
|
|
64
72
|
values: Any, # pyre-ignore[2] declared as Any in superclass Action
|
|
65
73
|
option_string: Optional[str] = None,
|
|
66
74
|
) -> None:
|
|
75
|
+
if option_string is not None:
|
|
76
|
+
if option_string in self.called_args:
|
|
77
|
+
logger.error(f"{option_string} is specified more than once")
|
|
78
|
+
sys.exit(1)
|
|
79
|
+
self.called_args.add(option_string)
|
|
67
80
|
setattr(namespace, self.dest, values)
|
|
68
81
|
|
|
69
82
|
|
|
70
83
|
# argparse takes the action as a Type[Action] so we can't have custom constructors
|
|
71
84
|
# hence for each subcommand we need to subclass the base _torchxconfig Action
|
|
72
85
|
# this is also how store_true and store_false builtin actions are implemented in argparse
|
|
73
|
-
class torchxconfig_run(_torchxconfig):
|
|
86
|
+
class torchxconfig_run(torchxconfig):
|
|
74
87
|
"""
|
|
75
88
|
Custom action that gets the default argument from .torchxconfig.
|
|
76
89
|
"""
|
|
@@ -92,3 +105,25 @@ class torchxconfig_run(_torchxconfig):
|
|
|
92
105
|
option_strings=option_strings,
|
|
93
106
|
**kwargs,
|
|
94
107
|
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class ArgOnceAction(Action):
|
|
111
|
+
"""
|
|
112
|
+
Custom argparse action only allows argument to be specified once
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
called_args: Set[str] = set()
|
|
116
|
+
|
|
117
|
+
def __call__(
|
|
118
|
+
self,
|
|
119
|
+
parser: ArgumentParser,
|
|
120
|
+
namespace: Namespace,
|
|
121
|
+
values: List[str],
|
|
122
|
+
option_string: Optional[str] = None,
|
|
123
|
+
) -> None:
|
|
124
|
+
if option_string is not None:
|
|
125
|
+
if option_string in self.called_args:
|
|
126
|
+
logger.error(f"{option_string} is specified more than once")
|
|
127
|
+
sys.exit(1)
|
|
128
|
+
self.called_args.add(option_string)
|
|
129
|
+
setattr(namespace, self.dest, values)
|
torchx/cli/cmd_base.py
CHANGED
torchx/cli/cmd_cancel.py
CHANGED
torchx/cli/cmd_configure.py
CHANGED
torchx/cli/cmd_describe.py
CHANGED
torchx/cli/cmd_list.py
CHANGED
torchx/cli/cmd_log.py
CHANGED
torchx/cli/cmd_run.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
import argparse
|
|
8
10
|
import logging
|
|
9
11
|
import os
|
|
@@ -15,7 +17,7 @@ from pprint import pformat
|
|
|
15
17
|
from typing import Dict, List, Optional, Tuple
|
|
16
18
|
|
|
17
19
|
import torchx.specs as specs
|
|
18
|
-
from torchx.cli.argparse_util import torchxconfig_run
|
|
20
|
+
from torchx.cli.argparse_util import ArgOnceAction, torchxconfig_run
|
|
19
21
|
from torchx.cli.cmd_base import SubCommand
|
|
20
22
|
from torchx.cli.cmd_log import get_logs
|
|
21
23
|
from torchx.runner import config, get_runner, Runner
|
|
@@ -131,6 +133,7 @@ class CmdRun(SubCommand):
|
|
|
131
133
|
"-cfg",
|
|
132
134
|
"--scheduler_args",
|
|
133
135
|
type=str,
|
|
136
|
+
action=ArgOnceAction,
|
|
134
137
|
help="Arguments to pass to the scheduler (Ex:`cluster=foo,user=bar`)."
|
|
135
138
|
" For a list of scheduler run options run: `torchx runopts`",
|
|
136
139
|
)
|
|
@@ -163,6 +166,7 @@ class CmdRun(SubCommand):
|
|
|
163
166
|
subparser.add_argument(
|
|
164
167
|
"--parent_run_id",
|
|
165
168
|
type=str,
|
|
169
|
+
action=ArgOnceAction,
|
|
166
170
|
help="optional parent run ID that this run belongs to."
|
|
167
171
|
" It can be used to group runs for experiment tracking purposes",
|
|
168
172
|
)
|
torchx/cli/cmd_runopts.py
CHANGED
torchx/cli/cmd_status.py
CHANGED
torchx/cli/cmd_tracker.py
CHANGED
torchx/cli/colors.py
CHANGED
torchx/cli/main.py
CHANGED
torchx/components/__init__.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
This module contains a collection of builtin TorchX components. The directory
|
|
9
11
|
structure is organized by component category. Components are simply
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
You can unit test the component definitions as you would normal Python code
|
|
9
11
|
since they are valid Python definitions.
|
torchx/components/dist.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
For distributed training, TorchX relies on the scheduler's gang scheduling
|
|
9
11
|
capabilities to schedule ``n`` copies of nodes. Once launched, the application
|
torchx/components/serve.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
These components aim to make it easier to interact with inference and serving
|
|
9
11
|
tools such as `torchserve <https://pytorch.org/serve/>`_.
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
Defines methods for structured (higher order) component argument parsing.
|
|
9
11
|
Use the functionalities defined in this module to author components
|
torchx/components/utils.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
"""
|
|
8
10
|
This contains TorchX utility components that are `ready-to-use` out of the box. These are
|
|
9
11
|
components that simply execute well known binaries (e.g. ``cp``)
|
torchx/notebook.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
"""
|
|
9
11
|
This contains TorchX utilities for creating and running components and apps from
|
|
10
12
|
an Jupyter/IPython Notebook.
|
torchx/pipelines/kfp/__init__.py
CHANGED
torchx/pipelines/kfp/adapter.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
import json
|
|
9
11
|
import os
|
|
10
12
|
import os.path
|
|
@@ -74,8 +76,7 @@ class ContainerFactory(Protocol):
|
|
|
74
76
|
kfp.dsl.ContainerOp.
|
|
75
77
|
"""
|
|
76
78
|
|
|
77
|
-
def __call__(self, *args: object, **kwargs: object) -> dsl.ContainerOp:
|
|
78
|
-
...
|
|
79
|
+
def __call__(self, *args: object, **kwargs: object) -> dsl.ContainerOp: ...
|
|
79
80
|
|
|
80
81
|
|
|
81
82
|
class KFPContainerFactory(ContainerFactory, Protocol):
|
torchx/pipelines/kfp/version.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
# Follows PEP-0440 version scheme guidelines
|
|
9
11
|
# https://www.python.org/dev/peps/pep-0440/#version-scheme
|
|
10
12
|
#
|
torchx/runner/__init__.py
CHANGED
torchx/runner/api.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
import json
|
|
9
11
|
import logging
|
|
10
12
|
import os
|
|
@@ -131,7 +133,7 @@ class Runner:
|
|
|
131
133
|
It is ok to call this method multiple times on the same runner object.
|
|
132
134
|
"""
|
|
133
135
|
|
|
134
|
-
for
|
|
136
|
+
for scheduler in self._scheduler_instances.values():
|
|
135
137
|
scheduler.close()
|
|
136
138
|
|
|
137
139
|
def run_component(
|
|
@@ -654,7 +656,7 @@ class Runner:
|
|
|
654
656
|
def _scheduler_app_id(
|
|
655
657
|
self,
|
|
656
658
|
app_handle: AppHandle,
|
|
657
|
-
check_session: bool = True
|
|
659
|
+
check_session: bool = True,
|
|
658
660
|
# pyre-fixme[24]: SchedulerOpts is a generic, and we don't have access to the corresponding type
|
|
659
661
|
) -> Tuple[Scheduler, str, str]:
|
|
660
662
|
"""
|
torchx/runner/config.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
"""
|
|
9
11
|
Status: Beta
|
|
10
12
|
|
|
@@ -266,6 +268,13 @@ def dump(
|
|
|
266
268
|
val = ";".join(opt.default)
|
|
267
269
|
else:
|
|
268
270
|
val = _NONE
|
|
271
|
+
elif opt.opt_type == Dict[str, str]:
|
|
272
|
+
# deal with empty or None default lists
|
|
273
|
+
if opt.default:
|
|
274
|
+
# pyre-ignore[16] opt.default type checked already as Dict[str, str]
|
|
275
|
+
val = ";".join([f"{k}:{v}" for k, v in opt.default.items()])
|
|
276
|
+
else:
|
|
277
|
+
val = _NONE
|
|
269
278
|
else:
|
|
270
279
|
val = f"{opt.default}"
|
|
271
280
|
|
|
@@ -525,6 +534,11 @@ def load(scheduler: str, f: TextIO, cfg: Dict[str, CfgVal]) -> None:
|
|
|
525
534
|
cfg[name] = config.getboolean(section, name)
|
|
526
535
|
elif runopt.opt_type is List[str]:
|
|
527
536
|
cfg[name] = value.split(";")
|
|
537
|
+
elif runopt.opt_type is Dict[str, str]:
|
|
538
|
+
cfg[name] = {
|
|
539
|
+
s.split(":", 1)[0]: s.split(":", 1)[1]
|
|
540
|
+
for s in value.replace(",", ";").split(";")
|
|
541
|
+
}
|
|
528
542
|
else:
|
|
529
543
|
# pyre-ignore[29]
|
|
530
544
|
cfg[name] = runopt.opt_type(value)
|
torchx/runner/events/__init__.py
CHANGED
torchx/runner/events/api.py
CHANGED
torchx/runner/events/handlers.py
CHANGED
torchx/runtime/tracking/api.py
CHANGED
torchx/schedulers/__init__.py
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
# This source code is licensed under the BSD-style license found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree.
|
|
7
7
|
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
8
10
|
import importlib
|
|
9
11
|
from typing import Dict, Mapping
|
|
10
12
|
|
|
@@ -19,6 +21,7 @@ DEFAULT_SCHEDULER_MODULES: Mapping[str, str] = {
|
|
|
19
21
|
"kubernetes": "torchx.schedulers.kubernetes_scheduler",
|
|
20
22
|
"kubernetes_mcad": "torchx.schedulers.kubernetes_mcad_scheduler",
|
|
21
23
|
"aws_batch": "torchx.schedulers.aws_batch_scheduler",
|
|
24
|
+
"aws_sagemaker": "torchx.schedulers.aws_sagemaker_scheduler",
|
|
22
25
|
"gcp_batch": "torchx.schedulers.gcp_batch_scheduler",
|
|
23
26
|
"ray": "torchx.schedulers.ray_scheduler",
|
|
24
27
|
"lsf": "torchx.schedulers.lsf_scheduler",
|
|
@@ -27,8 +30,7 @@ DEFAULT_SCHEDULER_MODULES: Mapping[str, str] = {
|
|
|
27
30
|
|
|
28
31
|
class SchedulerFactory(Protocol):
|
|
29
32
|
# pyre-fixme: Scheduler opts
|
|
30
|
-
def __call__(self, session_name: str, **kwargs: object) -> Scheduler:
|
|
31
|
-
...
|
|
33
|
+
def __call__(self, session_name: str, **kwargs: object) -> Scheduler: ...
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
def _defer_load_scheduler(path: str) -> SchedulerFactory:
|
torchx/schedulers/api.py
CHANGED