torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchx-nightly might be problematic. Click here for more details.
- torchx/__init__.py +2 -0
- torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
- torchx/apps/serve/serve.py +2 -0
- torchx/apps/utils/booth_main.py +2 -0
- torchx/apps/utils/copy_main.py +2 -0
- torchx/apps/utils/process_monitor.py +2 -0
- torchx/cli/__init__.py +2 -0
- torchx/cli/argparse_util.py +38 -3
- torchx/cli/cmd_base.py +2 -0
- torchx/cli/cmd_cancel.py +2 -0
- torchx/cli/cmd_configure.py +2 -0
- torchx/cli/cmd_delete.py +30 -0
- torchx/cli/cmd_describe.py +2 -0
- torchx/cli/cmd_list.py +8 -4
- torchx/cli/cmd_log.py +6 -24
- torchx/cli/cmd_run.py +269 -45
- torchx/cli/cmd_runopts.py +2 -0
- torchx/cli/cmd_status.py +12 -1
- torchx/cli/cmd_tracker.py +3 -1
- torchx/cli/colors.py +2 -0
- torchx/cli/main.py +4 -0
- torchx/components/__init__.py +3 -8
- torchx/components/component_test_base.py +2 -0
- torchx/components/dist.py +18 -7
- torchx/components/integration_tests/component_provider.py +4 -2
- torchx/components/integration_tests/integ_tests.py +2 -0
- torchx/components/serve.py +2 -0
- torchx/components/structured_arg.py +7 -6
- torchx/components/utils.py +15 -4
- torchx/distributed/__init__.py +2 -4
- torchx/examples/apps/datapreproc/datapreproc.py +2 -0
- torchx/examples/apps/lightning/data.py +5 -3
- torchx/examples/apps/lightning/model.py +7 -6
- torchx/examples/apps/lightning/profiler.py +7 -4
- torchx/examples/apps/lightning/train.py +11 -2
- torchx/examples/torchx_out_of_sync_training.py +11 -0
- torchx/notebook.py +2 -0
- torchx/runner/__init__.py +2 -0
- torchx/runner/api.py +167 -60
- torchx/runner/config.py +43 -10
- torchx/runner/events/__init__.py +57 -13
- torchx/runner/events/api.py +14 -3
- torchx/runner/events/handlers.py +2 -0
- torchx/runtime/tracking/__init__.py +2 -0
- torchx/runtime/tracking/api.py +2 -0
- torchx/schedulers/__init__.py +16 -15
- torchx/schedulers/api.py +70 -14
- torchx/schedulers/aws_batch_scheduler.py +79 -5
- torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
- torchx/schedulers/devices.py +17 -4
- torchx/schedulers/docker_scheduler.py +43 -11
- torchx/schedulers/ids.py +29 -23
- torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
- torchx/schedulers/kubernetes_scheduler.py +383 -38
- torchx/schedulers/local_scheduler.py +100 -27
- torchx/schedulers/lsf_scheduler.py +5 -4
- torchx/schedulers/slurm_scheduler.py +336 -20
- torchx/schedulers/streams.py +2 -0
- torchx/specs/__init__.py +89 -12
- torchx/specs/api.py +431 -32
- torchx/specs/builders.py +176 -38
- torchx/specs/file_linter.py +143 -57
- torchx/specs/finder.py +68 -28
- torchx/specs/named_resources_aws.py +254 -22
- torchx/specs/named_resources_generic.py +2 -0
- torchx/specs/overlays.py +106 -0
- torchx/specs/test/components/__init__.py +2 -0
- torchx/specs/test/components/a/__init__.py +2 -0
- torchx/specs/test/components/a/b/__init__.py +2 -0
- torchx/specs/test/components/a/b/c.py +2 -0
- torchx/specs/test/components/c/__init__.py +2 -0
- torchx/specs/test/components/c/d.py +2 -0
- torchx/tracker/__init__.py +12 -6
- torchx/tracker/api.py +15 -18
- torchx/tracker/backend/fsspec.py +2 -0
- torchx/util/cuda.py +2 -0
- torchx/util/datetime.py +2 -0
- torchx/util/entrypoints.py +39 -15
- torchx/util/io.py +2 -0
- torchx/util/log_tee_helpers.py +210 -0
- torchx/util/modules.py +65 -0
- torchx/util/session.py +42 -0
- torchx/util/shlex.py +2 -0
- torchx/util/strings.py +3 -1
- torchx/util/types.py +90 -29
- torchx/version.py +4 -2
- torchx/workspace/__init__.py +2 -0
- torchx/workspace/api.py +136 -6
- torchx/workspace/dir_workspace.py +2 -0
- torchx/workspace/docker_workspace.py +30 -2
- torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
- torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
- torchx/examples/pipelines/__init__.py +0 -0
- torchx/examples/pipelines/kfp/__init__.py +0 -0
- torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
- torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
- torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
- torchx/pipelines/kfp/__init__.py +0 -28
- torchx/pipelines/kfp/adapter.py +0 -271
- torchx/pipelines/kfp/version.py +0 -17
- torchx/schedulers/gcp_batch_scheduler.py +0 -487
- torchx/schedulers/ray/ray_common.py +0 -22
- torchx/schedulers/ray/ray_driver.py +0 -307
- torchx/schedulers/ray_scheduler.py +0 -453
- torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
- torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
- {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
torchx/util/modules.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
9
|
+
import importlib
|
|
10
|
+
from types import ModuleType
|
|
11
|
+
from typing import Callable, Optional, TypeVar, Union
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_module(path: str) -> Union[ModuleType, Optional[Callable[..., object]]]:
|
|
15
|
+
"""
|
|
16
|
+
Loads and returns the module/module attr represented by the ``path``: ``full.module.path:optional_attr``
|
|
17
|
+
|
|
18
|
+
1. ``load_module("this.is.a_module:fn")`` -> equivalent to ``this.is.a_module.fn``
|
|
19
|
+
2. ``load_module("this.is.a_module")`` -> equivalent to ``this.is.a_module``
|
|
20
|
+
"""
|
|
21
|
+
parts = path.split(":", 2)
|
|
22
|
+
module_path, method = parts[0], parts[1] if len(parts) > 1 else None
|
|
23
|
+
module = None
|
|
24
|
+
i, n = -1, len(module_path)
|
|
25
|
+
try:
|
|
26
|
+
while i < n:
|
|
27
|
+
i = module_path.find(".", i + 1)
|
|
28
|
+
i = i if i >= 0 else n
|
|
29
|
+
module = importlib.import_module(module_path[:i])
|
|
30
|
+
return getattr(module, method) if method else module
|
|
31
|
+
except Exception:
|
|
32
|
+
return None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
T = TypeVar("T")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def import_attr(name: str, attr: str, default: T) -> T:
|
|
39
|
+
"""
|
|
40
|
+
Imports ``name.attr`` and returns it if the module is found.
|
|
41
|
+
Otherwise, returns the specified ``default``.
|
|
42
|
+
Useful when getting an attribute from an optional dependency.
|
|
43
|
+
|
|
44
|
+
Note that the ``default`` parameter is intentionally not optional
|
|
45
|
+
since this function is intended to be used with modules that may not be
|
|
46
|
+
installed as a dependency. Therefore the caller must ALWAYS provide a
|
|
47
|
+
sensible default.
|
|
48
|
+
|
|
49
|
+
Usage:
|
|
50
|
+
|
|
51
|
+
.. code-block:: python
|
|
52
|
+
|
|
53
|
+
aws_resources = import_attr("torchx.specs.named_resources_aws", "NAMED_RESOURCES", default={})
|
|
54
|
+
all_resources.update(aws_resources)
|
|
55
|
+
|
|
56
|
+
Raises:
|
|
57
|
+
AttributeError: If the module exists (e.g. can be imported)
|
|
58
|
+
but does not have an attribute with name ``attr``.
|
|
59
|
+
"""
|
|
60
|
+
try:
|
|
61
|
+
mod = importlib.import_module(name)
|
|
62
|
+
except ModuleNotFoundError:
|
|
63
|
+
return default
|
|
64
|
+
else:
|
|
65
|
+
return getattr(mod, attr)
|
torchx/util/session.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This source code is licensed under the BSD-style license found in the
|
|
6
|
+
# LICENSE file in the root directory of this source tree.
|
|
7
|
+
|
|
8
|
+
# pyre-strict
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import uuid
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
TORCHX_INTERNAL_SESSION_ID = "TORCHX_INTERNAL_SESSION_ID"
|
|
15
|
+
|
|
16
|
+
CURRENT_SESSION_ID: Optional[str] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_session_id_or_create_new() -> str:
|
|
20
|
+
"""
|
|
21
|
+
Returns the current session ID, or creates a new one if none exists.
|
|
22
|
+
The session ID remains the same as long as it is in the same process.
|
|
23
|
+
Please DO NOT use this function out of torchx codebase.
|
|
24
|
+
"""
|
|
25
|
+
global CURRENT_SESSION_ID
|
|
26
|
+
if CURRENT_SESSION_ID:
|
|
27
|
+
return CURRENT_SESSION_ID
|
|
28
|
+
env_session_id = os.getenv(TORCHX_INTERNAL_SESSION_ID)
|
|
29
|
+
if env_session_id:
|
|
30
|
+
CURRENT_SESSION_ID = env_session_id
|
|
31
|
+
return CURRENT_SESSION_ID
|
|
32
|
+
session_id = str(uuid.uuid4())
|
|
33
|
+
CURRENT_SESSION_ID = session_id
|
|
34
|
+
return session_id
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_torchx_session_id() -> Optional[str]:
|
|
38
|
+
"""
|
|
39
|
+
Returns the torchx session ID.
|
|
40
|
+
Please use this function to get the session ID out of torchx codebase.
|
|
41
|
+
"""
|
|
42
|
+
return CURRENT_SESSION_ID
|
torchx/util/shlex.py
CHANGED
torchx/util/strings.py
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
import re
|
|
8
10
|
|
|
9
11
|
|
|
@@ -11,7 +13,7 @@ def normalize_str(data: str) -> str:
|
|
|
11
13
|
"""
|
|
12
14
|
Invokes ``lower`` on the string and removes all
|
|
13
15
|
characters that do not satisfy ``[a-z0-9\\-]`` pattern.
|
|
14
|
-
This method is mostly used to make sure kubernetes
|
|
16
|
+
This method is mostly used to make sure kubernetes scheduler gets
|
|
15
17
|
the job name that does not violate its restrictions.
|
|
16
18
|
"""
|
|
17
19
|
if data.startswith("-"):
|
torchx/util/types.py
CHANGED
|
@@ -4,13 +4,15 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
|
|
7
|
+
# pyre-strict
|
|
9
8
|
|
|
10
|
-
import
|
|
9
|
+
import inspect
|
|
10
|
+
import re
|
|
11
|
+
from types import UnionType
|
|
12
|
+
from typing import Any, Callable, Optional, Tuple, TypeVar, Union
|
|
11
13
|
|
|
12
14
|
|
|
13
|
-
def to_list(arg: str) -> List[str]:
|
|
15
|
+
def to_list(arg: str) -> list[str]:
|
|
14
16
|
conf = []
|
|
15
17
|
if len(arg.strip()) == 0:
|
|
16
18
|
return []
|
|
@@ -19,9 +21,9 @@ def to_list(arg: str) -> List[str]:
|
|
|
19
21
|
return conf
|
|
20
22
|
|
|
21
23
|
|
|
22
|
-
def to_dict(arg: str) -> Dict[str, str]:
|
|
24
|
+
def to_dict(arg: str) -> dict[str, str]:
|
|
23
25
|
"""
|
|
24
|
-
Parses the given ``arg`` string literal into a ``
|
|
26
|
+
Parses the given ``arg`` string literal into a ``dict[str, str]`` of
|
|
25
27
|
key-value pairs delimited by ``"="`` (equals). The values may be a
|
|
26
28
|
list literal where the list elements are delimited by ``","`` (comma)
|
|
27
29
|
or ``";"`` (semi-colon). The same delimiters (``","`` and ``";"``) are used
|
|
@@ -29,6 +31,9 @@ def to_dict(arg: str) -> Dict[str, str]:
|
|
|
29
31
|
When values are lists, the last delimiter is used as kv-pair delimiter
|
|
30
32
|
(e.g. ``FOO=v1,v2,BAR=v3``). Empty values of ``arg`` returns an empty map.
|
|
31
33
|
|
|
34
|
+
Values can be quoted with single or double quotes to include special characters
|
|
35
|
+
(``"="``, ``","``, ``";"``) without them being interpreted as separators.
|
|
36
|
+
|
|
32
37
|
Note that values that encode list literals are returned as list literals
|
|
33
38
|
NOT actual lists. The caller must further process each value in the returned
|
|
34
39
|
map, to cast/decode the value literals as specific types. In this case,
|
|
@@ -43,6 +48,9 @@ def to_dict(arg: str) -> Dict[str, str]:
|
|
|
43
48
|
|
|
44
49
|
to_dict("FOO=v1") == {"FOO": "v1"}
|
|
45
50
|
|
|
51
|
+
to_dict("FOO=''") == {"FOO": ""}
|
|
52
|
+
to_dict('FOO=""') == {"FOO": ""}
|
|
53
|
+
|
|
46
54
|
to_dict("FOO=v1,v2") == {"FOO": "v1,v2"}
|
|
47
55
|
to_dict("FOO=v1;v2") == {"FOO": "v1;v2"}
|
|
48
56
|
to_dict("FOO=v1;v2,") == {"FOO": "v1;v2,"}
|
|
@@ -52,6 +60,7 @@ def to_dict(arg: str) -> Dict[str, str]:
|
|
|
52
60
|
to_dict("FOO=v1;v2,BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
|
|
53
61
|
to_dict("FOO=v1;v2;BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
|
|
54
62
|
|
|
63
|
+
to_dict('FOO="value with = and , and ;"') == {"FOO": "value with = and , and ;"}
|
|
55
64
|
"""
|
|
56
65
|
|
|
57
66
|
def parse_val_key(vk: str) -> Tuple[str, str]:
|
|
@@ -68,17 +77,35 @@ def to_dict(arg: str) -> Dict[str, str]:
|
|
|
68
77
|
else:
|
|
69
78
|
return vk[0:idx].strip(), vk[idx + 1 :].strip()
|
|
70
79
|
|
|
71
|
-
|
|
80
|
+
def to_val(val: str) -> str:
|
|
81
|
+
if (val.startswith("'") and val.endswith("'")) or (
|
|
82
|
+
val.startswith('"') and val.endswith('"')
|
|
83
|
+
):
|
|
84
|
+
return val[1:-1]
|
|
85
|
+
return val if val != '""' and val != "''" else ""
|
|
86
|
+
|
|
87
|
+
arg_map: dict[str, str] = {}
|
|
72
88
|
|
|
73
89
|
if not arg:
|
|
74
90
|
return arg_map
|
|
75
91
|
|
|
92
|
+
# find quoted values
|
|
93
|
+
quoted_pattern = r'([\'"])((?:\\.|(?!\1).)*?)\1'
|
|
94
|
+
quoted_values: list[str] = []
|
|
95
|
+
|
|
96
|
+
def replace_quoted(match):
|
|
97
|
+
quoted_values.append(match.group(0))
|
|
98
|
+
return f"__QUOTED_{len(quoted_values) - 1}__"
|
|
99
|
+
|
|
100
|
+
# replace quoted values with placeholders
|
|
101
|
+
processed_arg = re.sub(quoted_pattern, replace_quoted, arg)
|
|
102
|
+
|
|
76
103
|
# split cfgs
|
|
77
104
|
cfg_kv_delim = "="
|
|
78
105
|
|
|
79
106
|
# ["FOO", "v1;v2,BAR", v3, "BAZ", "v4,v5"]
|
|
80
107
|
split_arg = [
|
|
81
|
-
s.strip() for s in
|
|
108
|
+
s.strip() for s in processed_arg.split(cfg_kv_delim) if s.strip()
|
|
82
109
|
] # remove empty
|
|
83
110
|
split_arg_len = len(split_arg)
|
|
84
111
|
|
|
@@ -90,18 +117,28 @@ def to_dict(arg: str) -> Dict[str, str]:
|
|
|
90
117
|
# middle elements are value_{n}<delim>key_{n+1}
|
|
91
118
|
for vk in split_arg[1 : split_arg_len - 1]: # python deals with
|
|
92
119
|
val, key_next = parse_val_key(vk)
|
|
93
|
-
|
|
120
|
+
for i, quoted in enumerate(quoted_values):
|
|
121
|
+
val = val.replace(f"__QUOTED_{i}__", quoted)
|
|
122
|
+
arg_map[key] = to_val(val)
|
|
94
123
|
key = key_next
|
|
124
|
+
|
|
95
125
|
val = split_arg[-1] # last element is always a value
|
|
96
|
-
|
|
126
|
+
for i, quoted in enumerate(quoted_values):
|
|
127
|
+
val = val.replace(f"__QUOTED_{i}__", quoted)
|
|
128
|
+
arg_map[key] = to_val(val)
|
|
129
|
+
|
|
97
130
|
return arg_map
|
|
98
131
|
|
|
99
132
|
|
|
100
133
|
# pyre-ignore-all-errors[3, 2]
|
|
101
134
|
def _decode_string_to_dict(
|
|
102
|
-
encoded_value: str, param_type:
|
|
103
|
-
) ->
|
|
104
|
-
|
|
135
|
+
encoded_value: str, param_type: type[dict[Any, Any]]
|
|
136
|
+
) -> dict[Any, Any]:
|
|
137
|
+
# pyre-ignore[16]
|
|
138
|
+
if not hasattr(param_type, "__args__") or len(param_type.__args__) != 2:
|
|
139
|
+
raise ValueError(f"param_type must be a `dict` type, but was `{param_type}`")
|
|
140
|
+
|
|
141
|
+
key_type, value_type = param_type.__args__
|
|
105
142
|
arg_values = {}
|
|
106
143
|
for key, value in to_dict(encoded_value).items():
|
|
107
144
|
arg_values[key_type(key)] = value_type(value)
|
|
@@ -109,9 +146,12 @@ def _decode_string_to_dict(
|
|
|
109
146
|
|
|
110
147
|
|
|
111
148
|
def _decode_string_to_list(
|
|
112
|
-
encoded_value: str, param_type:
|
|
113
|
-
) ->
|
|
114
|
-
|
|
149
|
+
encoded_value: str, param_type: type[list[Any]]
|
|
150
|
+
) -> list[Any]:
|
|
151
|
+
# pyre-ignore[16]
|
|
152
|
+
if not hasattr(param_type, "__args__") or len(param_type.__args__) != 1:
|
|
153
|
+
raise ValueError(f"param_type must be a `list` type, but was `{param_type}`")
|
|
154
|
+
value_type = param_type.__args__[0]
|
|
115
155
|
if not is_primitive(value_type):
|
|
116
156
|
raise ValueError("List types support only primitives: int, str, float")
|
|
117
157
|
arg_values = []
|
|
@@ -120,9 +160,19 @@ def _decode_string_to_list(
|
|
|
120
160
|
return arg_values
|
|
121
161
|
|
|
122
162
|
|
|
163
|
+
def decode(encoded_value: Any, annotation: Any):
|
|
164
|
+
if encoded_value is None:
|
|
165
|
+
return None
|
|
166
|
+
if is_bool(annotation):
|
|
167
|
+
return encoded_value and encoded_value.lower() == "true"
|
|
168
|
+
if not is_primitive(annotation) and type(encoded_value) == str:
|
|
169
|
+
return decode_from_string(encoded_value, annotation)
|
|
170
|
+
return encoded_value
|
|
171
|
+
|
|
172
|
+
|
|
123
173
|
def decode_from_string(
|
|
124
174
|
encoded_value: str, annotation: Any
|
|
125
|
-
) -> Union[
|
|
175
|
+
) -> Union[dict[Any, Any], list[Any], None]:
|
|
126
176
|
"""Decodes string representation to the underlying type(Dict or List)
|
|
127
177
|
|
|
128
178
|
Given a string representation of the value, the method decodes it according
|
|
@@ -147,13 +197,13 @@ def decode_from_string(
|
|
|
147
197
|
if not encoded_value:
|
|
148
198
|
return None
|
|
149
199
|
value_type = annotation
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
200
|
+
if hasattr(value_type, "__origin__"):
|
|
201
|
+
value_origin = value_type.__origin__
|
|
202
|
+
if value_origin is dict:
|
|
203
|
+
return _decode_string_to_dict(encoded_value, value_type)
|
|
204
|
+
elif value_origin is list:
|
|
205
|
+
return _decode_string_to_list(encoded_value, value_type)
|
|
206
|
+
raise ValueError("Unknown")
|
|
157
207
|
|
|
158
208
|
|
|
159
209
|
def is_bool(param_type: Any) -> bool:
|
|
@@ -185,12 +235,23 @@ def decode_optional(param_type: Any) -> Any:
|
|
|
185
235
|
If ``param_type`` is type Optional[INNER_TYPE], method returns INNER_TYPE
|
|
186
236
|
Otherwise returns ``param_type``
|
|
187
237
|
"""
|
|
188
|
-
|
|
189
|
-
if
|
|
238
|
+
|
|
239
|
+
if not hasattr(param_type, "__origin__"):
|
|
240
|
+
if isinstance(param_type, UnionType):
|
|
241
|
+
# handle BinOp style Optional (e.g. `T | None`)
|
|
242
|
+
if len(param_type.__args__) == 2 and param_type.__args__[1] is type(None):
|
|
243
|
+
return param_type.__args__[0]
|
|
244
|
+
else:
|
|
245
|
+
return param_type
|
|
246
|
+
else:
|
|
247
|
+
return param_type
|
|
248
|
+
|
|
249
|
+
if param_type.__origin__ is not Union:
|
|
190
250
|
return param_type
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
251
|
+
|
|
252
|
+
args = param_type.__args__
|
|
253
|
+
if len(args) == 2 and args[1] is type(None):
|
|
254
|
+
return args[0]
|
|
194
255
|
else:
|
|
195
256
|
return param_type
|
|
196
257
|
|
torchx/version.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
1
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
2
|
# All rights reserved.
|
|
4
3
|
#
|
|
5
4
|
# This source code is licensed under the BSD-style license found in the
|
|
6
5
|
# LICENSE file in the root directory of this source tree.
|
|
7
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
9
|
+
from torchx._version import BASE_VERSION
|
|
8
10
|
from torchx.util.entrypoints import load
|
|
9
11
|
|
|
10
12
|
# Follows PEP-0440 version scheme guidelines
|
|
@@ -16,7 +18,7 @@ from torchx.util.entrypoints import load
|
|
|
16
18
|
# 0.1.0bN # Beta release
|
|
17
19
|
# 0.1.0rcN # Release Candidate
|
|
18
20
|
# 0.1.0 # Final release
|
|
19
|
-
__version__ =
|
|
21
|
+
__version__: str = BASE_VERSION
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
# Use the github container registry images corresponding to the current package
|
torchx/workspace/__init__.py
CHANGED
torchx/workspace/api.py
CHANGED
|
@@ -4,12 +4,20 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
import abc
|
|
8
10
|
import fnmatch
|
|
11
|
+
import logging
|
|
9
12
|
import posixpath
|
|
10
|
-
|
|
13
|
+
import tempfile
|
|
14
|
+
import warnings
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any, Dict, Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar
|
|
17
|
+
|
|
18
|
+
from torchx.specs import AppDef, CfgVal, Role, runopts, Workspace
|
|
11
19
|
|
|
12
|
-
|
|
20
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
|
13
21
|
|
|
14
22
|
if TYPE_CHECKING:
|
|
15
23
|
from fsspec import AbstractFileSystem
|
|
@@ -18,6 +26,58 @@ TORCHX_IGNORE = ".torchxignore"
|
|
|
18
26
|
|
|
19
27
|
T = TypeVar("T")
|
|
20
28
|
|
|
29
|
+
PackageType = TypeVar("PackageType")
|
|
30
|
+
WorkspaceConfigType = TypeVar("WorkspaceConfigType")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class PkgInfo(Generic[PackageType]):
|
|
35
|
+
"""
|
|
36
|
+
Convenience class used to specify information regarding the built workspace
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
img: str
|
|
40
|
+
lazy_overrides: Dict[str, Any]
|
|
41
|
+
metadata: PackageType
|
|
42
|
+
|
|
43
|
+
def __post_init__(self) -> None:
|
|
44
|
+
msg = (
|
|
45
|
+
f"{self.__class__.__name__} is deprecated and will be removed in the future."
|
|
46
|
+
" Consider forking this class if your project depends on it."
|
|
47
|
+
)
|
|
48
|
+
warnings.warn(
|
|
49
|
+
msg,
|
|
50
|
+
FutureWarning,
|
|
51
|
+
stacklevel=2,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
|
|
56
|
+
class WorkspaceBuilder(Generic[PackageType, WorkspaceConfigType]):
|
|
57
|
+
cfg: WorkspaceConfigType
|
|
58
|
+
|
|
59
|
+
def __post_init__(self) -> None:
|
|
60
|
+
msg = (
|
|
61
|
+
f"{self.__class__.__name__} is deprecated and will be removed in the future."
|
|
62
|
+
" Consider forking this class if your project depends on it."
|
|
63
|
+
)
|
|
64
|
+
warnings.warn(
|
|
65
|
+
msg,
|
|
66
|
+
FutureWarning,
|
|
67
|
+
stacklevel=2,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
@abc.abstractmethod
|
|
71
|
+
def build_workspace(self, sync: bool = True) -> PkgInfo[PackageType]:
|
|
72
|
+
"""
|
|
73
|
+
Builds the specified ``workspace`` with respect to ``img``.
|
|
74
|
+
In the simplest case, this method builds a new image.
|
|
75
|
+
Certain (more efficient) implementations build
|
|
76
|
+
incremental diff patches that overlay on top of the role's image.
|
|
77
|
+
|
|
78
|
+
"""
|
|
79
|
+
pass
|
|
80
|
+
|
|
21
81
|
|
|
22
82
|
class WorkspaceMixin(abc.ABC, Generic[T]):
|
|
23
83
|
"""
|
|
@@ -44,11 +104,82 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
|
|
|
44
104
|
"""
|
|
45
105
|
return runopts()
|
|
46
106
|
|
|
47
|
-
|
|
107
|
+
def build_workspaces(self, roles: list[Role], cfg: Mapping[str, CfgVal]) -> None:
|
|
108
|
+
"""
|
|
109
|
+
NOTE: this method MUTATES the passed roles!
|
|
110
|
+
|
|
111
|
+
Builds the workspaces (if any) for each role and updates the role to reflect the built workspace.
|
|
112
|
+
Typically ``role.image`` is updated with the newly built image that reflects the local workspace.
|
|
113
|
+
Some workspace implementations may add extra environment variables to make it easier for other
|
|
114
|
+
parts of the program to access the workspace. For example a ``WORKSPACE_DIR`` env var may be added
|
|
115
|
+
to ``role.env`` that scripts can use to refer to the workspace directory in the container.
|
|
116
|
+
"""
|
|
117
|
+
|
|
118
|
+
build_cache: dict[object, object] = {}
|
|
119
|
+
|
|
120
|
+
for i, role in enumerate(roles):
|
|
121
|
+
if role.workspace:
|
|
122
|
+
old_img = role.image
|
|
123
|
+
self.caching_build_workspace_and_update_role(role, cfg, build_cache)
|
|
124
|
+
|
|
125
|
+
if old_img != role.image:
|
|
126
|
+
logger.info(
|
|
127
|
+
"role[%d]=%s updated with new image to include workspace changes",
|
|
128
|
+
i,
|
|
129
|
+
role.name,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
def caching_build_workspace_and_update_role(
|
|
133
|
+
self,
|
|
134
|
+
role: Role,
|
|
135
|
+
cfg: Mapping[str, CfgVal],
|
|
136
|
+
build_cache: dict[object, object],
|
|
137
|
+
) -> None:
|
|
138
|
+
"""
|
|
139
|
+
Same as :py:meth:`build_workspace_and_update_role` but takes
|
|
140
|
+
a ``build_cache`` that can be used to cache pointers to build artifacts
|
|
141
|
+
between building workspace for each role.
|
|
142
|
+
|
|
143
|
+
This is useful when an appdef has multiple roles where the image and workspace
|
|
144
|
+
of the roles are the same but other attributes such as entrypoint or args are different.
|
|
145
|
+
|
|
146
|
+
NOTE: ``build_cache``'s lifetime is within :py:meth:`build_workspaces`
|
|
147
|
+
NOTE: the workspace implementation decides what to cache
|
|
148
|
+
|
|
149
|
+
Workspace subclasses should prefer implementing this method over
|
|
150
|
+
:py:meth:`build_workspace_and_update_role`.
|
|
151
|
+
|
|
152
|
+
The default implementation of this method simply calls the (deprecated) non-caching
|
|
153
|
+
:py:meth:`build_workspace_and_update_role` and deals with multi-dir workspaces by
|
|
154
|
+
merging them into a single tmpdir before passing it down.
|
|
155
|
+
|
|
156
|
+
"""
|
|
157
|
+
|
|
158
|
+
workspace = role.workspace
|
|
159
|
+
|
|
160
|
+
if not workspace:
|
|
161
|
+
return
|
|
162
|
+
|
|
163
|
+
if workspace.is_unmapped_single_project():
|
|
164
|
+
# single-dir workspace with no target map; no need to copy to a tmp dir
|
|
165
|
+
self.build_workspace_and_update_role(role, str(workspace), cfg)
|
|
166
|
+
else:
|
|
167
|
+
# multi-dirs or single-dir with a target map;
|
|
168
|
+
# copy all dirs to a tmp dir and treat the tmp dir as a single-dir workspace
|
|
169
|
+
with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
|
|
170
|
+
workspace.merge_into(outdir)
|
|
171
|
+
self.build_workspace_and_update_role(role, outdir, cfg)
|
|
172
|
+
|
|
48
173
|
def build_workspace_and_update_role(
|
|
49
|
-
self,
|
|
174
|
+
self,
|
|
175
|
+
role: Role,
|
|
176
|
+
workspace: str,
|
|
177
|
+
cfg: Mapping[str, CfgVal],
|
|
50
178
|
) -> None:
|
|
51
179
|
"""
|
|
180
|
+
.. note:: DEPRECATED: Workspace subclasses should implement
|
|
181
|
+
:py:meth:`caching_build_workspace_and_update_role` over this method.
|
|
182
|
+
|
|
52
183
|
Builds the specified ``workspace`` with respect to ``img``
|
|
53
184
|
and updates the ``role`` to reflect the built workspace artifacts.
|
|
54
185
|
In the simplest case, this method builds a new image and updates
|
|
@@ -57,7 +188,7 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
|
|
|
57
188
|
|
|
58
189
|
Note: this method mutates the passed ``role``.
|
|
59
190
|
"""
|
|
60
|
-
|
|
191
|
+
raise NotImplementedError("implement `caching_build_workspace_and_update_role`")
|
|
61
192
|
|
|
62
193
|
def dryrun_push_images(self, app: AppDef, cfg: Mapping[str, CfgVal]) -> T:
|
|
63
194
|
"""
|
|
@@ -100,7 +231,6 @@ def walk_workspace(
|
|
|
100
231
|
walk_workspace walks the filesystem path and applies the ignore rules
|
|
101
232
|
specified via ``ignore_name``.
|
|
102
233
|
This follows the rules for ``.dockerignore``.
|
|
103
|
-
https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
|
104
234
|
"""
|
|
105
235
|
ignore_patterns = []
|
|
106
236
|
ignore_path = posixpath.join(path, ignore_name)
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
# pyre-strict
|
|
8
|
+
|
|
7
9
|
import io
|
|
8
10
|
import logging
|
|
9
11
|
import posixpath
|
|
@@ -16,6 +18,7 @@ from typing import Dict, IO, Iterable, Mapping, Optional, TextIO, Tuple, TYPE_CH
|
|
|
16
18
|
import fsspec
|
|
17
19
|
|
|
18
20
|
import torchx
|
|
21
|
+
from docker.errors import BuildError
|
|
19
22
|
from torchx.specs import AppDef, CfgVal, Role, runopts
|
|
20
23
|
from torchx.workspace.api import walk_workspace, WorkspaceMixin
|
|
21
24
|
|
|
@@ -91,6 +94,12 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
|
|
|
91
94
|
type_=str,
|
|
92
95
|
help="(remote jobs) the image repository to use when pushing patched images, must have push access. Ex: example.com/your/container",
|
|
93
96
|
)
|
|
97
|
+
opts.add(
|
|
98
|
+
"quiet",
|
|
99
|
+
type_=bool,
|
|
100
|
+
default=False,
|
|
101
|
+
help="whether to suppress verbose output for image building. Defaults to ``False``.",
|
|
102
|
+
)
|
|
94
103
|
return opts
|
|
95
104
|
|
|
96
105
|
def build_workspace_and_update_role(
|
|
@@ -105,6 +114,10 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
|
|
|
105
114
|
workspace: a fsspec path to a directory with contents to be overlaid
|
|
106
115
|
"""
|
|
107
116
|
|
|
117
|
+
old_imgs = [
|
|
118
|
+
image.id
|
|
119
|
+
for image in self._docker_client.images.list(name=cfg["image_repo"])
|
|
120
|
+
]
|
|
108
121
|
context = _build_context(role.image, workspace)
|
|
109
122
|
|
|
110
123
|
try:
|
|
@@ -115,7 +128,7 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
|
|
|
115
128
|
f"failed to pull image {role.image}, falling back to local: {e}"
|
|
116
129
|
)
|
|
117
130
|
log.info("Building workspace docker image (this may take a while)...")
|
|
118
|
-
|
|
131
|
+
build_events = self._docker_client.api.build(
|
|
119
132
|
fileobj=context,
|
|
120
133
|
custom_context=True,
|
|
121
134
|
dockerfile=TORCHX_DOCKERFILE,
|
|
@@ -125,11 +138,26 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
|
|
|
125
138
|
},
|
|
126
139
|
pull=False,
|
|
127
140
|
rm=True,
|
|
141
|
+
decode=True,
|
|
128
142
|
labels={
|
|
129
143
|
self.LABEL_VERSION: torchx.__version__,
|
|
130
144
|
},
|
|
131
145
|
)
|
|
132
|
-
|
|
146
|
+
image_id = None
|
|
147
|
+
for event in build_events:
|
|
148
|
+
if message := event.get("stream"):
|
|
149
|
+
if not cfg.get("quiet", False):
|
|
150
|
+
message = message.strip("\r\n").strip("\n")
|
|
151
|
+
if message:
|
|
152
|
+
log.info(message)
|
|
153
|
+
if aux := event.get("aux"):
|
|
154
|
+
image_id = aux["ID"]
|
|
155
|
+
if error := event.get("error"):
|
|
156
|
+
raise BuildError(reason=error, build_log=None)
|
|
157
|
+
if len(old_imgs) == 0 or role.image not in old_imgs:
|
|
158
|
+
assert image_id, "image id was not found"
|
|
159
|
+
role.image = image_id
|
|
160
|
+
|
|
133
161
|
finally:
|
|
134
162
|
context.close()
|
|
135
163
|
|