torchx-nightly 2024.1.6__py3-none-any.whl → 2025.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchx-nightly might be problematic. Click here for more details.

Files changed (110) hide show
  1. torchx/__init__.py +2 -0
  2. torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
  3. torchx/apps/serve/serve.py +2 -0
  4. torchx/apps/utils/booth_main.py +2 -0
  5. torchx/apps/utils/copy_main.py +2 -0
  6. torchx/apps/utils/process_monitor.py +2 -0
  7. torchx/cli/__init__.py +2 -0
  8. torchx/cli/argparse_util.py +38 -3
  9. torchx/cli/cmd_base.py +2 -0
  10. torchx/cli/cmd_cancel.py +2 -0
  11. torchx/cli/cmd_configure.py +2 -0
  12. torchx/cli/cmd_delete.py +30 -0
  13. torchx/cli/cmd_describe.py +2 -0
  14. torchx/cli/cmd_list.py +8 -4
  15. torchx/cli/cmd_log.py +6 -24
  16. torchx/cli/cmd_run.py +269 -45
  17. torchx/cli/cmd_runopts.py +2 -0
  18. torchx/cli/cmd_status.py +12 -1
  19. torchx/cli/cmd_tracker.py +3 -1
  20. torchx/cli/colors.py +2 -0
  21. torchx/cli/main.py +4 -0
  22. torchx/components/__init__.py +3 -8
  23. torchx/components/component_test_base.py +2 -0
  24. torchx/components/dist.py +18 -7
  25. torchx/components/integration_tests/component_provider.py +4 -2
  26. torchx/components/integration_tests/integ_tests.py +2 -0
  27. torchx/components/serve.py +2 -0
  28. torchx/components/structured_arg.py +4 -3
  29. torchx/components/utils.py +15 -4
  30. torchx/distributed/__init__.py +2 -4
  31. torchx/examples/apps/datapreproc/datapreproc.py +2 -0
  32. torchx/examples/apps/lightning/data.py +5 -3
  33. torchx/examples/apps/lightning/model.py +7 -6
  34. torchx/examples/apps/lightning/profiler.py +7 -4
  35. torchx/examples/apps/lightning/train.py +11 -2
  36. torchx/examples/torchx_out_of_sync_training.py +11 -0
  37. torchx/notebook.py +2 -0
  38. torchx/runner/__init__.py +2 -0
  39. torchx/runner/api.py +167 -60
  40. torchx/runner/config.py +43 -10
  41. torchx/runner/events/__init__.py +57 -13
  42. torchx/runner/events/api.py +14 -3
  43. torchx/runner/events/handlers.py +2 -0
  44. torchx/runtime/tracking/__init__.py +2 -0
  45. torchx/runtime/tracking/api.py +2 -0
  46. torchx/schedulers/__init__.py +16 -15
  47. torchx/schedulers/api.py +70 -14
  48. torchx/schedulers/aws_batch_scheduler.py +75 -6
  49. torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
  50. torchx/schedulers/devices.py +17 -4
  51. torchx/schedulers/docker_scheduler.py +43 -11
  52. torchx/schedulers/ids.py +29 -23
  53. torchx/schedulers/kubernetes_mcad_scheduler.py +9 -7
  54. torchx/schedulers/kubernetes_scheduler.py +383 -38
  55. torchx/schedulers/local_scheduler.py +100 -27
  56. torchx/schedulers/lsf_scheduler.py +5 -4
  57. torchx/schedulers/slurm_scheduler.py +336 -20
  58. torchx/schedulers/streams.py +2 -0
  59. torchx/specs/__init__.py +89 -12
  60. torchx/specs/api.py +418 -30
  61. torchx/specs/builders.py +176 -38
  62. torchx/specs/file_linter.py +143 -57
  63. torchx/specs/finder.py +68 -28
  64. torchx/specs/named_resources_aws.py +181 -4
  65. torchx/specs/named_resources_generic.py +2 -0
  66. torchx/specs/overlays.py +106 -0
  67. torchx/specs/test/components/__init__.py +2 -0
  68. torchx/specs/test/components/a/__init__.py +2 -0
  69. torchx/specs/test/components/a/b/__init__.py +2 -0
  70. torchx/specs/test/components/a/b/c.py +2 -0
  71. torchx/specs/test/components/c/__init__.py +2 -0
  72. torchx/specs/test/components/c/d.py +2 -0
  73. torchx/tracker/__init__.py +12 -6
  74. torchx/tracker/api.py +15 -18
  75. torchx/tracker/backend/fsspec.py +2 -0
  76. torchx/util/cuda.py +2 -0
  77. torchx/util/datetime.py +2 -0
  78. torchx/util/entrypoints.py +39 -15
  79. torchx/util/io.py +2 -0
  80. torchx/util/log_tee_helpers.py +210 -0
  81. torchx/util/modules.py +65 -0
  82. torchx/util/session.py +42 -0
  83. torchx/util/shlex.py +2 -0
  84. torchx/util/strings.py +3 -1
  85. torchx/util/types.py +90 -29
  86. torchx/version.py +4 -2
  87. torchx/workspace/__init__.py +2 -0
  88. torchx/workspace/api.py +136 -6
  89. torchx/workspace/dir_workspace.py +2 -0
  90. torchx/workspace/docker_workspace.py +30 -2
  91. torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
  92. torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
  93. {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
  94. {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
  95. torchx/examples/pipelines/__init__.py +0 -0
  96. torchx/examples/pipelines/kfp/__init__.py +0 -0
  97. torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
  98. torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
  99. torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
  100. torchx/pipelines/kfp/__init__.py +0 -28
  101. torchx/pipelines/kfp/adapter.py +0 -271
  102. torchx/pipelines/kfp/version.py +0 -17
  103. torchx/schedulers/gcp_batch_scheduler.py +0 -487
  104. torchx/schedulers/ray/ray_common.py +0 -22
  105. torchx/schedulers/ray/ray_driver.py +0 -307
  106. torchx/schedulers/ray_scheduler.py +0 -453
  107. torchx_nightly-2024.1.6.dist-info/METADATA +0 -176
  108. torchx_nightly-2024.1.6.dist-info/RECORD +0 -118
  109. {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
  110. {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
torchx/util/types.py CHANGED
@@ -4,13 +4,15 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
- import inspect
8
- from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
7
+ # pyre-strict
9
8
 
10
- import typing_inspect
9
+ import inspect
10
+ import re
11
+ from types import UnionType
12
+ from typing import Any, Callable, Optional, Tuple, TypeVar, Union
11
13
 
12
14
 
13
- def to_list(arg: str) -> List[str]:
15
+ def to_list(arg: str) -> list[str]:
14
16
  conf = []
15
17
  if len(arg.strip()) == 0:
16
18
  return []
@@ -19,9 +21,9 @@ def to_list(arg: str) -> List[str]:
19
21
  return conf
20
22
 
21
23
 
22
- def to_dict(arg: str) -> Dict[str, str]:
24
+ def to_dict(arg: str) -> dict[str, str]:
23
25
  """
24
- Parses the given ``arg`` string literal into a ``Dict[str, str]`` of
26
+ Parses the given ``arg`` string literal into a ``dict[str, str]`` of
25
27
  key-value pairs delimited by ``"="`` (equals). The values may be a
26
28
  list literal where the list elements are delimited by ``","`` (comma)
27
29
  or ``";"`` (semi-colon). The same delimiters (``","`` and ``";"``) are used
@@ -29,6 +31,9 @@ def to_dict(arg: str) -> Dict[str, str]:
29
31
  When values are lists, the last delimiter is used as kv-pair delimiter
30
32
  (e.g. ``FOO=v1,v2,BAR=v3``). Empty values of ``arg`` returns an empty map.
31
33
 
34
+ Values can be quoted with single or double quotes to include special characters
35
+ (``"="``, ``","``, ``";"``) without them being interpreted as separators.
36
+
32
37
  Note that values that encode list literals are returned as list literals
33
38
  NOT actual lists. The caller must further process each value in the returned
34
39
  map, to cast/decode the value literals as specific types. In this case,
@@ -43,6 +48,9 @@ def to_dict(arg: str) -> Dict[str, str]:
43
48
 
44
49
  to_dict("FOO=v1") == {"FOO": "v1"}
45
50
 
51
+ to_dict("FOO=''") == {"FOO": ""}
52
+ to_dict('FOO=""') == {"FOO": ""}
53
+
46
54
  to_dict("FOO=v1,v2") == {"FOO": "v1,v2"}
47
55
  to_dict("FOO=v1;v2") == {"FOO": "v1;v2"}
48
56
  to_dict("FOO=v1;v2") == {"FOO": "v1;v2,"]}
@@ -52,6 +60,7 @@ def to_dict(arg: str) -> Dict[str, str]:
52
60
  to_dict("FOO=v1;v2,BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
53
61
  to_dict("FOO=v1;v2;BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
54
62
 
63
+ to_dict('FOO="value with = and , and ;"') == {"FOO": "value with = and , and ;"}
55
64
  """
56
65
 
57
66
  def parse_val_key(vk: str) -> Tuple[str, str]:
@@ -68,17 +77,35 @@ def to_dict(arg: str) -> Dict[str, str]:
68
77
  else:
69
78
  return vk[0:idx].strip(), vk[idx + 1 :].strip()
70
79
 
71
- arg_map: Dict[str, str] = {}
80
+ def to_val(val: str) -> str:
81
+ if (val.startswith("'") and val.endswith("'")) or (
82
+ val.startswith('"') and val.endswith('"')
83
+ ):
84
+ return val[1:-1]
85
+ return val if val != '""' and val != "''" else ""
86
+
87
+ arg_map: dict[str, str] = {}
72
88
 
73
89
  if not arg:
74
90
  return arg_map
75
91
 
92
+ # find quoted values
93
+ quoted_pattern = r'([\'"])((?:\\.|(?!\1).)*?)\1'
94
+ quoted_values: list[str] = []
95
+
96
+ def replace_quoted(match):
97
+ quoted_values.append(match.group(0))
98
+ return f"__QUOTED_{len(quoted_values) - 1}__"
99
+
100
+ # replace quoted values with placeholders
101
+ processed_arg = re.sub(quoted_pattern, replace_quoted, arg)
102
+
76
103
  # split cfgs
77
104
  cfg_kv_delim = "="
78
105
 
79
106
  # ["FOO", "v1;v2,BAR", v3, "BAZ", "v4,v5"]
80
107
  split_arg = [
81
- s.strip() for s in arg.split(cfg_kv_delim) if s.strip()
108
+ s.strip() for s in processed_arg.split(cfg_kv_delim) if s.strip()
82
109
  ] # remove empty
83
110
  split_arg_len = len(split_arg)
84
111
 
@@ -90,18 +117,28 @@ def to_dict(arg: str) -> Dict[str, str]:
90
117
  # middle elements are value_{n}<delim>key_{n+1}
91
118
  for vk in split_arg[1 : split_arg_len - 1]: # python deals with
92
119
  val, key_next = parse_val_key(vk)
93
- arg_map[key] = val
120
+ for i, quoted in enumerate(quoted_values):
121
+ val = val.replace(f"__QUOTED_{i}__", quoted)
122
+ arg_map[key] = to_val(val)
94
123
  key = key_next
124
+
95
125
  val = split_arg[-1] # last element is always a value
96
- arg_map[key] = val
126
+ for i, quoted in enumerate(quoted_values):
127
+ val = val.replace(f"__QUOTED_{i}__", quoted)
128
+ arg_map[key] = to_val(val)
129
+
97
130
  return arg_map
98
131
 
99
132
 
100
133
  # pyre-ignore-all-errors[3, 2]
101
134
  def _decode_string_to_dict(
102
- encoded_value: str, param_type: Type[Dict[Any, Any]]
103
- ) -> Dict[Any, Any]:
104
- key_type, value_type = typing_inspect.get_args(param_type)
135
+ encoded_value: str, param_type: type[dict[Any, Any]]
136
+ ) -> dict[Any, Any]:
137
+ # pyre-ignore[16]
138
+ if not hasattr(param_type, "__args__") or len(param_type.__args__) != 2:
139
+ raise ValueError(f"param_type must be a `dict` type, but was `{param_type}`")
140
+
141
+ key_type, value_type = param_type.__args__
105
142
  arg_values = {}
106
143
  for key, value in to_dict(encoded_value).items():
107
144
  arg_values[key_type(key)] = value_type(value)
@@ -109,9 +146,12 @@ def _decode_string_to_dict(
109
146
 
110
147
 
111
148
  def _decode_string_to_list(
112
- encoded_value: str, param_type: Type[List[Any]]
113
- ) -> List[Any]:
114
- value_type = typing_inspect.get_args(param_type)[0]
149
+ encoded_value: str, param_type: type[list[Any]]
150
+ ) -> list[Any]:
151
+ # pyre-ignore[16]
152
+ if not hasattr(param_type, "__args__") or len(param_type.__args__) != 1:
153
+ raise ValueError(f"param_type must be a `list` type, but was `{param_type}`")
154
+ value_type = param_type.__args__[0]
115
155
  if not is_primitive(value_type):
116
156
  raise ValueError("List types support only primitives: int, str, float")
117
157
  arg_values = []
@@ -120,9 +160,19 @@ def _decode_string_to_list(
120
160
  return arg_values
121
161
 
122
162
 
163
+ def decode(encoded_value: Any, annotation: Any):
164
+ if encoded_value is None:
165
+ return None
166
+ if is_bool(annotation):
167
+ return encoded_value and encoded_value.lower() == "true"
168
+ if not is_primitive(annotation) and type(encoded_value) == str:
169
+ return decode_from_string(encoded_value, annotation)
170
+ return encoded_value
171
+
172
+
123
173
  def decode_from_string(
124
174
  encoded_value: str, annotation: Any
125
- ) -> Union[Dict[Any, Any], List[Any], None]:
175
+ ) -> Union[dict[Any, Any], list[Any], None]:
126
176
  """Decodes string representation to the underlying type(Dict or List)
127
177
 
128
178
  Given a string representation of the value, the method decodes it according
@@ -147,13 +197,13 @@ def decode_from_string(
147
197
  if not encoded_value:
148
198
  return None
149
199
  value_type = annotation
150
- value_origin = typing_inspect.get_origin(value_type)
151
- if value_origin is dict:
152
- return _decode_string_to_dict(encoded_value, value_type)
153
- elif value_origin is list:
154
- return _decode_string_to_list(encoded_value, value_type)
155
- else:
156
- raise ValueError("Unknown")
200
+ if hasattr(value_type, "__origin__"):
201
+ value_origin = value_type.__origin__
202
+ if value_origin is dict:
203
+ return _decode_string_to_dict(encoded_value, value_type)
204
+ elif value_origin is list:
205
+ return _decode_string_to_list(encoded_value, value_type)
206
+ raise ValueError("Unknown")
157
207
 
158
208
 
159
209
  def is_bool(param_type: Any) -> bool:
@@ -185,12 +235,23 @@ def decode_optional(param_type: Any) -> Any:
185
235
  If ``param_type`` is type Optional[INNER_TYPE], method returns INNER_TYPE
186
236
  Otherwise returns ``param_type``
187
237
  """
188
- param_origin = typing_inspect.get_origin(param_type)
189
- if param_origin is not Union:
238
+
239
+ if not hasattr(param_type, "__origin__"):
240
+ if isinstance(param_type, UnionType):
241
+ # handle BinOp style Optional (e.g. `T | None`)
242
+ if len(param_type.__args__) == 2 and param_type.__args__[1] is type(None):
243
+ return param_type.__args__[0]
244
+ else:
245
+ return param_type
246
+ else:
247
+ return param_type
248
+
249
+ if param_type.__origin__ is not Union:
190
250
  return param_type
191
- key_type, value_type = typing_inspect.get_args(param_type)
192
- if value_type is type(None):
193
- return key_type
251
+
252
+ args = param_type.__args__
253
+ if len(args) == 2 and args[1] is type(None):
254
+ return args[0]
194
255
  else:
195
256
  return param_type
196
257
 
torchx/version.py CHANGED
@@ -1,10 +1,12 @@
1
- #!/usr/bin/env python3
2
1
  # Copyright (c) Meta Platforms, Inc. and affiliates.
3
2
  # All rights reserved.
4
3
  #
5
4
  # This source code is licensed under the BSD-style license found in the
6
5
  # LICENSE file in the root directory of this source tree.
7
6
 
7
+ # pyre-strict
8
+
9
+ from torchx._version import BASE_VERSION
8
10
  from torchx.util.entrypoints import load
9
11
 
10
12
  # Follows PEP-0440 version scheme guidelines
@@ -16,7 +18,7 @@ from torchx.util.entrypoints import load
16
18
  # 0.1.0bN # Beta release
17
19
  # 0.1.0rcN # Release Candidate
18
20
  # 0.1.0 # Final release
19
- __version__ = "0.7.0dev0"
21
+ __version__: str = BASE_VERSION
20
22
 
21
23
 
22
24
  # Use the github container registry images corresponding to the current package
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  """
8
10
  Status: Beta
9
11
 
torchx/workspace/api.py CHANGED
@@ -4,12 +4,20 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import abc
8
10
  import fnmatch
11
+ import logging
9
12
  import posixpath
10
- from typing import Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar
13
+ import tempfile
14
+ import warnings
15
+ from dataclasses import dataclass
16
+ from typing import Any, Dict, Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar
17
+
18
+ from torchx.specs import AppDef, CfgVal, Role, runopts, Workspace
11
19
 
12
- from torchx.specs import AppDef, CfgVal, Role, runopts
20
+ logger: logging.Logger = logging.getLogger(__name__)
13
21
 
14
22
  if TYPE_CHECKING:
15
23
  from fsspec import AbstractFileSystem
@@ -18,6 +26,58 @@ TORCHX_IGNORE = ".torchxignore"
18
26
 
19
27
  T = TypeVar("T")
20
28
 
29
+ PackageType = TypeVar("PackageType")
30
+ WorkspaceConfigType = TypeVar("WorkspaceConfigType")
31
+
32
+
33
+ @dataclass
34
+ class PkgInfo(Generic[PackageType]):
35
+ """
36
+ Convenience class used to specify information regarding the built workspace
37
+ """
38
+
39
+ img: str
40
+ lazy_overrides: Dict[str, Any]
41
+ metadata: PackageType
42
+
43
+ def __post_init__(self) -> None:
44
+ msg = (
45
+ f"{self.__class__.__name__} is deprecated and will be removed in the future."
46
+ " Consider forking this class if your project depends on it."
47
+ )
48
+ warnings.warn(
49
+ msg,
50
+ FutureWarning,
51
+ stacklevel=2,
52
+ )
53
+
54
+
55
+ @dataclass
56
+ class WorkspaceBuilder(Generic[PackageType, WorkspaceConfigType]):
57
+ cfg: WorkspaceConfigType
58
+
59
+ def __post_init__(self) -> None:
60
+ msg = (
61
+ f"{self.__class__.__name__} is deprecated and will be removed in the future."
62
+ " Consider forking this class if your project depends on it."
63
+ )
64
+ warnings.warn(
65
+ msg,
66
+ FutureWarning,
67
+ stacklevel=2,
68
+ )
69
+
70
+ @abc.abstractmethod
71
+ def build_workspace(self, sync: bool = True) -> PkgInfo[PackageType]:
72
+ """
73
+ Builds the specified ``workspace`` with respect to ``img``.
74
+ In the simplest case, this method builds a new image.
75
+ Certain (more efficient) implementations build
76
+ incremental diff patches that overlay on top of the role's image.
77
+
78
+ """
79
+ pass
80
+
21
81
 
22
82
  class WorkspaceMixin(abc.ABC, Generic[T]):
23
83
  """
@@ -44,11 +104,82 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
44
104
  """
45
105
  return runopts()
46
106
 
47
- @abc.abstractmethod
107
+ def build_workspaces(self, roles: list[Role], cfg: Mapping[str, CfgVal]) -> None:
108
+ """
109
+ NOTE: this method MUTATES the passed roles!
110
+
111
+ Builds the workspaces (if any) for each role and updates the role to reflect the built workspace.
112
+ Typically ``role.image`` is updated with the newly built image that reflects the local workspace.
113
+ Some workspace implementations may add extra environment variables to make it easier for other
114
+ parts of the program to access the workspace. For example a ``WORKSPACE_DIR`` env var may be added
115
+ to ``role.env`` that scripts can use to refer to the workspace directory in the container.
116
+ """
117
+
118
+ build_cache: dict[object, object] = {}
119
+
120
+ for i, role in enumerate(roles):
121
+ if role.workspace:
122
+ old_img = role.image
123
+ self.caching_build_workspace_and_update_role(role, cfg, build_cache)
124
+
125
+ if old_img != role.image:
126
+ logger.info(
127
+ "role[%d]=%s updated with new image to include workspace changes",
128
+ i,
129
+ role.name,
130
+ )
131
+
132
+ def caching_build_workspace_and_update_role(
133
+ self,
134
+ role: Role,
135
+ cfg: Mapping[str, CfgVal],
136
+ build_cache: dict[object, object],
137
+ ) -> None:
138
+ """
139
+ Same as :py:meth:`build_workspace_and_update_role` but takes
140
+ a ``build_cache`` that can be used to cache pointers to build artifacts
141
+ between building workspace for each role.
142
+
143
+ This is useful when an appdef has multiple roles where the image and workspace
144
+ of the roles are the same but other attributes such as entrypoint or args are different.
145
+
146
+ NOTE: ``build_cache``'s lifetime is within :py:meth:`build_workspaces`
147
+ NOTE: the workspace implementation decides what to cache
148
+
149
+ Workspace subclasses should prefer implementing this method over
150
+ :py:meth:`build_workspace_and_update_role`.
151
+
152
+ The default implementation of this method simply calls the (deprecated) non-caching
153
+ :py:meth:`build_workspace_and_update_role` and deals with multi-dir workspaces by
154
+ merging them into a single tmpdir before passing it down.
155
+
156
+ """
157
+
158
+ workspace = role.workspace
159
+
160
+ if not workspace:
161
+ return
162
+
163
+ if workspace.is_unmapped_single_project():
164
+ # single-dir workspace with no target map; no need to copy to a tmp dir
165
+ self.build_workspace_and_update_role(role, str(workspace), cfg)
166
+ else:
167
+ # multi-dirs or single-dir with a target map;
168
+ # copy all dirs to a tmp dir and treat the tmp dir as a single-dir workspace
169
+ with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
170
+ workspace.merge_into(outdir)
171
+ self.build_workspace_and_update_role(role, outdir, cfg)
172
+
48
173
  def build_workspace_and_update_role(
49
- self, role: Role, workspace: str, cfg: Mapping[str, CfgVal]
174
+ self,
175
+ role: Role,
176
+ workspace: str,
177
+ cfg: Mapping[str, CfgVal],
50
178
  ) -> None:
51
179
  """
180
+ .. note:: DEPRECATED: Workspace subclasses should implement
181
+ :py:meth:`caching_build_workspace_and_update_role` over this method.
182
+
52
183
  Builds the specified ``workspace`` with respect to ``img``
53
184
  and updates the ``role`` to reflect the built workspace artifacts.
54
185
  In the simplest case, this method builds a new image and updates
@@ -57,7 +188,7 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
57
188
 
58
189
  Note: this method mutates the passed ``role``.
59
190
  """
60
- ...
191
+ raise NotImplementedError("implement `caching_build_workspace_and_update_role`")
61
192
 
62
193
  def dryrun_push_images(self, app: AppDef, cfg: Mapping[str, CfgVal]) -> T:
63
194
  """
@@ -100,7 +231,6 @@ def walk_workspace(
100
231
  walk_workspace walks the filesystem path and applies the ignore rules
101
232
  specified via ``ignore_name``.
102
233
  This follows the rules for ``.dockerignore``.
103
- https://docs.docker.com/engine/reference/builder/#dockerignore-file
104
234
  """
105
235
  ignore_patterns = []
106
236
  ignore_path = posixpath.join(path, ignore_name)
@@ -5,6 +5,8 @@
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
7
 
8
+ # pyre-strict
9
+
8
10
  import os
9
11
  import posixpath
10
12
  import shutil
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import io
8
10
  import logging
9
11
  import posixpath
@@ -16,6 +18,7 @@ from typing import Dict, IO, Iterable, Mapping, Optional, TextIO, Tuple, TYPE_CH
16
18
  import fsspec
17
19
 
18
20
  import torchx
21
+ from docker.errors import BuildError
19
22
  from torchx.specs import AppDef, CfgVal, Role, runopts
20
23
  from torchx.workspace.api import walk_workspace, WorkspaceMixin
21
24
 
@@ -91,6 +94,12 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
91
94
  type_=str,
92
95
  help="(remote jobs) the image repository to use when pushing patched images, must have push access. Ex: example.com/your/container",
93
96
  )
97
+ opts.add(
98
+ "quiet",
99
+ type_=bool,
100
+ default=False,
101
+ help="whether to suppress verbose output for image building. Defaults to ``False``.",
102
+ )
94
103
  return opts
95
104
 
96
105
  def build_workspace_and_update_role(
@@ -105,6 +114,10 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
105
114
  workspace: a fsspec path to a directory with contents to be overlaid
106
115
  """
107
116
 
117
+ old_imgs = [
118
+ image.id
119
+ for image in self._docker_client.images.list(name=cfg["image_repo"])
120
+ ]
108
121
  context = _build_context(role.image, workspace)
109
122
 
110
123
  try:
@@ -115,7 +128,7 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
115
128
  f"failed to pull image {role.image}, falling back to local: {e}"
116
129
  )
117
130
  log.info("Building workspace docker image (this may take a while)...")
118
- image, _ = self._docker_client.images.build(
131
+ build_events = self._docker_client.api.build(
119
132
  fileobj=context,
120
133
  custom_context=True,
121
134
  dockerfile=TORCHX_DOCKERFILE,
@@ -125,11 +138,26 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
125
138
  },
126
139
  pull=False,
127
140
  rm=True,
141
+ decode=True,
128
142
  labels={
129
143
  self.LABEL_VERSION: torchx.__version__,
130
144
  },
131
145
  )
132
- role.image = image.id
146
+ image_id = None
147
+ for event in build_events:
148
+ if message := event.get("stream"):
149
+ if not cfg.get("quiet", False):
150
+ message = message.strip("\r\n").strip("\n")
151
+ if message:
152
+ log.info(message)
153
+ if aux := event.get("aux"):
154
+ image_id = aux["ID"]
155
+ if error := event.get("error"):
156
+ raise BuildError(reason=error, build_log=None)
157
+ if len(old_imgs) == 0 or role.image not in old_imgs:
158
+ assert image_id, "image id was not found"
159
+ role.image = image_id
160
+
133
161
  finally:
134
162
  context.close()
135
163
 
@@ -0,0 +1,167 @@
1
+ Metadata-Version: 2.4
2
+ Name: torchx-nightly
3
+ Version: 2025.12.24
4
+ Summary: TorchX SDK and Components
5
+ Home-page: https://github.com/meta-pytorch/torchx
6
+ Author: TorchX Devs
7
+ Author-email: torchx@fb.com
8
+ License: BSD-3
9
+ Keywords: pytorch,machine learning
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: BSD License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: >=3.7
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: docstring-parser>=0.8.1
21
+ Requires-Dist: pyyaml
22
+ Requires-Dist: docker
23
+ Requires-Dist: filelock
24
+ Requires-Dist: fsspec>=2023.10.0
25
+ Requires-Dist: tabulate
26
+ Provides-Extra: aws-batch
27
+ Requires-Dist: boto3; extra == "aws-batch"
28
+ Provides-Extra: kubernetes
29
+ Requires-Dist: kubernetes>=11; extra == "kubernetes"
30
+ Provides-Extra: dev
31
+ Requires-Dist: aiobotocore==2.20.0; extra == "dev"
32
+ Requires-Dist: ax-platform[mysql]==0.2.3; extra == "dev"
33
+ Requires-Dist: boto3==1.36.0; extra == "dev"
34
+ Requires-Dist: captum>=0.4.0; extra == "dev"
35
+ Requires-Dist: docker; extra == "dev"
36
+ Requires-Dist: kubernetes==25.3.0; extra == "dev"
37
+ Requires-Dist: flake8==3.9.0; extra == "dev"
38
+ Requires-Dist: fsspec==2024.3.1; extra == "dev"
39
+ Requires-Dist: s3fs==2024.3.1; extra == "dev"
40
+ Requires-Dist: hydra-core; extra == "dev"
41
+ Requires-Dist: ipython; extra == "dev"
42
+ Requires-Dist: mlflow-skinny; extra == "dev"
43
+ Requires-Dist: moto~=5.0.8; extra == "dev"
44
+ Requires-Dist: pyre-extensions; extra == "dev"
45
+ Requires-Dist: pyre-check; extra == "dev"
46
+ Requires-Dist: pytest; extra == "dev"
47
+ Requires-Dist: pytest-cov; extra == "dev"
48
+ Requires-Dist: pytorch-lightning==2.5.0; extra == "dev"
49
+ Requires-Dist: tensorboard==2.14.0; extra == "dev"
50
+ Requires-Dist: sagemaker==2.237.3; extra == "dev"
51
+ Requires-Dist: torch-model-archiver>=0.4.2; extra == "dev"
52
+ Requires-Dist: torch; extra == "dev"
53
+ Requires-Dist: torchmetrics==1.6.3; extra == "dev"
54
+ Requires-Dist: torchserve>=0.10.0; extra == "dev"
55
+ Requires-Dist: torchtext; extra == "dev"
56
+ Requires-Dist: torchvision; extra == "dev"
57
+ Requires-Dist: typing-extensions; extra == "dev"
58
+ Requires-Dist: ts==0.5.1; extra == "dev"
59
+ Requires-Dist: wheel; extra == "dev"
60
+ Requires-Dist: lintrunner; extra == "dev"
61
+ Requires-Dist: lintrunner-adapters; extra == "dev"
62
+ Dynamic: author
63
+ Dynamic: author-email
64
+ Dynamic: classifier
65
+ Dynamic: description
66
+ Dynamic: description-content-type
67
+ Dynamic: home-page
68
+ Dynamic: keywords
69
+ Dynamic: license
70
+ Dynamic: license-file
71
+ Dynamic: provides-extra
72
+ Dynamic: requires-dist
73
+ Dynamic: requires-python
74
+ Dynamic: summary
75
+
76
+ [![PyPI](https://img.shields.io/pypi/v/torchx)](https://pypi.org/project/torchx/)
77
+ [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/meta-pytorch/torchx/blob/main/LICENSE)
78
+ ![Tests](https://github.com/meta-pytorch/torchx/actions/workflows/python-unittests.yaml/badge.svg)
79
+ ![Lint](https://github.com/meta-pytorch/torchx/actions/workflows/lint.yaml/badge.svg)
80
+ [![codecov](https://codecov.io/gh/pytorch/torchx/branch/main/graph/badge.svg?token=ceHHIm0hXy)](https://codecov.io/gh/pytorch/torchx)
81
+
82
+
83
+ # TorchX
84
+
85
+
86
+ TorchX is a universal job launcher for PyTorch applications.
87
+ TorchX is designed to have fast iteration time for training/research and support
88
+ for E2E production ML pipelines when you're ready.
89
+
90
+ TorchX currently supports:
91
+
92
+ * Kubernetes (EKS, GKE, AKS, etc)
93
+ * Slurm
94
+ * AWS Batch
95
+ * Docker
96
+ * Local
97
+
98
+ Need a scheduler not listed? [Let us know!](https://github.com/meta-pytorch/torchx/issues?q=is%3Aopen+is%3Aissue+label%3Ascheduler-request)
99
+
100
+ ## Quickstart
101
+
102
+ See the [quickstart guide](https://meta-pytorch.org/torchx/latest/quickstart.html).
103
+
104
+ ## Documentation
105
+
106
+ * [Stable Documentation](https://meta-pytorch.org/torchx/latest/)
107
+ * [Nightly Documentation](https://meta-pytorch.org/torchx/main/)
108
+
109
+ ## Requirements
110
+
111
+ torchx:
112
+
113
+ * python3 (3.8+)
114
+ * [PyTorch](https://pytorch.org/get-started/locally/)
115
+ * optional: [Docker](https://docs.docker.com/get-docker/) (needed for docker based schedulers)
116
+
117
+ Certain schedulers may require scheduler specific requirements. See installation
118
+ for info.
119
+
120
+ ## Installation
121
+
122
+ ### Stable
123
+
124
+ ```bash
125
+ # install torchx sdk and CLI -- minimum dependencies
126
+ pip install torchx
127
+
128
+ # install torchx sdk and CLI -- all dependencies
129
+ pip install "torchx[dev]"
130
+
131
+ # install torchx Kubernetes / Volcano support
132
+ pip install "torchx[kubernetes]"
133
+
134
+ # install torchx GCP Batch support
135
+ pip install "torchx[gcp_batch]"
136
+ ```
137
+
138
+ ### Nightly
139
+
140
+ ```bash
141
+ # install torchx sdk and CLI
142
+ pip install torchx-nightly[dev]
143
+ ```
144
+
145
+ ### Source
146
+
147
+ ```bash
148
+ # install torchx sdk and CLI from source
149
+ $ pip install -e git+https://github.com/meta-pytorch/torchx.git#egg=torchx
150
+
151
+ # install extra dependencies
152
+ $ pip install -e git+https://github.com/meta-pytorch/torchx.git#egg=torchx[dev]
153
+ ```
154
+
155
+ ### Docker
156
+
157
+ TorchX provides a docker container for use as part of a TorchX role.
158
+
159
+ See: https://github.com/meta-pytorch/torchx/pkgs/container/torchx
160
+
161
+ ## Contributing
162
+
163
+ We welcome PRs! See the [CONTRIBUTING](https://github.com/meta-pytorch/torchx/blob/main/CONTRIBUTING.md) file.
164
+
165
+ ## License
166
+
167
+ TorchX is BSD licensed, as found in the [LICENSE](https://github.com/meta-pytorch/torchx/blob/main/LICENSE) file.