PyPI - torchx-nightly - Versions diffs - 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl - Mend

torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchx-nightly might be problematic. Click here for more details.

Files changed (110) hide show

torchx/__init__.py +2 -0
torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
torchx/apps/serve/serve.py +2 -0
torchx/apps/utils/booth_main.py +2 -0
torchx/apps/utils/copy_main.py +2 -0
torchx/apps/utils/process_monitor.py +2 -0
torchx/cli/__init__.py +2 -0
torchx/cli/argparse_util.py +38 -3
torchx/cli/cmd_base.py +2 -0
torchx/cli/cmd_cancel.py +2 -0
torchx/cli/cmd_configure.py +2 -0
torchx/cli/cmd_delete.py +30 -0
torchx/cli/cmd_describe.py +2 -0
torchx/cli/cmd_list.py +8 -4
torchx/cli/cmd_log.py +6 -24
torchx/cli/cmd_run.py +269 -45
torchx/cli/cmd_runopts.py +2 -0
torchx/cli/cmd_status.py +12 -1
torchx/cli/cmd_tracker.py +3 -1
torchx/cli/colors.py +2 -0
torchx/cli/main.py +4 -0
torchx/components/__init__.py +3 -8
torchx/components/component_test_base.py +2 -0
torchx/components/dist.py +18 -7
torchx/components/integration_tests/component_provider.py +4 -2
torchx/components/integration_tests/integ_tests.py +2 -0
torchx/components/serve.py +2 -0
torchx/components/structured_arg.py +7 -6
torchx/components/utils.py +15 -4
torchx/distributed/__init__.py +2 -4
torchx/examples/apps/datapreproc/datapreproc.py +2 -0
torchx/examples/apps/lightning/data.py +5 -3
torchx/examples/apps/lightning/model.py +7 -6
torchx/examples/apps/lightning/profiler.py +7 -4
torchx/examples/apps/lightning/train.py +11 -2
torchx/examples/torchx_out_of_sync_training.py +11 -0
torchx/notebook.py +2 -0
torchx/runner/__init__.py +2 -0
torchx/runner/api.py +167 -60
torchx/runner/config.py +43 -10
torchx/runner/events/__init__.py +57 -13
torchx/runner/events/api.py +14 -3
torchx/runner/events/handlers.py +2 -0
torchx/runtime/tracking/__init__.py +2 -0
torchx/runtime/tracking/api.py +2 -0
torchx/schedulers/__init__.py +16 -15
torchx/schedulers/api.py +70 -14
torchx/schedulers/aws_batch_scheduler.py +79 -5
torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
torchx/schedulers/devices.py +17 -4
torchx/schedulers/docker_scheduler.py +43 -11
torchx/schedulers/ids.py +29 -23
torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
torchx/schedulers/kubernetes_scheduler.py +383 -38
torchx/schedulers/local_scheduler.py +100 -27
torchx/schedulers/lsf_scheduler.py +5 -4
torchx/schedulers/slurm_scheduler.py +336 -20
torchx/schedulers/streams.py +2 -0
torchx/specs/__init__.py +89 -12
torchx/specs/api.py +431 -32
torchx/specs/builders.py +176 -38
torchx/specs/file_linter.py +143 -57
torchx/specs/finder.py +68 -28
torchx/specs/named_resources_aws.py +254 -22
torchx/specs/named_resources_generic.py +2 -0
torchx/specs/overlays.py +106 -0
torchx/specs/test/components/__init__.py +2 -0
torchx/specs/test/components/a/__init__.py +2 -0
torchx/specs/test/components/a/b/__init__.py +2 -0
torchx/specs/test/components/a/b/c.py +2 -0
torchx/specs/test/components/c/__init__.py +2 -0
torchx/specs/test/components/c/d.py +2 -0
torchx/tracker/__init__.py +12 -6
torchx/tracker/api.py +15 -18
torchx/tracker/backend/fsspec.py +2 -0
torchx/util/cuda.py +2 -0
torchx/util/datetime.py +2 -0
torchx/util/entrypoints.py +39 -15
torchx/util/io.py +2 -0
torchx/util/log_tee_helpers.py +210 -0
torchx/util/modules.py +65 -0
torchx/util/session.py +42 -0
torchx/util/shlex.py +2 -0
torchx/util/strings.py +3 -1
torchx/util/types.py +90 -29
torchx/version.py +4 -2
torchx/workspace/__init__.py +2 -0
torchx/workspace/api.py +136 -6
torchx/workspace/dir_workspace.py +2 -0
torchx/workspace/docker_workspace.py +30 -2
torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
{torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
{torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
torchx/examples/pipelines/__init__.py +0 -0
torchx/examples/pipelines/kfp/__init__.py +0 -0
torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
torchx/pipelines/kfp/__init__.py +0 -28
torchx/pipelines/kfp/adapter.py +0 -271
torchx/pipelines/kfp/version.py +0 -17
torchx/schedulers/gcp_batch_scheduler.py +0 -487
torchx/schedulers/ray/ray_common.py +0 -22
torchx/schedulers/ray/ray_driver.py +0 -307
torchx/schedulers/ray_scheduler.py +0 -453
torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
{torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
{torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0

torchx/util/modules.py ADDED Viewed

@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# pyre-strict
+import importlib
+from types import ModuleType
+from typing import Callable, Optional, TypeVar, Union
+def load_module(path: str) -> Union[ModuleType, Optional[Callable[..., object]]]:
+    """
+    Loads and returns the module/module attr represented by the ``path``: ``full.module.path:optional_attr``
+    1. ``load_module("this.is.a_module:fn")`` -> equivalent to ``this.is.a_module.fn``
+    1. ``load_module("this.is.a_module")`` -> equivalent to ``this.is.a_module``
+    """
+    parts = path.split(":", 2)
+    module_path, method = parts[0], parts[1] if len(parts) > 1 else None
+    module = None
+    i, n = -1, len(module_path)
+    try:
+        while i < n:
+            i = module_path.find(".", i + 1)
+            i = i if i >= 0 else n
+            module = importlib.import_module(module_path[:i])
+        return getattr(module, method) if method else module
+    except Exception:
+        return None
+T = TypeVar("T")
+def import_attr(name: str, attr: str, default: T) -> T:
+    """
+    Imports ``name.attr`` and returns it if the module is found.
+    Otherwise, returns the specified ``default``.
+    Useful when getting an attribute from an optional dependency.
+    Note that the ``default`` parameter is intentionally not an optional
+    since this function is intended to be used with modules that may not be
+    installed as a dependency. Therefore the caller must ALWAYS provide a
+    sensible default.
+    Usage:
+    .. code-block:: python
+        aws_resources = import_attr("torchx.specs.named_resources_aws", "NAMED_RESOURCES", default={})
+        all_resources.update(aws_resources)
+    Raises:
+        AttributeError: If the module exists (e.g. can be imported)
+            but does not have an attribute with name ``attr``.
+    """
+    try:
+        mod = importlib.import_module(name)
+    except ModuleNotFoundError:
+        return default
+    else:
+        return getattr(mod, attr)

torchx/util/session.py ADDED Viewed

@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# pyre-strict
+import os
+import uuid
+from typing import Optional
+TORCHX_INTERNAL_SESSION_ID = "TORCHX_INTERNAL_SESSION_ID"
+CURRENT_SESSION_ID: Optional[str] = None
+def get_session_id_or_create_new() -> str:
+    """
+    Returns the current session ID, or creates a new one if none exists.
+    The session ID remains the same as long as it is in the same process.
+    Please DO NOT use this function out of torchx codebase.
+    """
+    global CURRENT_SESSION_ID
+    if CURRENT_SESSION_ID:
+        return CURRENT_SESSION_ID
+    env_session_id = os.getenv(TORCHX_INTERNAL_SESSION_ID)
+    if env_session_id:
+        CURRENT_SESSION_ID = env_session_id
+        return CURRENT_SESSION_ID
+    session_id = str(uuid.uuid4())
+    CURRENT_SESSION_ID = session_id
+    return session_id
+def get_torchx_session_id() -> Optional[str]:
+    """
+    Returns the torchx session ID.
+    Please use this function to get the session ID out of torchx codebase.
+    """
+    return CURRENT_SESSION_ID

torchx/util/shlex.py CHANGED Viewed

@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
 import shlex
 from typing import Iterable

torchx/util/strings.py CHANGED Viewed

@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
 import re
@@ -11,7 +13,7 @@ def normalize_str(data: str) -> str:
     """
     Invokes ``lower`` on thes string and removes all
     characters that do not satisfy ``[a-z0-9\\-]`` pattern.
-    This method is mostly used to make sure kubernetes and gcp_batch scheduler gets
+    This method is mostly used to make sure kubernetes scheduler gets
     the job name that does not violate its restrictions.
     """
     if data.startswith("-"):

torchx/util/types.py CHANGED Viewed

@@ -4,13 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-import inspect
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
+# pyre-strict
-import typing_inspect
+import inspect
+import re
+from types import UnionType
+from typing import Any, Callable, Optional, Tuple, TypeVar, Union
-def to_list(arg: str) -> List[str]:
+def to_list(arg: str) -> list[str]:
     conf = []
     if len(arg.strip()) == 0:
         return []
@@ -19,9 +21,9 @@ def to_list(arg: str) -> List[str]:
     return conf
-def to_dict(arg: str) -> Dict[str, str]:
+def to_dict(arg: str) -> dict[str, str]:
     """
-    Parses the given ``arg`` string literal into a ``Dict[str, str]`` of
+    Parses the given ``arg`` string literal into a ``dict[str, str]`` of
     key-value pairs delimited by ``"="`` (equals). The values may be a
     list literal where the list elements are delimited by ``","`` (comma)
     or ``";"`` (semi-colon). The same delimiters (``","`` and ``";"``) are used
@@ -29,6 +31,9 @@ def to_dict(arg: str) -> Dict[str, str]:
     When values are lists, the last delimiter is used as kv-pair delimiter
     (e.g. ``FOO=v1,v2,BAR=v3``). Empty values of ``arg`` returns an empty map.
+    Values can be quoted with single or double quotes to include special characters
+    (``"="``, ``","``, ``";"``) without them being interpreted as separators.
     Note that values that encode list literals are returned as list literals
     NOT actual lists. The caller must further process each value in the returned
     map, to cast/decode the value literals as specific types. In this case,
@@ -43,6 +48,9 @@ def to_dict(arg: str) -> Dict[str, str]:
      to_dict("FOO=v1") == {"FOO": "v1"}
+     to_dict("FOO=''") == {"FOO": ""}
+     to_dict('FOO=""') == {"FOO": ""}
      to_dict("FOO=v1,v2") == {"FOO": "v1,v2"]}
      to_dict("FOO=v1;v2") == {"FOO": "v1;v2"]}
      to_dict("FOO=v1;v2") == {"FOO": "v1;v2,"]}
@@ -52,6 +60,7 @@ def to_dict(arg: str) -> Dict[str, str]:
      to_dict("FOO=v1;v2,BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
      to_dict("FOO=v1;v2;BAR=v3") == {"FOO": "v1;v2", "BAR": "v3"}
+     to_dict('FOO="value with = and , and ;"') == {"FOO": "value with = and , and ;"}
     """
     def parse_val_key(vk: str) -> Tuple[str, str]:
@@ -68,17 +77,35 @@ def to_dict(arg: str) -> Dict[str, str]:
         else:
             return vk[0:idx].strip(), vk[idx + 1 :].strip()
-    arg_map: Dict[str, str] = {}
+    def to_val(val: str) -> str:
+        if (val.startswith("'") and val.endswith("'")) or (
+            val.startswith('"') and val.endswith('"')
+        ):
+            return val[1:-1]
+        return val if val != '""' and val != "''" else ""
+    arg_map: dict[str, str] = {}
     if not arg:
         return arg_map
+    # find quoted values
+    quoted_pattern = r'([\'"])((?:\\.|(?!\1).)*?)\1'
+    quoted_values: list[str] = []
+    def replace_quoted(match):
+        quoted_values.append(match.group(0))
+        return f"__QUOTED_{len(quoted_values) - 1}__"
+    # replace quoted values with placeholders
+    processed_arg = re.sub(quoted_pattern, replace_quoted, arg)
     # split cfgs
     cfg_kv_delim = "="
     # ["FOO", "v1;v2,BAR", v3, "BAZ", "v4,v5"]
     split_arg = [
-        s.strip() for s in arg.split(cfg_kv_delim) if s.strip()
+        s.strip() for s in processed_arg.split(cfg_kv_delim) if s.strip()
     ]  # remove empty
     split_arg_len = len(split_arg)
@@ -90,18 +117,28 @@ def to_dict(arg: str) -> Dict[str, str]:
     # middle elements are value_{n}<delim>key_{n+1}
     for vk in split_arg[1 : split_arg_len - 1]:  # python deals with
         val, key_next = parse_val_key(vk)
-        arg_map[key] = val
+        for i, quoted in enumerate(quoted_values):
+            val = val.replace(f"__QUOTED_{i}__", quoted)
+        arg_map[key] = to_val(val)
         key = key_next
     val = split_arg[-1]  # last element is always a value
-    arg_map[key] = val
+    for i, quoted in enumerate(quoted_values):
+        val = val.replace(f"__QUOTED_{i}__", quoted)
+    arg_map[key] = to_val(val)
     return arg_map
 # pyre-ignore-all-errors[3, 2]
 def _decode_string_to_dict(
-    encoded_value: str, param_type: Type[Dict[Any, Any]]
-) -> Dict[Any, Any]:
-    key_type, value_type = typing_inspect.get_args(param_type)
+    encoded_value: str, param_type: type[dict[Any, Any]]
+) -> dict[Any, Any]:
+    # pyre-ignore[16]
+    if not hasattr(param_type, "__args__") or len(param_type.__args__) != 2:
+        raise ValueError(f"param_type must be a `dict` type, but was `{param_type}`")
+    key_type, value_type = param_type.__args__
     arg_values = {}
     for key, value in to_dict(encoded_value).items():
         arg_values[key_type(key)] = value_type(value)
@@ -109,9 +146,12 @@ def _decode_string_to_dict(
 def _decode_string_to_list(
-    encoded_value: str, param_type: Type[List[Any]]
-) -> List[Any]:
-    value_type = typing_inspect.get_args(param_type)[0]
+    encoded_value: str, param_type: type[list[Any]]
+) -> list[Any]:
+    # pyre-ignore[16]
+    if not hasattr(param_type, "__args__") or len(param_type.__args__) != 1:
+        raise ValueError(f"param_type must be a `list` type, but was `{param_type}`")
+    value_type = param_type.__args__[0]
     if not is_primitive(value_type):
         raise ValueError("List types support only primitives: int, str, float")
     arg_values = []
@@ -120,9 +160,19 @@ def _decode_string_to_list(
     return arg_values
+def decode(encoded_value: Any, annotation: Any):
+    if encoded_value is None:
+        return None
+    if is_bool(annotation):
+        return encoded_value and encoded_value.lower() == "true"
+    if not is_primitive(annotation) and type(encoded_value) == str:
+        return decode_from_string(encoded_value, annotation)
+    return encoded_value
 def decode_from_string(
     encoded_value: str, annotation: Any
-) -> Union[Dict[Any, Any], List[Any], None]:
+) -> Union[dict[Any, Any], list[Any], None]:
     """Decodes string representation to the underlying type(Dict or List)
     Given a string representation of the value, the method decodes it according
@@ -147,13 +197,13 @@ def decode_from_string(
     if not encoded_value:
         return None
     value_type = annotation
-    value_origin = typing_inspect.get_origin(value_type)
-    if value_origin is dict:
-        return _decode_string_to_dict(encoded_value, value_type)
-    elif value_origin is list:
-        return _decode_string_to_list(encoded_value, value_type)
-    else:
-        raise ValueError("Unknown")
+    if hasattr(value_type, "__origin__"):
+        value_origin = value_type.__origin__
+        if value_origin is dict:
+            return _decode_string_to_dict(encoded_value, value_type)
+        elif value_origin is list:
+            return _decode_string_to_list(encoded_value, value_type)
+    raise ValueError("Unknown")
 def is_bool(param_type: Any) -> bool:
@@ -185,12 +235,23 @@ def decode_optional(param_type: Any) -> Any:
         If ``param_type`` is type Optional[INNER_TYPE], method returns INNER_TYPE
         Otherwise returns ``param_type``
     """
-    param_origin = typing_inspect.get_origin(param_type)
-    if param_origin is not Union:
+    if not hasattr(param_type, "__origin__"):
+        if isinstance(param_type, UnionType):
+            # handle BinOp style Optional (e.g. `T | None`)
+            if len(param_type.__args__) == 2 and param_type.__args__[1] is type(None):
+                return param_type.__args__[0]
+            else:
+                return param_type
+        else:
+            return param_type
+    if param_type.__origin__ is not Union:
         return param_type
-    key_type, value_type = typing_inspect.get_args(param_type)
-    if value_type is type(None):
-        return key_type
+    args = param_type.__args__
+    if len(args) == 2 and args[1] is type(None):
+        return args[0]
     else:
         return param_type

torchx/version.py CHANGED Viewed

@@ -1,10 +1,12 @@
-#!/usr/bin/env python3
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
+from torchx._version import BASE_VERSION
 from torchx.util.entrypoints import load
 # Follows PEP-0440 version scheme guidelines
@@ -16,7 +18,7 @@ from torchx.util.entrypoints import load
 # 0.1.0bN  # Beta release
 # 0.1.0rcN  # Release Candidate
 # 0.1.0  # Final release
-__version__ = "0.7.0dev0"
+__version__: str = BASE_VERSION
 # Use the github container registry images corresponding to the current package

torchx/workspace/__init__.py CHANGED Viewed

@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
 """
 Status: Beta

torchx/workspace/api.py CHANGED Viewed

@@ -4,12 +4,20 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
 import abc
 import fnmatch
+import logging
 import posixpath
-from typing import Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar
+import tempfile
+import warnings
+from dataclasses import dataclass
+from typing import Any, Dict, Generic, Iterable, Mapping, Tuple, TYPE_CHECKING, TypeVar
+from torchx.specs import AppDef, CfgVal, Role, runopts, Workspace
-from torchx.specs import AppDef, CfgVal, Role, runopts
+logger: logging.Logger = logging.getLogger(__name__)
 if TYPE_CHECKING:
     from fsspec import AbstractFileSystem
@@ -18,6 +26,58 @@ TORCHX_IGNORE = ".torchxignore"
 T = TypeVar("T")
+PackageType = TypeVar("PackageType")
+WorkspaceConfigType = TypeVar("WorkspaceConfigType")
+@dataclass
+class PkgInfo(Generic[PackageType]):
+    """
+    Convenience class used to specify information regarding the built workspace
+    """
+    img: str
+    lazy_overrides: Dict[str, Any]
+    metadata: PackageType
+    def __post_init__(self) -> None:
+        msg = (
+            f"{self.__class__.__name__} is deprecated and will be removed in the future."
+            " Consider forking this class if your project depends on it."
+        )
+        warnings.warn(
+            msg,
+            FutureWarning,
+            stacklevel=2,
+        )
+@dataclass
+class WorkspaceBuilder(Generic[PackageType, WorkspaceConfigType]):
+    cfg: WorkspaceConfigType
+    def __post_init__(self) -> None:
+        msg = (
+            f"{self.__class__.__name__} is deprecated and will be removed in the future."
+            " Consider forking this class if your project depends on it."
+        )
+        warnings.warn(
+            msg,
+            FutureWarning,
+            stacklevel=2,
+        )
+    @abc.abstractmethod
+    def build_workspace(self, sync: bool = True) -> PkgInfo[PackageType]:
+        """
+        Builds the specified ``workspace`` with respect to ``img``.
+        In the simplest case, this method builds a new image.
+        Certain (more efficient) implementations build
+        incremental diff patches that overlay on top of the role's image.
+        """
+        pass
 class WorkspaceMixin(abc.ABC, Generic[T]):
     """
@@ -44,11 +104,82 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
         """
         return runopts()
-    @abc.abstractmethod
+    def build_workspaces(self, roles: list[Role], cfg: Mapping[str, CfgVal]) -> None:
+        """
+        NOTE: this method MUTATES the passed roles!
+        Builds the workspaces (if any) for each role and updates the role to reflect the built workspace.
+        Typically ``role.image`` is updated with the newly built image that reflects the local workspace.
+        Some workspace implementations may add extra environment variables to make it easier for other
+        parts of the program to access the workspace. For example a ``WORKSPACE_DIR`` env var may be added
+        to ``role.env`` that scripts can use to refert to the workspace directory in the container.
+        """
+        build_cache: dict[object, object] = {}
+        for i, role in enumerate(roles):
+            if role.workspace:
+                old_img = role.image
+                self.caching_build_workspace_and_update_role(role, cfg, build_cache)
+                if old_img != role.image:
+                    logger.info(
+                        "role[%d]=%s updated with new image to include workspace changes",
+                        i,
+                        role.name,
+                    )
+    def caching_build_workspace_and_update_role(
+        self,
+        role: Role,
+        cfg: Mapping[str, CfgVal],
+        build_cache: dict[object, object],
+    ) -> None:
+        """
+        Same as :py:meth:`build_workspace_and_update_role` but takes
+        a ``build_cache`` that can be used to cache pointers to build artifacts
+        between building workspace for each role.
+        This is useful when an appdef has multiple roles where the image and workspace
+        of the roles are the same but other attributes such as entrypoint or args are different.
+        NOTE: ``build_cache``'s lifetime is within :py:meth:`build_workspace_and_update_roles`
+        NOTE: the workspace implementation decides what to cache
+        Workspace subclasses should prefer implementing this method over
+        :py:meth:`build_workspace_and_update_role`.
+        The default implementation of this method simply calls the (deprecated) non-caching
+        :py:meth:`build_workspace_and_update_role` and deals with multi-dir workspaces by
+        merging them into a single tmpdir before passing it down.
+        """
+        workspace = role.workspace
+        if not workspace:
+            return
+        if workspace.is_unmapped_single_project():
+            # single-dir workspace with no target map; no need to copy to a tmp dir
+            self.build_workspace_and_update_role(role, str(workspace), cfg)
+        else:
+            # multi-dirs or single-dir with a target map;
+            # copy all dirs to a tmp dir and treat the tmp dir as a single-dir workspace
+            with tempfile.TemporaryDirectory(suffix="torchx_workspace_") as outdir:
+                workspace.merge_into(outdir)
+                self.build_workspace_and_update_role(role, outdir, cfg)
     def build_workspace_and_update_role(
-        self, role: Role, workspace: str, cfg: Mapping[str, CfgVal]
+        self,
+        role: Role,
+        workspace: str,
+        cfg: Mapping[str, CfgVal],
     ) -> None:
         """
+        .. note:: DEPRECATED: Workspace subclasses should implement
+                  :py:meth:`caching_build_workspace_and_update_role` over this method.
         Builds the specified ``workspace`` with respect to ``img``
         and updates the ``role`` to reflect the built workspace artifacts.
         In the simplest case, this method builds a new image and updates
@@ -57,7 +188,7 @@ class WorkspaceMixin(abc.ABC, Generic[T]):
         Note: this method mutates the passed ``role``.
         """
-        ...
+        raise NotImplementedError("implement `caching_build_workspace_and_update_role`")
     def dryrun_push_images(self, app: AppDef, cfg: Mapping[str, CfgVal]) -> T:
         """
@@ -100,7 +231,6 @@ def walk_workspace(
     walk_workspace walks the filesystem path and applies the ignore rules
     specified via ``ignore_name``.
     This follows the rules for ``.dockerignore``.
-    https://docs.docker.com/engine/reference/builder/#dockerignore-file
     """
     ignore_patterns = []
     ignore_path = posixpath.join(path, ignore_name)

torchx/workspace/dir_workspace.py CHANGED Viewed

@@ -5,6 +5,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
 import os
 import posixpath
 import shutil

torchx/workspace/docker_workspace.py CHANGED Viewed

@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+# pyre-strict
 import io
 import logging
 import posixpath
@@ -16,6 +18,7 @@ from typing import Dict, IO, Iterable, Mapping, Optional, TextIO, Tuple, TYPE_CH
 import fsspec
 import torchx
+from docker.errors import BuildError
 from torchx.specs import AppDef, CfgVal, Role, runopts
 from torchx.workspace.api import walk_workspace, WorkspaceMixin
@@ -91,6 +94,12 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
             type_=str,
             help="(remote jobs) the image repository to use when pushing patched images, must have push access. Ex: example.com/your/container",
         )
+        opts.add(
+            "quiet",
+            type_=bool,
+            default=False,
+            help="whether to suppress verbose output for image building. Defaults to ``False``.",
+        )
         return opts
     def build_workspace_and_update_role(
@@ -105,6 +114,10 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
             workspace: a fsspec path to a directory with contents to be overlaid
         """
+        old_imgs = [
+            image.id
+            for image in self._docker_client.images.list(name=cfg["image_repo"])
+        ]
         context = _build_context(role.image, workspace)
         try:
@@ -115,7 +128,7 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
                     f"failed to pull image {role.image}, falling back to local: {e}"
                 )
             log.info("Building workspace docker image (this may take a while)...")
-            image, _ = self._docker_client.images.build(
+            build_events = self._docker_client.api.build(
                 fileobj=context,
                 custom_context=True,
                 dockerfile=TORCHX_DOCKERFILE,
@@ -125,11 +138,26 @@ class DockerWorkspaceMixin(WorkspaceMixin[Dict[str, Tuple[str, str]]]):
                 },
                 pull=False,
                 rm=True,
+                decode=True,
                 labels={
                     self.LABEL_VERSION: torchx.__version__,
                 },
             )
-            role.image = image.id
+            image_id = None
+            for event in build_events:
+                if message := event.get("stream"):
+                    if not cfg.get("quiet", False):
+                        message = message.strip("\r\n").strip("\n")
+                        if message:
+                            log.info(message)
+                if aux := event.get("aux"):
+                    image_id = aux["ID"]
+                if error := event.get("error"):
+                    raise BuildError(reason=error, build_log=None)
+            if len(old_imgs) == 0 or role.image not in old_imgs:
+                assert image_id, "image id was not found"
+                role.image = image_id
         finally:
             context.close()

torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl

Potentially problematic release.

torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl