torchx-nightly 2023.10.21__py3-none-any.whl → 2025.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchx-nightly might be problematic. Click here for more details.

Files changed (110) hide show
  1. torchx/__init__.py +2 -0
  2. torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
  3. torchx/apps/serve/serve.py +2 -0
  4. torchx/apps/utils/booth_main.py +2 -0
  5. torchx/apps/utils/copy_main.py +2 -0
  6. torchx/apps/utils/process_monitor.py +2 -0
  7. torchx/cli/__init__.py +2 -0
  8. torchx/cli/argparse_util.py +38 -3
  9. torchx/cli/cmd_base.py +2 -0
  10. torchx/cli/cmd_cancel.py +2 -0
  11. torchx/cli/cmd_configure.py +2 -0
  12. torchx/cli/cmd_delete.py +30 -0
  13. torchx/cli/cmd_describe.py +2 -0
  14. torchx/cli/cmd_list.py +8 -4
  15. torchx/cli/cmd_log.py +6 -24
  16. torchx/cli/cmd_run.py +269 -45
  17. torchx/cli/cmd_runopts.py +2 -0
  18. torchx/cli/cmd_status.py +12 -1
  19. torchx/cli/cmd_tracker.py +3 -1
  20. torchx/cli/colors.py +2 -0
  21. torchx/cli/main.py +4 -0
  22. torchx/components/__init__.py +3 -8
  23. torchx/components/component_test_base.py +2 -0
  24. torchx/components/dist.py +18 -7
  25. torchx/components/integration_tests/component_provider.py +4 -2
  26. torchx/components/integration_tests/integ_tests.py +2 -0
  27. torchx/components/serve.py +2 -0
  28. torchx/components/structured_arg.py +7 -6
  29. torchx/components/utils.py +15 -4
  30. torchx/distributed/__init__.py +2 -4
  31. torchx/examples/apps/datapreproc/datapreproc.py +2 -0
  32. torchx/examples/apps/lightning/data.py +5 -3
  33. torchx/examples/apps/lightning/model.py +7 -6
  34. torchx/examples/apps/lightning/profiler.py +7 -4
  35. torchx/examples/apps/lightning/train.py +11 -2
  36. torchx/examples/torchx_out_of_sync_training.py +11 -0
  37. torchx/notebook.py +2 -0
  38. torchx/runner/__init__.py +2 -0
  39. torchx/runner/api.py +167 -60
  40. torchx/runner/config.py +43 -10
  41. torchx/runner/events/__init__.py +57 -13
  42. torchx/runner/events/api.py +14 -3
  43. torchx/runner/events/handlers.py +2 -0
  44. torchx/runtime/tracking/__init__.py +2 -0
  45. torchx/runtime/tracking/api.py +2 -0
  46. torchx/schedulers/__init__.py +16 -15
  47. torchx/schedulers/api.py +70 -14
  48. torchx/schedulers/aws_batch_scheduler.py +79 -5
  49. torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
  50. torchx/schedulers/devices.py +17 -4
  51. torchx/schedulers/docker_scheduler.py +43 -11
  52. torchx/schedulers/ids.py +29 -23
  53. torchx/schedulers/kubernetes_mcad_scheduler.py +10 -8
  54. torchx/schedulers/kubernetes_scheduler.py +383 -38
  55. torchx/schedulers/local_scheduler.py +100 -27
  56. torchx/schedulers/lsf_scheduler.py +5 -4
  57. torchx/schedulers/slurm_scheduler.py +336 -20
  58. torchx/schedulers/streams.py +2 -0
  59. torchx/specs/__init__.py +89 -12
  60. torchx/specs/api.py +431 -32
  61. torchx/specs/builders.py +176 -38
  62. torchx/specs/file_linter.py +143 -57
  63. torchx/specs/finder.py +68 -28
  64. torchx/specs/named_resources_aws.py +254 -22
  65. torchx/specs/named_resources_generic.py +2 -0
  66. torchx/specs/overlays.py +106 -0
  67. torchx/specs/test/components/__init__.py +2 -0
  68. torchx/specs/test/components/a/__init__.py +2 -0
  69. torchx/specs/test/components/a/b/__init__.py +2 -0
  70. torchx/specs/test/components/a/b/c.py +2 -0
  71. torchx/specs/test/components/c/__init__.py +2 -0
  72. torchx/specs/test/components/c/d.py +2 -0
  73. torchx/tracker/__init__.py +12 -6
  74. torchx/tracker/api.py +15 -18
  75. torchx/tracker/backend/fsspec.py +2 -0
  76. torchx/util/cuda.py +2 -0
  77. torchx/util/datetime.py +2 -0
  78. torchx/util/entrypoints.py +39 -15
  79. torchx/util/io.py +2 -0
  80. torchx/util/log_tee_helpers.py +210 -0
  81. torchx/util/modules.py +65 -0
  82. torchx/util/session.py +42 -0
  83. torchx/util/shlex.py +2 -0
  84. torchx/util/strings.py +3 -1
  85. torchx/util/types.py +90 -29
  86. torchx/version.py +4 -2
  87. torchx/workspace/__init__.py +2 -0
  88. torchx/workspace/api.py +136 -6
  89. torchx/workspace/dir_workspace.py +2 -0
  90. torchx/workspace/docker_workspace.py +30 -2
  91. torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
  92. torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
  93. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
  94. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
  95. torchx/examples/pipelines/__init__.py +0 -0
  96. torchx/examples/pipelines/kfp/__init__.py +0 -0
  97. torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
  98. torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
  99. torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
  100. torchx/pipelines/kfp/__init__.py +0 -28
  101. torchx/pipelines/kfp/adapter.py +0 -271
  102. torchx/pipelines/kfp/version.py +0 -17
  103. torchx/schedulers/gcp_batch_scheduler.py +0 -487
  104. torchx/schedulers/ray/ray_common.py +0 -22
  105. torchx/schedulers/ray/ray_driver.py +0 -307
  106. torchx/schedulers/ray_scheduler.py +0 -453
  107. torchx_nightly-2023.10.21.dist-info/METADATA +0 -174
  108. torchx_nightly-2023.10.21.dist-info/RECORD +0 -118
  109. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
  110. {torchx_nightly-2023.10.21.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,106 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
8
+
9
+ """
10
+ Overlays are JSON structs applied to :py:class:`~torchx.specs.AppDef` and :py:class:`~torchx.specs.Role`
11
+ to specify attributes of the scheduler's submit-job request that are not currently representable
12
+ as attributes of :py:class:`~torchx.specs.AppDef` and :py:class:`~torchx.specs.Role`.
13
+
14
+ For end-users, here are a few use-cases of overlays:
15
+
16
+ 1. A new version of the scheduler has concepts/features that have not yet been added to TorchX.
17
+ 2. A bespoke internal scheduler has custom features that do not generalize hence not in TorchX.
18
+ 3. Re-using a pre-built ``AppDef`` but need to make a small change to the resulting scheduler request.
19
+
20
+ And for scheduler authors:
21
+
22
+ 1. Scheduler setting needs to be applied to a ``Role``, which makes it hard to add as ``runopts``
23
+ since ``runopts`` apply at the ``AppDef`` level.
24
+ 2. Scheduler setting cannot be represented naturally as the types supported by ``runopts``.
25
+ 3. Exposing the setting as a ``runopts`` obfuscates things.
26
+
27
+ See :py:func:`~torchx.specs.overlays.apply_overlay` for rules on how overlays are applied.
28
+ """
29
+
30
from typing import Any

Json = dict[str, Any]


def apply_overlay(base: Json, overlay: Json) -> None:
    """Merges ``overlay`` into ``base``.

    .. note:: this function mutates ``base`` in place!

    Merge rules:

    1. Keys present only in ``overlay`` are inserted into ``base``.
    2. When both values are dicts, they are merged recursively.
    3. When both values are lists, the overlay items are appended to the
       base list. Nested lists are NOT merged element-wise.
    4. Any other value (bool, str, int, float, ...) in ``base`` is replaced
       by the overlay's value.

    An ``AssertionError`` is raised when a key exists in both dicts but the
    two values have different types.
    """

    def _check_same_type(key: str, lhs: object, rhs: object) -> None:
        # Guard against silently merging incompatible shapes.
        lhs_type = type(lhs)
        rhs_type = type(rhs)
        assert (
            lhs_type == rhs_type
        ), f"Type mismatch for attr: `{key}`. {lhs_type.__qualname__} != {rhs_type.__qualname__}"

    for key, new_value in overlay.items():
        if key not in base:
            base[key] = new_value
            continue

        existing = base[key]
        _check_same_type(key, existing, new_value)

        # Both isinstance checks are kept (rather than relying on the assert
        # above) so behavior is unchanged even under `python -O`, where
        # asserts are stripped.
        if isinstance(existing, dict) and isinstance(new_value, dict):
            apply_overlay(existing, new_value)
        elif isinstance(existing, list) and isinstance(new_value, list):
            existing.extend(new_value)
        else:
            base[key] = new_value
@@ -3,3 +3,5 @@
3
3
  #
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
@@ -3,6 +3,8 @@
3
3
  #
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
6
8
  import torchx
7
9
  from torchx import specs
8
10
 
@@ -3,3 +3,5 @@
3
3
  #
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
@@ -3,6 +3,8 @@
3
3
  #
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
6
8
  import torchx
7
9
  from torchx import specs
8
10
 
@@ -4,3 +4,5 @@
4
4
  #
5
5
  # This source code is licensed under the BSD-style license found in the
6
6
  # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
@@ -3,6 +3,8 @@
3
3
  #
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
6
8
  import torchx
7
9
  from torchx import specs
8
10
 
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  """
8
10
  .. note:: PROTOTYPE, USE AT YOUR OWN RISK, APIs SUBJECT TO CHANGE
9
11
 
@@ -30,14 +32,14 @@ implementation.
30
32
 
31
33
  Example usage
32
34
  -------------
33
- Sample `code <https://github.com/pytorch/torchx/blob/main/torchx/examples/apps/tracker/main.py>`__ using tracker API.
35
+ Sample `code <https://github.com/meta-pytorch/torchx/blob/main/torchx/examples/apps/tracker/main.py>`__ using tracker API.
34
36
 
35
37
 
36
38
  Tracker Setup
37
39
  -------------
38
40
  To enable tracking it requires:
39
41
 
40
- 1. Defining tracker backends (entrypoints and configuration) on launcher side using :doc:`runner.config`
42
+ 1. Defining tracker backends (entrypoints/modules and configuration) on launcher side using :doc:`runner.config`
41
43
  2. Adding entrypoints within a user job using entry_points (`specification`_)
42
44
 
43
45
  .. _specification: https://packaging.python.org/en/latest/specifications/entry-points/
@@ -49,13 +51,13 @@ To enable tracking it requires:
49
51
  User can define any number of tracker backends under **torchx:tracker** section in :doc:`runner.config`, where:
50
52
  * Key: is an arbitrary name for the tracker, where the name will be used to configure its properties
51
53
  under [tracker:<TRACKER_NAME>]
52
- * Value: is *entrypoint/factory method* that must be available within user job. The value will be injected into a
54
+ * Value: is *entrypoint* or *module* factory method that must be available within user job. The value will be injected into a
53
55
  user job and used to construct tracker implementation.
54
56
 
55
57
  .. code-block:: ini
56
58
 
57
59
  [torchx:tracker]
58
- tracker_name=<entry_point>
60
+ tracker_name=<entry_point_or_module_factory_method>
59
61
 
60
62
 
61
63
  Each tracker can be additionally configured (currently limited to `config` parameter) under `[tracker:<TRACKER NAME>]` section:
@@ -71,11 +73,15 @@ For example, ~/.torchxconfig may be setup as:
71
73
 
72
74
  [torchx:tracker]
73
75
  tracker1=tracker1
74
- tracker12=backend_2_entry_point
76
+ tracker2=backend_2_entry_point
77
+ tracker3=torchx.tracker.mlflow:create_tracker
75
78
 
76
79
  [tracker:tracker1]
77
80
  config=s3://my_bucket/config.json
78
81
 
82
+ [tracker:tracker3]
83
+ config=my_config.json
84
+
79
85
 
80
86
  2. User job configuration (Advanced)
81
87
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -105,7 +111,7 @@ Use :py:meth:`~torchx.tracker.app_run_from_env`:
105
111
  Reference :py:class:`~torchx.tracker.api.TrackerBase` implementation
106
112
  --------------------------------------------------------------------
107
113
  :py:class:`~torchx.tracker.backend.fsspec.FsspecTracker` provides reference implementation of a tracker backend.
108
- GitHub example `directory <https://github.com/pytorch/torchx/blob/main/torchx/examples/apps/tracker/>`__ provides example on how to
114
+ GitHub example `directory <https://github.com/meta-pytorch/torchx/blob/main/torchx/examples/apps/tracker/>`__ provides example on how to
109
115
  configure and use it in user application.
110
116
 
111
117
 
torchx/tracker/api.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  from __future__ import annotations
8
10
 
9
11
  import logging
@@ -14,6 +16,7 @@ from functools import lru_cache
14
16
  from typing import Iterable, Mapping, Optional
15
17
 
16
18
  from torchx.util.entrypoints import load_group
19
+ from torchx.util.modules import load_module
17
20
 
18
21
  logger: logging.Logger = logging.getLogger(__name__)
19
22
 
@@ -66,8 +69,7 @@ class AppRunTrackableSource:
66
69
  artifact_name: Optional[str]
67
70
 
68
71
 
69
- class Lineage:
70
- ...
72
+ class Lineage: ...
71
73
 
72
74
 
73
75
  class TrackerBase(ABC):
@@ -177,30 +179,26 @@ def _extract_tracker_name_and_config_from_environ() -> Mapping[str, Optional[str
177
179
 
178
180
 
179
181
  def build_trackers(
180
- entrypoint_and_config: Mapping[str, Optional[str]]
182
+ factory_and_config: Mapping[str, Optional[str]]
181
183
  ) -> Iterable[TrackerBase]:
182
184
  trackers = []
183
185
 
184
- entrypoint_factories = load_group("torchx.tracker")
186
+ entrypoint_factories = load_group("torchx.tracker") or {}
185
187
  if not entrypoint_factories:
186
- logger.warning(
187
- "No 'torchx.tracker' entry_points are defined. Tracking will not capture any data."
188
- )
189
- return trackers
188
+ logger.warning("No 'torchx.tracker' entry_points are defined.")
190
189
 
191
- for entrypoint_key, config in entrypoint_and_config.items():
192
- if entrypoint_key not in entrypoint_factories:
190
+ for factory_name, config in factory_and_config.items():
191
+ factory = entrypoint_factories.get(factory_name) or load_module(factory_name)
192
+ if not factory:
193
193
  logger.warning(
194
- f"Could not find `{entrypoint_key}` tracker entrypoint. Skipping..."
194
+ f"No tracker factory `{factory_name}` found in entry_points or modules. See https://meta-pytorch.org/torchx/main/tracker.html#module-torchx.tracker"
195
195
  )
196
196
  continue
197
- factory = entrypoint_factories[entrypoint_key]
198
197
  if config:
199
- logger.info(f"Tracker config found for `{entrypoint_key}` as `{config}`")
200
- tracker = factory(config)
198
+ logger.info(f"Tracker config found for `{factory_name}` as `{config}`")
201
199
  else:
202
- logger.info(f"No tracker config specified for `{entrypoint_key}`")
203
- tracker = factory(None)
200
+ logger.info(f"No tracker config specified for `{factory_name}`")
201
+ tracker = factory(config)
204
202
  trackers.append(tracker)
205
203
  return trackers
206
204
 
@@ -335,5 +333,4 @@ class AppRun:
335
333
 
336
334
  return model_run_sources
337
335
 
338
- def children(self) -> Iterable[AppRun]:
339
- ...
336
+ def children(self) -> Iterable[AppRun]: ...
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  from __future__ import annotations
8
10
 
9
11
  import json
torchx/util/cuda.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  import torch
8
10
 
9
11
 
torchx/util/datetime.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  from datetime import datetime, timedelta
8
10
 
9
11
 
@@ -4,13 +4,14 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
- from typing import Any, Dict, Optional
7
+ # pyre-strict
8
+ # pyre-ignore-all-errors[3, 2, 16]
8
9
 
9
- import importlib_metadata as metadata
10
- from importlib_metadata import EntryPoint
10
+ from importlib import metadata
11
+ from importlib.metadata import EntryPoint
12
+ from typing import Any, Dict, Optional
11
13
 
12
14
 
13
- # pyre-ignore-all-errors[3, 2]
14
15
  def load(group: str, name: str, default=None):
15
16
  """
16
17
  Loads the entry point specified by
@@ -28,13 +29,34 @@ def load(group: str, name: str, default=None):
28
29
  raises an error.
29
30
  """
30
31
 
31
- entrypoints = metadata.entry_points().select(group=group)
32
+ # [note_on_entrypoints]
33
+ # return type of importlib.metadata.entry_points() is different between python-3.9 and python-3.10
34
+ # https://docs.python.org/3.9/library/importlib.metadata.html#importlib.metadata.entry_points
35
+ # https://docs.python.org/3.10/library/importlib.metadata.html#importlib.metadata.entry_points
36
+ if hasattr(metadata.entry_points(), "select"):
37
+ # python>=3.10
38
+ entrypoints = metadata.entry_points().select(group=group)
32
39
 
33
- if name not in entrypoints.names and default is not None:
34
- return default
40
+ if name not in entrypoints.names and default is not None:
41
+ return default
42
+
43
+ ep = entrypoints[name]
44
+ return ep.load()
35
45
 
36
- ep = entrypoints[name]
37
- return ep.load()
46
+ else:
47
+ # python<3.10 (e.g. 3.9)
48
+ # metadata.entry_points() returns dict[str, tuple[EntryPoint]] (not EntryPoints) in python-3.9
49
+ entrypoints = metadata.entry_points().get(group, ())
50
+
51
+ for ep in entrypoints:
52
+ if ep.name == name:
53
+ return ep.load()
54
+
55
+ # [group].name not found
56
+ if default is not None:
57
+ return default
58
+ else:
59
+ raise KeyError(f"entrypoint {group}.{name} not found")
38
60
 
39
61
 
40
62
  def _defer_load_ep(ep: EntryPoint) -> object:
@@ -47,11 +69,7 @@ def _defer_load_ep(ep: EntryPoint) -> object:
47
69
  return run
48
70
 
49
71
 
50
- # pyre-ignore-all-errors[3, 2]
51
- def load_group(
52
- group: str,
53
- default: Optional[Dict[str, Any]] = None,
54
- ):
72
+ def load_group(group: str, default: Optional[Dict[str, Any]] = None):
55
73
  """
56
74
  Loads all the entry points specified by ``group`` and returns
57
75
  the entry points as a map of ``name (str) -> deferred_load_fn``.
@@ -85,7 +103,13 @@ def load_group(
85
103
 
86
104
  """
87
105
 
88
- entrypoints = metadata.entry_points().select(group=group)
106
+ # see [note_on_entrypoints] above
107
+ if hasattr(metadata.entry_points(), "select"):
108
+ # python>=3.10
109
+ entrypoints = metadata.entry_points().select(group=group)
110
+ else:
111
+ # python<3.10 (e.g. 3.9)
112
+ entrypoints = metadata.entry_points().get(group, ())
89
113
 
90
114
  if len(entrypoints) == 0:
91
115
  return default
torchx/util/io.py CHANGED
@@ -4,6 +4,8 @@
4
4
  # This source code is licensed under the BSD-style license found in the
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
+ # pyre-strict
8
+
7
9
  from os import path
8
10
  from pathlib import Path
9
11
  from typing import Optional
@@ -0,0 +1,210 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
8
+
9
+ """
10
+ If you're wrapping the TorchX API with your own CLI, these functions can
11
+ help show the logs of the job within your CLI, just like
12
+ `torchx log`
13
+ """
14
+
15
+ import logging
16
+ import threading
17
+ from queue import Queue
18
+ from typing import List, Optional, TextIO, Tuple, TYPE_CHECKING
19
+
20
+ from torchx.util.types import none_throws
21
+
22
+ if TYPE_CHECKING:
23
+ from torchx.runner.api import Runner
24
+ from torchx.schedulers.api import Stream
25
+ from torchx.specs.api import AppDef
26
+
27
+ logger: logging.Logger = logging.getLogger(__name__)
28
+
29
+ # A torchX job can have stderr/stdout for many replicas, of many roles
30
+ # The scheduler API has functions that allow us to get,
31
+ # with unspecified detail, the log lines of a given replica of
32
+ # a given role.
33
+ #
34
+ # So, to neatly tee the results, we:
35
+ # 1) Determine every role ID / replica ID pair we want to monitor
36
+ # 2) Request the given stderr / stdout / combined streams from them (1 thread each)
37
+ # 3) Concatenate each of those streams to a given destination file
38
+
39
+
40
+ def _find_role_replicas(
41
+ app: "AppDef",
42
+ role_name: Optional[str],
43
+ ) -> List[Tuple[str, int]]:
44
+ """
45
+ Enumerate all (role, replica id) pairs in the given AppDef.
46
+ Replica IDs are 0-indexed, and range up to num_replicas,
47
+ for each role.
48
+ If role_name is provided, filters to only that name.
49
+ """
50
+ role_replicas = []
51
+ for role in app.roles:
52
+ if role_name is None or role_name == role.name:
53
+ for i in range(role.num_replicas):
54
+ role_replicas.append((role.name, i))
55
+ return role_replicas
56
+
57
+
58
+ def _prefix_line(prefix: str, line: str) -> str:
59
+ """
60
+ _prefix_line ensure the prefix is still present even when dealing with return characters
61
+ """
62
+ if "\r" in line:
63
+ line = line.replace("\r", f"\r{prefix}")
64
+ if "\n" in line[:-1]:
65
+ line = line[:-1].replace("\n", f"\n{prefix}") + line[-1:]
66
+ if not line.startswith("\r"):
67
+ line = f"{prefix}{line}"
68
+ return line
69
+
70
+
71
+ def _print_log_lines_for_role_replica(
72
+ dst: TextIO,
73
+ app_handle: str,
74
+ regex: Optional[str],
75
+ runner: "Runner",
76
+ which_role: str,
77
+ which_replica: int,
78
+ exceptions: "Queue[Exception]",
79
+ should_tail: bool,
80
+ streams: Optional["Stream"],
81
+ colorize: bool = False,
82
+ ) -> None:
83
+ """
84
+ Helper function that'll run in parallel - one
85
+ per monitored replica of a given role.
86
+
87
+ Based on print_log_lines .. but not designed for TTY
88
+ """
89
+ try:
90
+ for line in runner.log_lines(
91
+ app_handle,
92
+ which_role,
93
+ which_replica,
94
+ regex,
95
+ should_tail=should_tail,
96
+ streams=streams,
97
+ ):
98
+ if colorize:
99
+ color_begin = "\033[32m"
100
+ color_end = "\033[0m"
101
+ else:
102
+ color_begin = ""
103
+ color_end = ""
104
+ prefix = f"{color_begin}{which_role}/{which_replica}{color_end} "
105
+ print(_prefix_line(prefix, line.strip()), file=dst, end="\n", flush=True)
106
+ except Exception as e:
107
+ exceptions.put(e)
108
+ raise
109
+
110
+
111
+ def _start_threads_to_monitor_role_replicas(
112
+ dst: TextIO,
113
+ app_handle: str,
114
+ regex: Optional[str],
115
+ runner: "Runner",
116
+ which_role: Optional[str] = None,
117
+ should_tail: bool = False,
118
+ streams: Optional["Stream"] = None,
119
+ colorize: bool = False,
120
+ ) -> None:
121
+ threads = []
122
+
123
+ app = none_throws(runner.describe(app_handle))
124
+ replica_ids = _find_role_replicas(app, role_name=which_role)
125
+
126
+ # Holds exceptions raised by all threads, in a thread-safe
127
+ # object
128
+ exceptions = Queue()
129
+
130
+ if not replica_ids:
131
+ valid_roles = [role.name for role in app.roles]
132
+ raise ValueError(
133
+ f"{which_role} is not a valid role name. Available: {valid_roles}"
134
+ )
135
+
136
+ for role_name, replica_id in replica_ids:
137
+ threads.append(
138
+ threading.Thread(
139
+ target=_print_log_lines_for_role_replica,
140
+ kwargs={
141
+ "dst": dst,
142
+ "runner": runner,
143
+ "app_handle": app_handle,
144
+ "which_role": role_name,
145
+ "which_replica": replica_id,
146
+ "regex": regex,
147
+ "should_tail": should_tail,
148
+ "exceptions": exceptions,
149
+ "streams": streams,
150
+ "colorize": colorize,
151
+ },
152
+ daemon=True,
153
+ )
154
+ )
155
+
156
+ for t in threads:
157
+ t.start()
158
+
159
+ for t in threads:
160
+ t.join()
161
+
162
+ # Retrieve all exceptions, print all except one and raise the first recorded exception
163
+ threads_exceptions = []
164
+ while not exceptions.empty():
165
+ threads_exceptions.append(exceptions.get())
166
+
167
+ if len(threads_exceptions) > 0:
168
+ for i in range(1, len(threads_exceptions)):
169
+ logger.error(threads_exceptions[i])
170
+
171
+ raise threads_exceptions[0]
172
+
173
+
174
+ def tee_logs(
175
+ dst: TextIO,
176
+ app_handle: str,
177
+ regex: Optional[str],
178
+ runner: "Runner",
179
+ should_tail: bool = False,
180
+ streams: Optional["Stream"] = None,
181
+ colorize: bool = False,
182
+ ) -> threading.Thread:
183
+ """
184
+ Makes a thread, which in turn will start 1 thread per replica
185
+ per role, that tees that role-replica's logs to the given
186
+ destination buffer.
187
+
188
+ You'll need to start and join with this parent thread.
189
+
190
+ dst: TextIO to tee the logs into
191
+ app_handle: The return value of runner.run() or runner.schedule()
192
+ regex: Regex to filter the logs that are tee-d
193
+ runner: The Runner you used to schedule the job
194
+ should_tail: If true, continue until we run out of logs. Otherwise, just fetch
195
+ what's available
196
+ streams: Whether to fetch STDERR, STDOUT, or the temporally COMBINED (default) logs
197
+ """
198
+ thread = threading.Thread(
199
+ target=_start_threads_to_monitor_role_replicas,
200
+ kwargs={
201
+ "dst": dst,
202
+ "runner": runner,
203
+ "app_handle": app_handle,
204
+ "regex": None,
205
+ "should_tail": True,
206
+ "colorize": colorize,
207
+ },
208
+ daemon=True,
209
+ )
210
+ return thread