torchx-nightly 2024.1.6__py3-none-any.whl → 2025.12.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchx/__init__.py +2 -0
- torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
- torchx/apps/serve/serve.py +2 -0
- torchx/apps/utils/booth_main.py +2 -0
- torchx/apps/utils/copy_main.py +2 -0
- torchx/apps/utils/process_monitor.py +2 -0
- torchx/cli/__init__.py +2 -0
- torchx/cli/argparse_util.py +38 -3
- torchx/cli/cmd_base.py +2 -0
- torchx/cli/cmd_cancel.py +2 -0
- torchx/cli/cmd_configure.py +2 -0
- torchx/cli/cmd_delete.py +30 -0
- torchx/cli/cmd_describe.py +2 -0
- torchx/cli/cmd_list.py +8 -4
- torchx/cli/cmd_log.py +6 -24
- torchx/cli/cmd_run.py +269 -45
- torchx/cli/cmd_runopts.py +2 -0
- torchx/cli/cmd_status.py +12 -1
- torchx/cli/cmd_tracker.py +3 -1
- torchx/cli/colors.py +2 -0
- torchx/cli/main.py +4 -0
- torchx/components/__init__.py +3 -8
- torchx/components/component_test_base.py +2 -0
- torchx/components/dist.py +18 -7
- torchx/components/integration_tests/component_provider.py +4 -2
- torchx/components/integration_tests/integ_tests.py +2 -0
- torchx/components/serve.py +2 -0
- torchx/components/structured_arg.py +4 -3
- torchx/components/utils.py +15 -4
- torchx/distributed/__init__.py +2 -4
- torchx/examples/apps/datapreproc/datapreproc.py +2 -0
- torchx/examples/apps/lightning/data.py +5 -3
- torchx/examples/apps/lightning/model.py +7 -6
- torchx/examples/apps/lightning/profiler.py +7 -4
- torchx/examples/apps/lightning/train.py +11 -2
- torchx/examples/torchx_out_of_sync_training.py +11 -0
- torchx/notebook.py +2 -0
- torchx/runner/__init__.py +2 -0
- torchx/runner/api.py +167 -60
- torchx/runner/config.py +43 -10
- torchx/runner/events/__init__.py +57 -13
- torchx/runner/events/api.py +14 -3
- torchx/runner/events/handlers.py +2 -0
- torchx/runtime/tracking/__init__.py +2 -0
- torchx/runtime/tracking/api.py +2 -0
- torchx/schedulers/__init__.py +16 -15
- torchx/schedulers/api.py +70 -14
- torchx/schedulers/aws_batch_scheduler.py +75 -6
- torchx/schedulers/aws_sagemaker_scheduler.py +598 -0
- torchx/schedulers/devices.py +17 -4
- torchx/schedulers/docker_scheduler.py +43 -11
- torchx/schedulers/ids.py +29 -23
- torchx/schedulers/kubernetes_mcad_scheduler.py +9 -7
- torchx/schedulers/kubernetes_scheduler.py +383 -38
- torchx/schedulers/local_scheduler.py +100 -27
- torchx/schedulers/lsf_scheduler.py +5 -4
- torchx/schedulers/slurm_scheduler.py +336 -20
- torchx/schedulers/streams.py +2 -0
- torchx/specs/__init__.py +89 -12
- torchx/specs/api.py +418 -30
- torchx/specs/builders.py +176 -38
- torchx/specs/file_linter.py +143 -57
- torchx/specs/finder.py +68 -28
- torchx/specs/named_resources_aws.py +181 -4
- torchx/specs/named_resources_generic.py +2 -0
- torchx/specs/overlays.py +106 -0
- torchx/specs/test/components/__init__.py +2 -0
- torchx/specs/test/components/a/__init__.py +2 -0
- torchx/specs/test/components/a/b/__init__.py +2 -0
- torchx/specs/test/components/a/b/c.py +2 -0
- torchx/specs/test/components/c/__init__.py +2 -0
- torchx/specs/test/components/c/d.py +2 -0
- torchx/tracker/__init__.py +12 -6
- torchx/tracker/api.py +15 -18
- torchx/tracker/backend/fsspec.py +2 -0
- torchx/util/cuda.py +2 -0
- torchx/util/datetime.py +2 -0
- torchx/util/entrypoints.py +39 -15
- torchx/util/io.py +2 -0
- torchx/util/log_tee_helpers.py +210 -0
- torchx/util/modules.py +65 -0
- torchx/util/session.py +42 -0
- torchx/util/shlex.py +2 -0
- torchx/util/strings.py +3 -1
- torchx/util/types.py +90 -29
- torchx/version.py +4 -2
- torchx/workspace/__init__.py +2 -0
- torchx/workspace/api.py +136 -6
- torchx/workspace/dir_workspace.py +2 -0
- torchx/workspace/docker_workspace.py +30 -2
- torchx_nightly-2025.12.24.dist-info/METADATA +167 -0
- torchx_nightly-2025.12.24.dist-info/RECORD +113 -0
- {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info}/WHEEL +1 -1
- {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info}/entry_points.txt +0 -1
- torchx/examples/pipelines/__init__.py +0 -0
- torchx/examples/pipelines/kfp/__init__.py +0 -0
- torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -287
- torchx/examples/pipelines/kfp/dist_pipeline.py +0 -69
- torchx/examples/pipelines/kfp/intro_pipeline.py +0 -81
- torchx/pipelines/kfp/__init__.py +0 -28
- torchx/pipelines/kfp/adapter.py +0 -271
- torchx/pipelines/kfp/version.py +0 -17
- torchx/schedulers/gcp_batch_scheduler.py +0 -487
- torchx/schedulers/ray/ray_common.py +0 -22
- torchx/schedulers/ray/ray_driver.py +0 -307
- torchx/schedulers/ray_scheduler.py +0 -453
- torchx_nightly-2024.1.6.dist-info/METADATA +0 -176
- torchx_nightly-2024.1.6.dist-info/RECORD +0 -118
- {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info/licenses}/LICENSE +0 -0
- {torchx_nightly-2024.1.6.dist-info → torchx_nightly-2025.12.24.dist-info}/top_level.txt +0 -0
torchx/specs/finder.py
CHANGED
@@ -4,7 +4,10 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-strict
+
 import abc
+import copy
 import importlib
 import inspect
 import logging
@@ -17,11 +20,17 @@ from types import ModuleType
 from typing import Callable, Dict, Generator, List, Optional, Union
 
 from torchx.specs import AppDef
-from torchx.specs.file_linter import get_fn_docstring, validate
+
+from torchx.specs.file_linter import (
+    ComponentFunctionValidator,
+    get_fn_docstring,
+    validate,
+)
 from torchx.util import entrypoints
 from torchx.util.io import read_conf_file
 from torchx.util.types import none_throws
 
+
 logger: logging.Logger = logging.getLogger(__name__)
 
 
@@ -51,13 +60,17 @@ class _Component:
     name: str
     description: str
     fn_name: str
+
     fn: Callable[..., AppDef]
+
     validation_errors: List[str]
 
 
 class ComponentsFinder(abc.ABC):
     @abc.abstractmethod
-    def find(self) -> List[_Component]:
+    def find(
+        self, validators: Optional[List[ComponentFunctionValidator]]
+    ) -> List[_Component]:
         """
         Retrieves a set of components. A component is defined as a python
         function that conforms to ``torchx.specs.file_linter`` linter.
@@ -201,10 +214,12 @@ class ModuleComponentsFinder(ComponentsFinder):
         else:
             yield self._try_import(module_info.name)
 
-    def find(self) -> List[_Component]:
+    def find(
+        self, validators: Optional[List[ComponentFunctionValidator]]
+    ) -> List[_Component]:
         components = []
         for m in self._iter_modules_recursive(self.base_module):
-            components += self._get_components_from_module(m)
+            components += self._get_components_from_module(m, validators)
         return components
 
     def _try_import(self, module: Union[str, ModuleType]) -> ModuleType:
@@ -219,7 +234,9 @@ class ModuleComponentsFinder(ComponentsFinder):
         else:
             return module
 
-    def _get_components_from_module(self, module: ModuleType) -> List[_Component]:
+    def _get_components_from_module(
+        self, module: ModuleType, validators: Optional[List[ComponentFunctionValidator]]
+    ) -> List[_Component]:
         functions = getmembers(module, isfunction)
         component_defs = []
 
@@ -228,7 +245,7 @@ class ModuleComponentsFinder(ComponentsFinder):
         module_path = os.path.abspath(module_path)
         rel_module_name = module_relname(module, relative_to=self.base_module)
         for function_name, function in functions:
-            linter_errors = validate(module_path, function_name)
+            linter_errors = validate(module_path, function_name, validators)
             component_desc, _ = get_fn_docstring(function)
 
             # remove empty string to deal with group=""
@@ -253,17 +270,26 @@ class CustomComponentsFinder(ComponentsFinder):
         self._filepath = filepath
         self._function_name = function_name
 
-    def _get_validation_errors(self, path: str, function_name: str) -> List[str]:
-        linter_errors = validate(path, function_name)
+    def _get_validation_errors(
+        self,
+        path: str,
+        function_name: str,
+        validators: Optional[List[ComponentFunctionValidator]],
+    ) -> List[str]:
+        linter_errors = validate(path, function_name, validators)
         return [linter_error.description for linter_error in linter_errors]
 
-    def find(self) -> List[_Component]:
+    def find(
+        self, validators: Optional[List[ComponentFunctionValidator]]
+    ) -> List[_Component]:
         validation_errors = self._get_validation_errors(
-            self._filepath, self._function_name
+            self._filepath, self._function_name, validators
        )
 
         file_source = read_conf_file(self._filepath)
-        namespace = globals()
+        namespace = copy.copy(globals())
+        # so that __file__ used inside the component points to the correct file
+        namespace["__file__"] = os.path.abspath(self._filepath)
         exec(file_source, namespace)  # noqa: P204
         if self._function_name not in namespace:
             raise ComponentNotFoundException(
@@ -282,7 +308,9 @@ class CustomComponentsFinder(ComponentsFinder):
     ]
 
 
-def _load_custom_components() -> List[_Component]:
+def _load_custom_components(
+    validators: Optional[List[ComponentFunctionValidator]],
+) -> List[_Component]:
     component_modules = {
         name: load_fn()
         for name, load_fn in
@@ -301,11 +329,13 @@ def _load_custom_components() -> List[_Component]:
         # _0 = torchx.components.dist
         # _1 = torchx.components.utils
         group = "" if group.startswith("_") else group
-        components += ModuleComponentsFinder(module, group).find()
+        components += ModuleComponentsFinder(module, group).find(validators)
     return components
 
 
-def _load_components() -> Dict[str, _Component]:
+def _load_components(
+    validators: Optional[List[ComponentFunctionValidator]],
+) -> Dict[str, _Component]:
     """
     Loads either the custom component defs from the entrypoint ``[torchx.components]``
     or the default builtins from ``torchx.components`` module.
@@ -316,19 +346,21 @@ def _load_components() -> Dict[str, _Component]:
 
     """
 
-    components = _load_custom_components()
+    components = _load_custom_components(validators)
     if not components:
-        components = ModuleComponentsFinder("torchx.components", "").find()
+        components = ModuleComponentsFinder("torchx.components", "").find(validators)
     return {c.name: c for c in components}
 
 
 _components: Optional[Dict[str, _Component]] = None
 
 
-def _find_components() -> Dict[str, _Component]:
+def _find_components(
+    validators: Optional[List[ComponentFunctionValidator]],
+) -> Dict[str, _Component]:
     global _components
     if not _components:
-        _components = _load_components()
+        _components = _load_components(validators)
     return none_throws(_components)
 
 
@@ -336,17 +368,21 @@ def _is_custom_component(component_name: str) -> bool:
     return ":" in component_name
 
 
-def _find_custom_components(name: str) -> Dict[str, _Component]:
+def _find_custom_components(
+    name: str, validators: Optional[List[ComponentFunctionValidator]]
+) -> Dict[str, _Component]:
     if ":" not in name:
         raise ValueError(
             f"Invalid custom component: {name}, valid template : `FILEPATH`:`FUNCTION_NAME`"
         )
     filepath, component_name = name.split(":")
-    components = CustomComponentsFinder(filepath, component_name).find()
+    components = CustomComponentsFinder(filepath, component_name).find(validators)
     return {component.name: component for component in components}
 
 
-def get_components() -> Dict[str, _Component]:
+def get_components(
+    validators: Optional[List[ComponentFunctionValidator]] = None,
+) -> Dict[str, _Component]:
     """
     Returns all custom components registered via ``[torchx.components]`` entrypoints
     OR builtin components that ship with TorchX (but not both).
@@ -393,13 +429,15 @@ def get_components() -> Dict[str, _Component]:
     """
 
     valid_components: Dict[str, _Component] = {}
-    for component_name, component in _find_components().items():
+    for component_name, component in _find_components(validators).items():
         if len(component.validation_errors) == 0:
             valid_components[component_name] = component
     return valid_components
 
 
-def get_component(name: str) -> _Component:
+def get_component(
+    name: str, validators: Optional[List[ComponentFunctionValidator]] = None
+) -> _Component:
     """
     Retrieves components by the provided name.
 
@@ -407,14 +445,14 @@ def get_component(name: str) -> _Component:
         Component or None if no component with ``name`` exists
     """
     if _is_custom_component(name):
-        components = _find_custom_components(name)
+        components = _find_custom_components(name, validators)
     else:
-        components = _find_components()
+        components = _find_components(validators)
     if name not in components:
         raise ComponentNotFoundException(
             f"Component `{name}` not found. Please make sure it is one of the "
             "builtins: `torchx builtins`. Or registered via `[torchx.components]` "
-            "entry point (see: https://pytorch.org/torchx/latest/configure.html)"
+            "entry point (see: https://meta-pytorch.org/torchx/latest/configure.html)"
         )
 
     component = components[name]
@@ -426,7 +464,9 @@ def get_component(name: str) -> _Component:
     return component
 
 
-def get_builtin_source(name: str) -> str:
+def get_builtin_source(
+    name: str, validators: Optional[List[ComponentFunctionValidator]] = None
+) -> str:
     """
     Returns a string of the the builtin component's function source code
     with all the import statements. Intended to be used to make a copy
@@ -444,7 +484,7 @@ def get_builtin_source(name: str) -> str:
     are optimized and formatting adheres to your organization's standards.
     """
 
-    component = get_component(name)
+    component = get_component(name, validators)
     fn = component.fn
     fn_name = component.name.split(".")[-1]
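Taken together, the finder.py changes thread an optional list of ComponentFunctionValidator objects (imported from torchx.specs.file_linter) through every step of component discovery, from the public get_components/get_component/get_builtin_source entry points down to the validate() call. Since validators defaults to None on the public functions, existing callers keep the old behavior. A minimal sketch of the resulting surface, assuming this nightly wheel is installed (utils.echo is one of the stock builtin components):

    # A minimal sketch, assuming this nightly wheel is installed.
    # Omitting `validators` (default None) preserves the pre-change validation.
    from torchx.specs.finder import get_component, get_components

    components = get_components()        # dict: component name -> _Component
    echo = get_component("utils.echo")   # resolve one builtin by name
    print(echo.name, echo.description)   # fields of the _Component dataclass

Custom ComponentFunctionValidator instances would be passed via the new validators argument; their construction lives in torchx.specs.file_linter and is not shown in this diff.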
torchx/specs/named_resources_aws.py
CHANGED
@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-strict
+
 r"""
 `torchx.specs.named_resources_aws` contains resource definitions that represent corresponding AWS instance types
 taken from https://aws.amazon.com/ec2/instance-types/. The resources are exposed
@@ -14,7 +16,7 @@ the equvalent resource in mem, cpu and gpu numbers.
 
 .. note::
     These resource definitions may change in future. It is expected for each user to
-    manage their own resources. Follow https://pytorch.org/torchx/latest/specs.html#torchx.specs.get_named_resources
+    manage their own resources. Follow https://meta-pytorch.org/torchx/latest/specs.html#torchx.specs.get_named_resources
     to set up named resources.
 
 Usage:
@@ -35,6 +37,7 @@ from typing import Callable, Mapping
 from torchx.specs.api import Resource
 
 EFA_DEVICE = "vpc.amazonaws.com/efa"
+NEURON_DEVICE = "aws.amazon.com/neurondevice"
 
 # ecs and ec2 have memtax and currently AWS Batch uses hard memory limits
 # so we have to account for mem tax when registering these resources for AWS
@@ -44,7 +47,7 @@ EFA_DEVICE = "vpc.amazonaws.com/efa"
 MEM_TAX = 0.96
 
 # determines instance type for non-honogeneous CEs
-# see https://github.com/pytorch/torchx/issues/780
+# see https://github.com/meta-pytorch/torchx/issues/780
 K8S_ITYPE = "node.kubernetes.io/instance-type"
 GiB: int = int(1024 * MEM_TAX)
 
@@ -107,6 +110,36 @@ def aws_p4de_24xlarge() -> Resource:
     )
 
 
+def aws_p5_48xlarge() -> Resource:
+    return Resource(
+        cpu=192,
+        gpu=8,
+        memMB=2048 * GiB,
+        capabilities={K8S_ITYPE: "p5.48xlarge"},
+        devices={EFA_DEVICE: 32},
+    )
+
+
+def aws_p5e_48xlarge() -> Resource:
+    return Resource(
+        cpu=192,
+        gpu=8,
+        memMB=2048 * GiB,
+        capabilities={K8S_ITYPE: "p5e.48xlarge"},
+        devices={EFA_DEVICE: 32},
+    )
+
+
+def aws_p5en_48xlarge() -> Resource:
+    return Resource(
+        cpu=192,
+        gpu=8,
+        memMB=2048 * GiB,
+        capabilities={K8S_ITYPE: "p5en.48xlarge"},
+        devices={EFA_DEVICE: 16},
+    )
+
+
 def aws_t3_medium() -> Resource:
     return Resource(cpu=2, gpu=0, memMB=4 * GiB, capabilities={K8S_ITYPE: "t3.medium"})
 
@@ -117,6 +150,16 @@ def aws_m5_2xlarge() -> Resource:
     )
 
 
+def aws_c5_18xlarge() -> Resource:
+    return Resource(
+        # using lower memory size than the spec since MEM_TAX is not enough for adjustment
+        cpu=72,
+        gpu=0,
+        memMB=142 * GiB,
+        capabilities={K8S_ITYPE: "c5.18xlarge"},
+    )
+
+
 def aws_g4dn_xlarge() -> Resource:
     return Resource(
         cpu=4, gpu=1, memMB=16 * GiB, capabilities={K8S_ITYPE: "g4dn.xlarge"}
@@ -241,9 +284,87 @@ def aws_g5_48xlarge() -> Resource:
     )
 
 
+def aws_g6e_xlarge() -> Resource:
+    return Resource(
+        cpu=4,
+        gpu=1,
+        memMB=32 * GiB,
+        capabilities={K8S_ITYPE: "g6e.xlarge"},
+    )
+
+
+def aws_g6e_2xlarge() -> Resource:
+    return Resource(
+        cpu=8,
+        gpu=1,
+        memMB=64 * GiB,
+        capabilities={K8S_ITYPE: "g6e.2xlarge"},
+    )
+
+
+def aws_g6e_4xlarge() -> Resource:
+    return Resource(
+        cpu=16,
+        gpu=1,
+        memMB=128 * GiB,
+        capabilities={K8S_ITYPE: "g6e.4xlarge"},
+    )
+
+
+def aws_g6e_8xlarge() -> Resource:
+    return Resource(
+        cpu=32,
+        gpu=1,
+        memMB=256 * GiB,
+        capabilities={K8S_ITYPE: "g6e.8xlarge"},
+    )
+
+
+def aws_g6e_16xlarge() -> Resource:
+    return Resource(
+        cpu=64,
+        gpu=1,
+        memMB=512 * GiB,
+        capabilities={K8S_ITYPE: "g6e.16xlarge"},
+    )
+
+
+def aws_g6e_12xlarge() -> Resource:
+    return Resource(
+        cpu=48,
+        gpu=4,
+        memMB=384 * GiB,
+        capabilities={K8S_ITYPE: "g6e.12xlarge"},
+    )
+
+
+def aws_g6e_24xlarge() -> Resource:
+    return Resource(
+        cpu=96,
+        gpu=4,
+        memMB=768 * GiB,
+        capabilities={K8S_ITYPE: "g6e.24xlarge"},
+        devices={EFA_DEVICE: 2},
+    )
+
+
+def aws_g6e_48xlarge() -> Resource:
+    return Resource(
+        cpu=192,
+        gpu=8,
+        memMB=1536 * GiB,
+        capabilities={K8S_ITYPE: "g6e.48xlarge"},
+        devices={EFA_DEVICE: 4},
+    )
+
+
 def aws_trn1_2xlarge() -> Resource:
     return Resource(
-        cpu=8, gpu=0, memMB=32 * GiB, capabilities={K8S_ITYPE: "trn1.2xlarge"}
+        cpu=8,
+        gpu=0,
+        memMB=32 * GiB,
+        capabilities={K8S_ITYPE: "trn1.2xlarge"},
+        devices={NEURON_DEVICE: 1},
     )
 
 
@@ -253,19 +374,63 @@ def aws_trn1_32xlarge() -> Resource:
         gpu=0,
         memMB=512 * GiB,
         capabilities={K8S_ITYPE: "trn1.32xlarge"},
-        devices={EFA_DEVICE: 8},
+        devices={EFA_DEVICE: 8, NEURON_DEVICE: 16},
+    )
+
+
+def aws_inf2_xlarge() -> Resource:
+    return Resource(
+        cpu=4,
+        gpu=0,
+        memMB=16 * GiB,
+        capabilities={K8S_ITYPE: "inf2.xlarge"},
+        devices={NEURON_DEVICE: 1},
+    )
+
+
+def aws_inf2_8xlarge() -> Resource:
+    return Resource(
+        cpu=32,
+        gpu=0,
+        memMB=128 * GiB,
+        capabilities={K8S_ITYPE: "inf2.8xlarge"},
+        devices={NEURON_DEVICE: 1},
+    )
+
+
+def aws_inf2_24xlarge() -> Resource:
+    return Resource(
+        cpu=96,
+        gpu=0,
+        memMB=384 * GiB,
+        capabilities={K8S_ITYPE: "inf2.24xlarge"},
+        devices={NEURON_DEVICE: 6},
+    )
+
+
+def aws_inf2_48xlarge() -> Resource:
+    return Resource(
+        cpu=192,
+        gpu=0,
+        memMB=768 * GiB,
+        capabilities={K8S_ITYPE: "inf2.48xlarge"},
+        devices={NEURON_DEVICE: 12},
     )
 
 
 NAMED_RESOURCES: Mapping[str, Callable[[], Resource]] = {
     "aws_t3.medium": aws_t3_medium,
     "aws_m5.2xlarge": aws_m5_2xlarge,
+    "aws_c5.18xlarge": aws_c5_18xlarge,
     "aws_p3.2xlarge": aws_p3_2xlarge,
     "aws_p3.8xlarge": aws_p3_8xlarge,
     "aws_p3.16xlarge": aws_p3_16xlarge,
     "aws_p3dn.24xlarge": aws_p3dn_24xlarge,
     "aws_p4d.24xlarge": aws_p4d_24xlarge,
     "aws_p4de.24xlarge": aws_p4de_24xlarge,
+    "aws_p5.48xlarge": aws_p5_48xlarge,
+    "aws_p5e.48xlarge": aws_p5e_48xlarge,
+    "aws_p5en.48xlarge": aws_p5en_48xlarge,
     "aws_g4dn.xlarge": aws_g4dn_xlarge,
     "aws_g4dn.2xlarge": aws_g4dn_2xlarge,
     "aws_g4dn.4xlarge": aws_g4dn_4xlarge,
@@ -281,6 +446,18 @@ NAMED_RESOURCES: Mapping[str, Callable[[], Resource]] = {
     "aws_g5.12xlarge": aws_g5_12xlarge,
     "aws_g5.24xlarge": aws_g5_24xlarge,
     "aws_g5.48xlarge": aws_g5_48xlarge,
+    "aws_g6e.xlarge": aws_g6e_xlarge,
+    "aws_g6e.2xlarge": aws_g6e_2xlarge,
+    "aws_g6e.4xlarge": aws_g6e_4xlarge,
+    "aws_g6e.8xlarge": aws_g6e_8xlarge,
+    "aws_g6e.16xlarge": aws_g6e_16xlarge,
+    "aws_g6e.12xlarge": aws_g6e_12xlarge,
+    "aws_g6e.24xlarge": aws_g6e_24xlarge,
+    "aws_g6e.48xlarge": aws_g6e_48xlarge,
     "aws_trn1.2xlarge": aws_trn1_2xlarge,
     "aws_trn1.32xlarge": aws_trn1_32xlarge,
+    "aws_inf2.xlarge": aws_inf2_xlarge,
+    "aws_inf2.8xlarge": aws_inf2_8xlarge,
+    "aws_inf2.24xlarge": aws_inf2_24xlarge,
+    "aws_inf2.48xlarge": aws_inf2_48xlarge,
 }
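The named-resource additions follow the module's existing pattern: every instance type is a zero-argument factory registered in NAMED_RESOURCES, and the new NEURON_DEVICE constant advertises Trainium/Inferentia accelerators through the devices field rather than the gpu count. A small sketch of resolving one of the updated entries, assuming this nightly wheel is installed (the printed values follow from the definitions above):

    # A minimal sketch, assuming this nightly wheel is installed.
    from torchx.specs.named_resources_aws import NAMED_RESOURCES, NEURON_DEVICE

    res = NAMED_RESOURCES["aws_trn1.2xlarge"]()  # zero-arg factory -> Resource
    print(res.cpu, res.gpu)            # 8 0 -- Neuron chips are not counted as gpu
    print(res.devices[NEURON_DEVICE])  # 1, keyed by "aws.amazon.com/neurondevice"
    print(res.memMB)                   # 32 * GiB, with GiB discounted by MEM_TAX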
torchx/specs/named_resources_generic.py
CHANGED
@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# pyre-strict
+
 """
 Defines generic named resources that are not specific to any cloud provider's
 instance types. These generic named resources are meant to be used as
torchx/specs/overlays.py
ADDED
@@ -0,0 +1,106 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+"""
+Overlays are JSON structs applied to :py:class:`~torchx.specs.AppDef` and :py:class:`~torchx.specs.Role`
+to specify attributes of the scheduler's submit-job request that are not currently representable
+as attributes of :py:class:`~torchx.specs.AppDef` and :py:class:`~torchx.specs.Role`.
+
+For end-uses, here are a few use-cases of overlays:
+
+1. A new version of the scheduler has concepts/features that have not yet been added to TorchX.
+2. A bespoke internal scheduler has custom features that do not generalize hence not in TorchX.
+3. Re-using a pre-built ``AppDef`` but need to make a small change to the resulting scheduler request.
+
+And for scheduler authors:
+
+1. Scheduler setting needs to be applied to a ``Role``, which makes it hard to add as ``runopts``
+   since ``runopts`` apply at the ``AppDef`` level.
+2. Scheduler setting cannot be represented naturally as the types supported by ``runopts``.
+3. Exposing the setting as a ``runopts`` obfuscates things.
+
+See :py:func:`~torchx.specs.overlays.apply_overlay` for rules on how overlays are applied.
+"""
+
+from typing import Any
+
+Json = dict[str, Any]
+
+
+def apply_overlay(base: Json, overlay: Json) -> None:
+    """Applies ``overlay`` on ``base``.
+
+    .. note:: this function mutates the ``base``!
+
+    Overlays follow these rules:
+
+    1. Dicts, upsert key, value in base with the ones in overlay.
+    2. Nested dicts, overlay recursively.
+    3. Lists, append the overlay values to the base values.
+    4. Nested lists DO NOT append recursively.
+    5. Primitives (bool, str, int, float), replace base with the value in overlay.
+
+    .. doctest::
+
+        from torchx.specs.overlays import apply_overlay
+
+        base = {
+            "scheduler": {"policy": "default"},
+            "resources": {"limits": {"cpu": "500m"}},
+            "tolerations": [{"key": "gpu"}],
+            "nodeSelectorTerms": [
+                [{"matchExpressions": []}]
+            ],
+            "maxPods": 110,
+        }
+        overlay = {
+            "scheduler": {"policy": "binpacking"},
+            "resources": {"limits": {"memory": "1Gi"}},
+            "tolerations": [{"key": "spot"}],
+            "nodeSelectorTerms": [
+                [{"matchExpressions": [{"key": "disk"}]}]
+            ],
+            "maxPods": 250,
+        }
+
+        apply_overlay(base, overlay)
+
+        assert {
+            "scheduler": {"policy": "binpacking"},
+            "resources": {"limits": {"cpu": "500m", "memory": "1Gi"}},
+            "tolerations": [{"key": "gpu"}, {"key": "spot"}],
+            "nodeSelectorTerms": [
+                [{"matchExpressions": []}],
+                [{"matchExpressions": [{"key": "disk"}]}],
+            ],
+            "maxPods": 250,
+        } == base
+
+    """
+
+    def assert_type_equal(key: str, o1: object, o2: object) -> None:
+        o1_type = type(o1)
+        o2_type = type(o2)
+        assert (
+            o1_type == o2_type
+        ), f"Type mismatch for attr: `{key}`. {o1_type.__qualname__} != {o2_type.__qualname__}"
+
+    for key, overlay_value in overlay.items():
+        if key in base:
+            base_value = base[key]
+
+            assert_type_equal(key, base_value, overlay_value)
+
+            if isinstance(base_value, dict) and isinstance(overlay_value, dict):
+                apply_overlay(base_value, overlay_value)
+            elif isinstance(base_value, list) and isinstance(overlay_value, list):
+                base_value.extend(overlay_value)
+            else:
+                base[key] = overlay_value
+        else:
+            base[key] = overlay_value