torchx-nightly 2025.8.5__py3-none-any.whl → 2026.1.11__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (58)
  1. torchx/{schedulers/ray/__init__.py → _version.py} +3 -1
  2. torchx/cli/cmd_delete.py +30 -0
  3. torchx/cli/cmd_list.py +1 -2
  4. torchx/cli/cmd_run.py +202 -28
  5. torchx/cli/cmd_tracker.py +1 -1
  6. torchx/cli/main.py +2 -0
  7. torchx/components/__init__.py +1 -8
  8. torchx/components/dist.py +9 -3
  9. torchx/components/integration_tests/component_provider.py +2 -2
  10. torchx/components/utils.py +1 -1
  11. torchx/distributed/__init__.py +1 -1
  12. torchx/runner/api.py +102 -81
  13. torchx/runner/config.py +3 -1
  14. torchx/runner/events/__init__.py +20 -10
  15. torchx/runner/events/api.py +1 -1
  16. torchx/schedulers/__init__.py +7 -10
  17. torchx/schedulers/api.py +66 -25
  18. torchx/schedulers/aws_batch_scheduler.py +47 -6
  19. torchx/schedulers/aws_sagemaker_scheduler.py +1 -1
  20. torchx/schedulers/docker_scheduler.py +4 -3
  21. torchx/schedulers/ids.py +27 -23
  22. torchx/schedulers/kubernetes_mcad_scheduler.py +1 -4
  23. torchx/schedulers/kubernetes_scheduler.py +355 -36
  24. torchx/schedulers/local_scheduler.py +2 -1
  25. torchx/schedulers/lsf_scheduler.py +1 -1
  26. torchx/schedulers/slurm_scheduler.py +102 -27
  27. torchx/specs/__init__.py +40 -9
  28. torchx/specs/api.py +222 -12
  29. torchx/specs/builders.py +109 -28
  30. torchx/specs/file_linter.py +117 -53
  31. torchx/specs/finder.py +25 -37
  32. torchx/specs/named_resources_aws.py +13 -2
  33. torchx/specs/overlays.py +106 -0
  34. torchx/tracker/__init__.py +2 -2
  35. torchx/tracker/api.py +1 -1
  36. torchx/util/entrypoints.py +1 -6
  37. torchx/util/strings.py +1 -1
  38. torchx/util/types.py +12 -1
  39. torchx/version.py +2 -2
  40. torchx/workspace/api.py +102 -5
  41. {torchx_nightly-2025.8.5.dist-info → torchx_nightly-2026.1.11.dist-info}/METADATA +35 -49
  42. {torchx_nightly-2025.8.5.dist-info → torchx_nightly-2026.1.11.dist-info}/RECORD +46 -56
  43. {torchx_nightly-2025.8.5.dist-info → torchx_nightly-2026.1.11.dist-info}/WHEEL +1 -1
  44. torchx/examples/pipelines/__init__.py +0 -0
  45. torchx/examples/pipelines/kfp/__init__.py +0 -0
  46. torchx/examples/pipelines/kfp/advanced_pipeline.py +0 -289
  47. torchx/examples/pipelines/kfp/dist_pipeline.py +0 -71
  48. torchx/examples/pipelines/kfp/intro_pipeline.py +0 -83
  49. torchx/pipelines/kfp/__init__.py +0 -30
  50. torchx/pipelines/kfp/adapter.py +0 -274
  51. torchx/pipelines/kfp/version.py +0 -19
  52. torchx/schedulers/gcp_batch_scheduler.py +0 -497
  53. torchx/schedulers/ray/ray_common.py +0 -22
  54. torchx/schedulers/ray/ray_driver.py +0 -307
  55. torchx/schedulers/ray_scheduler.py +0 -454
  56. {torchx_nightly-2025.8.5.dist-info → torchx_nightly-2026.1.11.dist-info}/entry_points.txt +0 -0
  57. {torchx_nightly-2025.8.5.dist-info → torchx_nightly-2026.1.11.dist-info/licenses}/LICENSE +0 -0
  58. {torchx_nightly-2025.8.5.dist-info → torchx_nightly-2026.1.11.dist-info}/top_level.txt +0 -0
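
The most significant removals in this release are the KubeFlow Pipelines adapter (torchx/pipelines/kfp), the Ray scheduler, and the GCP Batch scheduler; the deleted KFP sources appear in full below. A minimal, illustrative sketch for checking at runtime which of these integrations an installed build still ships — the module names are taken from the file list above, and the probe itself is not part of TorchX:

import importlib

# Modules deleted in 2026.1.11 according to the file list above.
for mod in (
    "torchx.pipelines.kfp",
    "torchx.schedulers.ray_scheduler",
    "torchx.schedulers.gcp_batch_scheduler",
):
    try:
        importlib.import_module(mod)
        print(f"{mod}: present in this build")
    except ImportError:
        print(f"{mod}: not in this build")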
torchx/examples/pipelines/kfp/advanced_pipeline.py
@@ -1,289 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- Advanced KubeFlow Pipelines Example
- ===================================
-
- This is an example pipeline using KubeFlow Pipelines built with only TorchX
- components.
-
- KFP adapters can be used transform the TorchX components directly into
- something that can be used within KFP.
- """
-
- # %%
- # Input Arguments
- # ###############
- # Lets first define some arguments for the pipeline.
-
- import argparse
- import os.path
- import sys
- from typing import Dict
-
- import kfp
- import torchx
- from torchx import specs
- from torchx.components.dist import ddp as dist_ddp
- from torchx.components.serve import torchserve
- from torchx.components.utils import copy as utils_copy, python as utils_python
- from torchx.pipelines.kfp.adapter import container_from_app
-
-
- parser = argparse.ArgumentParser(description="example kfp pipeline")
-
- # %%
- # TorchX components are built around images. Depending on what scheduler
- # you're using this can vary but for KFP these images are specified as
- # docker containers. We have one container for the example apps and one for
- # the standard built in apps. If you modify the torchx example code you'll
- # need to rebuild the container before launching it on KFP
-
-
- parser.add_argument(
-     "--image",
-     type=str,
-     help="docker image to use for the examples apps",
-     default=torchx.IMAGE,
- )
-
- # %%
- # Most TorchX components use
- # `fsspec <https://filesystem-spec.readthedocs.io/en/latest/>`_ to abstract
- # away dealing with remote filesystems. This allows the components to take
- # paths like ``s3://`` to make it easy to use cloud storage providers.
- parser.add_argument(
-     "--output_path",
-     type=str,
-     help="path to place the data",
-     required=True,
- )
- parser.add_argument("--load_path", type=str, help="checkpoint path to load from")
-
- # %%
- # This example uses the torchserve for inference so we need to specify some
- # options. This assumes you have a TorchServe instance running in the same
- # Kubernetes cluster with with the service name ``torchserve`` in the default
- # namespace.
- #
- # See https://github.com/pytorch/serve/blob/master/kubernetes/README.md for info
- # on how to setup TorchServe.
- parser.add_argument(
-     "--management_api",
-     type=str,
-     help="path to the torchserve management API",
-     default="http://torchserve.default.svc.cluster.local:8081",
- )
- parser.add_argument(
-     "--model_name",
-     type=str,
-     help="the name of the inference model",
-     default="tiny_image_net",
- )
-
- # %% Parse the arguments, you'll need to set these accordingly if running from a
- # notebook.
-
-
- if "NOTEBOOK" in globals():
-     argv = [
-         "--output_path",
-         "/tmp/output",
-     ]
- else:
-     argv = sys.argv[1:]
-
- args: argparse.Namespace = parser.parse_args(argv)
-
- # %%
- # Creating the Components
- # #######################
- # The first step is downloading the data to somewhere we can work on it. For
- # this we can just the builtin copy component. This component takes two valid
- # fsspec paths and copies them from one to another. In this case we're using
- # http as the source and a file under the output_path as the output.
-
-
- data_path: str = os.path.join(args.output_path, "tiny-imagenet-200.zip")
- copy_app: specs.AppDef = utils_copy(
-     "http://cs231n.stanford.edu/tiny-imagenet-200.zip",
-     data_path,
-     image=args.image,
- )
-
- # %%
- # The next component is for data preprocessing. This takes in the raw data from
- # the previous operator and runs some transforms on it for use with the trainer.
- #
- # datapreproc outputs the data to a specified fsspec path. These paths are all
- # specified ahead of time so we have a fully static pipeline.
-
-
- processed_data_path: str = os.path.join(args.output_path, "processed")
- datapreproc_app: specs.AppDef = utils_python(
-     "--output_path",
-     processed_data_path,
-     "--input_path",
-     data_path,
-     "--limit",
-     "100",
-     image=args.image,
-     m="torchx.examples.apps.datapreproc.datapreproc",
-     cpu=1,
-     memMB=1024,
- )
-
- # %%
- # Next we'll create the trainer component that takes in the training data from the
- # previous datapreproc component. We've defined this in a separate component
- # file as you normally would.
- #
- # Having a separate component file allows you to launch your trainer from the
- # TorchX CLI via ``torchx run`` for fast iteration as well as run it from a
- # pipeline in an automated fashion.
-
- # make sure examples is on the path
- if "__file__" in globals():
-     sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
-
-
- logs_path: str = os.path.join(args.output_path, "logs")
- models_path: str = os.path.join(args.output_path, "models")
-
- trainer_app: specs.AppDef = dist_ddp(
-     *(
-         "--output_path",
-         models_path,
-         "--load_path",
-         args.load_path or "",
-         "--log_path",
-         logs_path,
-         "--data_path",
-         processed_data_path,
-         "--epochs",
-         str(1),
-     ),
-     image=args.image,
-     m="torchx.examples.apps.lightning.train",
-     j="1x1",
-     # per node resource settings
-     cpu=1,
-     memMB=3000,
- )
-
- # %%
- # To have the tensorboard path show up in KFPs UI we need to some metadata so
- # KFP knows where to consume the metrics from.
- #
- # This will get used when we create the KFP container.
-
-
- ui_metadata: Dict[str, object] = {
-     "outputs": [
-         {
-             "type": "tensorboard",
-             "source": os.path.join(logs_path, "lightning_logs"),
-         }
-     ]
- }
-
- # %%
- # For the inference, we're leveraging one of the builtin TorchX components. This
- # component takes in a model and uploads it to the TorchServe management API
- # endpoints.
-
-
- serve_app: specs.AppDef = torchserve(
-     model_path=os.path.join(models_path, "model.mar"),
-     management_api=args.management_api,
-     image=args.image,
-     params={
-         "model_name": args.model_name,
-         # set this to allocate a worker
-         # "initial_workers": 1,
-     },
- )
-
- # %%
- # For model interpretability we're leveraging a custom component stored in it's
- # own component file. This component takes in the output from datapreproc and
- # train components and produces images with integrated gradient results.
-
- interpret_path: str = os.path.join(args.output_path, "interpret")
- interpret_app: specs.AppDef = utils_python(
-     *(
-         "--load_path",
-         os.path.join(models_path, "last.ckpt"),
-         "--data_path",
-         processed_data_path,
-         "--output_path",
-         interpret_path,
-     ),
-     image=args.image,
-     m="torchx.examples.apps.lightning.interpret",
- )
-
- # %%
- # Pipeline Definition
- # ###################
- # The last step is to define the actual pipeline using the torchx components via
- # the KFP adapter and export the pipeline package that can be uploaded to a KFP
- # cluster.
- #
- # The KFP adapter currently doesn't track the input and outputs so the
- # containers need to have their dependencies specified via `.after()`.
- #
- # We call `.set_tty()` to make the logs from the components more responsive for
- # example purposes.
-
-
- def pipeline() -> None:
-     # container_from_app creates a KFP container from the TorchX app
-     # definition.
-     copy = container_from_app(copy_app)
-     copy.container.set_tty()
-
-     datapreproc = container_from_app(datapreproc_app)
-     datapreproc.container.set_tty()
-     datapreproc.after(copy)
-
-     # For the trainer we want to log that UI metadata so you can access
-     # tensorboard from the UI.
-     trainer = container_from_app(trainer_app, ui_metadata=ui_metadata)
-     trainer.container.set_tty()
-     trainer.after(datapreproc)
-
-     if False:
-         serve = container_from_app(serve_app)
-         serve.container.set_tty()
-         serve.after(trainer)
-
-     if False:
-         # Serve and interpret only require the trained model so we can run them
-         # in parallel to each other.
-         interpret = container_from_app(interpret_app)
-         interpret.container.set_tty()
-         interpret.after(trainer)
-
-
- kfp.compiler.Compiler().compile(
-     pipeline_func=pipeline,
-     package_path="pipeline.yaml",
- )
-
- with open("pipeline.yaml", "rt") as f:
-     print(f.read())
-
- # %%
- # Once this has all run you should have a pipeline file (typically
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
- # a kfp.Client.
-
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
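
The pipeline above relies on explicit `.after()` calls because the adapter does not model data dependencies between containers. A stripped-down, illustrative sketch of that ordering pattern, assuming kfp 1.x and a torchx build (2025.8.5 or earlier) that still ships torchx.pipelines.kfp — the `_echo` helper is hypothetical:

import kfp
from torchx import specs
from torchx.pipelines.kfp.adapter import container_from_app


def _echo(name: str, msg: str) -> specs.AppDef:
    # Single-role, single-replica AppDef: the only shape the KFP adapter supports.
    return specs.AppDef(
        name=name,
        roles=[
            specs.Role(name=name, image="alpine", entrypoint="/bin/echo", args=[msg])
        ],
    )


def two_step_pipeline() -> None:
    first = container_from_app(_echo("step1", "one"))
    second = container_from_app(_echo("step2", "two"))
    # The adapter does not infer dependencies, so order the steps explicitly.
    second.after(first)


kfp.compiler.Compiler().compile(
    pipeline_func=two_step_pipeline,
    package_path="two_step.yaml",
)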
torchx/examples/pipelines/kfp/dist_pipeline.py
@@ -1,71 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- Distributed KubeFlow Pipelines Example
- ======================================
-
- This is an example KFP pipeline that uses resource_from_app to launch a
- distributed operator using the kubernetes/volcano job scheduler. This only works
- in Kubernetes KFP clusters with https://volcano.sh/en/docs/ installed on them.
- """
-
- import kfp
- from torchx import specs
- from torchx.pipelines.kfp.adapter import resource_from_app
-
-
- def pipeline() -> None:
-     # First we define our AppDef for the component, we set
-     echo_app = specs.AppDef(
-         name="test-dist",
-         roles=[
-             specs.Role(
-                 name="dist-echo",
-                 image="alpine",
-                 entrypoint="/bin/echo",
-                 args=["hello dist!"],
-                 num_replicas=3,
-             ),
-         ],
-     )
-
-     # To convert the TorchX AppDef into a KFP container we use
-     # the resource_from_app adapter. This takes generates a KFP Kubernetes
-     # resource operator definition from the TorchX app def and instantiates it.
-     echo_container: kfp.dsl.BaseOp = resource_from_app(echo_app, queue="default")
-
-
- # %%
- # To generate the pipeline definition file we need to call into the KFP compiler
- # with our pipeline function.
-
- kfp.compiler.Compiler().compile(
-     pipeline_func=pipeline,
-     package_path="pipeline.yaml",
- )
-
- with open("pipeline.yaml", "rt") as f:
-     print(f.read())
-
- # %%
- # Once this has all run you should have a pipeline file (typically
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
- # a kfp.Client.
- #
- # See the
- # `KFP SDK Examples <https://www.kubeflow.org/docs/components/pipelines/legacy-v1/tutorials/sdk-examples/#examples>`_
- # for more info on launching KFP pipelines.
-
- # %%
- # See the :ref:`examples_pipelines/kfp/advanced_pipeline:Advanced KubeFlow Pipelines Example` for how to chain multiple
- # components together and use builtin components.
-
-
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
torchx/examples/pipelines/kfp/intro_pipeline.py
@@ -1,83 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- Intro KubeFlow Pipelines Example
- ================================
-
- This an introductory pipeline using KubeFlow Pipelines built with only TorchX
- components.
-
- TorchX is intended to allow making cross platform components. As such, we have
- a standard definition that uses adapters to convert it to the specific
- pipeline platform. This is an example of using the KFP adapter to run a TorchX
- component as part of a KubeFlow Pipeline.
-
- TorchX tries to leverage standard mechanisms wherever possible. For KFP we use
- the existing KFP pipeline definition syntax and add a single
- `component_from_app` conversion step to convert a TorchX component into one
- KFP can understand.
-
- Typically you have a separate component file but for this example we define the
- AppDef inline.
- """
-
- import kfp
- from torchx import specs
- from torchx.pipelines.kfp.adapter import container_from_app
-
-
- def pipeline() -> None:
-     # First we define our AppDef for the component. AppDef is a core part of TorchX
-     # and can be used to describe complex distributed multi container apps or
-     # just a single node component like here.
-     echo_app: specs.AppDef = specs.AppDef(
-         name="examples-intro",
-         roles=[
-             specs.Role(
-                 name="worker",
-                 entrypoint="/bin/echo",
-                 args=["Hello TorchX!"],
-                 image="alpine",
-             )
-         ],
-     )
-
-     # To convert the TorchX AppDef into a KFP container we use
-     # the container_from_app adapter. This takes generates a KFP component
-     # definition from the TorchX app def and instantiates it into a container.
-     echo_container: kfp.dsl.ContainerOp = container_from_app(echo_app)
-
-
- # %%
- # To generate the pipeline definition file we need to call into the KFP compiler
- # with our pipeline function.
-
- kfp.compiler.Compiler().compile(
-     pipeline_func=pipeline,
-     package_path="pipeline.yaml",
- )
-
- with open("pipeline.yaml", "rt") as f:
-     print(f.read())
-
- # %%
- # Once this has all run you should have a pipeline file (typically
- # pipeline.yaml) that you can upload to your KFP cluster via the UI or
- # a kfp.Client.
- #
- # See the
- # `KFP SDK Examples <https://www.kubeflow.org/docs/components/pipelines/legacy-v1/tutorials/sdk-examples/#examples>`_
- # for more info on launching KFP pipelines.
-
- # %%
- # See the :ref:`examples_pipelines/kfp/advanced_pipeline:Advanced KubeFlow Pipelines Example` for how to chain multiple
- # components together and use builtin components.
-
- # sphinx_gallery_thumbnail_path = '_static/img/gallery-kfp.png'
torchx/pipelines/kfp/__init__.py
@@ -1,30 +0,0 @@
- #!/usr/bin/env python3
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
-
- # pyre-strict
-
- """
- This module contains adapters for converting TorchX components into KubeFlow
- Pipeline components.
-
- The current KFP adapters only support single node (1 role and 1 replica)
- components.
- """
-
- import kfp
-
- from .version import __version__ as __version__ # noqa F401
-
-
- def _check_kfp_version() -> None:
-     if not kfp.__version__.startswith("1."):
-         raise ImportError(
-             f"Only kfp version 1.x.x is supported! kfp version {kfp.__version__}"
-         )
-
-
- _check_kfp_version()
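
Because `_check_kfp_version()` runs at module import time, the guard above fails fast rather than at first use. A small illustrative sketch of the resulting behavior, assuming a pre-removal torchx build (2025.8.5 or earlier) with kfp 2.x installed:

# Hypothetical session: importing the adapter package under an unsupported kfp.
try:
    import torchx.pipelines.kfp  # the module-level _check_kfp_version() runs here
except ImportError as err:
    print(f"unsupported kfp: {err}")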