torchx-nightly 2025.9.21__py3-none-any.whl → 2025.9.23__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of torchx-nightly might be problematic.

torchx/cli/cmd_list.py CHANGED
@@ -33,8 +33,7 @@ class CmdList(SubCommand):
             type=str,
             default=get_default_scheduler_name(),
             choices=list(scheduler_names),
-            help=f"Name of the scheduler to use. One of: [{','.join(scheduler_names)}]."
-            " For listing app handles for ray scheduler, RAY_ADDRESS env variable should be set.",
+            help=f"Name of the scheduler to use. One of: [{','.join(scheduler_names)}].",
         )
 
     def run(self, args: argparse.Namespace) -> None:
torchx/components/integration_tests/component_provider.py CHANGED
@@ -109,7 +109,7 @@ class CopyComponentProvider(ComponentProvider):
             self._dst_path = "<None>"
 
     def setUp(self) -> None:
-        if self._scheduler in ["local_cwd", "ray"]:
+        if self._scheduler in ["local_cwd"]:
             fname = "torchx_copy_test.txt"
             self._src_path: str = os.path.join(tempfile.gettempdir(), fname)
             self._dst_path: str = os.path.join(tempfile.gettempdir(), f"{fname}.copy")
@@ -126,7 +126,7 @@ class CopyComponentProvider(ComponentProvider):
     def tearDown(self) -> None:
         if os.path.exists(self._dst_path):
             os.remove(self._dst_path)
-        if self._scheduler in ["local_cwd", "ray"] and os.path.exists(self._dst_path):
+        if self._scheduler in ["local_cwd"] and os.path.exists(self._dst_path):
             os.remove(self._dst_path)
 
     def get_app_def(self) -> AppDef:
torchx/schedulers/__init__.py CHANGED
@@ -21,7 +21,6 @@ DEFAULT_SCHEDULER_MODULES: Mapping[str, str] = {
     "kubernetes_mcad": "torchx.schedulers.kubernetes_mcad_scheduler",
     "aws_batch": "torchx.schedulers.aws_batch_scheduler",
     "aws_sagemaker": "torchx.schedulers.aws_sagemaker_scheduler",
-    "gcp_batch": "torchx.schedulers.gcp_batch_scheduler",
     "lsf": "torchx.schedulers.lsf_scheduler",
 }
 
torchx/util/strings.py CHANGED
@@ -13,7 +13,7 @@ def normalize_str(data: str) -> str:
     """
     Invokes ``lower`` on thes string and removes all
     characters that do not satisfy ``[a-z0-9\\-]`` pattern.
-    This method is mostly used to make sure kubernetes and gcp_batch scheduler gets
+    This method is mostly used to make sure kubernetes scheduler gets
     the job name that does not violate its restrictions.
     """
     if data.startswith("-"):
torchx_nightly-2025.9.23.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: torchx-nightly
-Version: 2025.9.21
+Version: 2025.9.23
 Summary: TorchX SDK and Components
 Home-page: https://github.com/pytorch/torchx
 Author: TorchX Devs
@@ -22,7 +22,6 @@ Requires-Dist: pyyaml
 Requires-Dist: docker
 Requires-Dist: filelock
 Requires-Dist: fsspec>=2023.10.0
-Requires-Dist: urllib3<1.27,>=1.21.1
 Requires-Dist: tabulate
 Provides-Extra: aws_batch
 Requires-Dist: boto3; extra == "aws-batch"
@@ -36,9 +35,6 @@ Requires-Dist: kubernetes==25.3.0; extra == "dev"
 Requires-Dist: flake8==3.9.0; extra == "dev"
 Requires-Dist: fsspec==2024.3.1; extra == "dev"
 Requires-Dist: s3fs==2024.3.1; extra == "dev"
-Requires-Dist: google-cloud-batch==0.17.14; extra == "dev"
-Requires-Dist: google-cloud-logging==3.10.0; extra == "dev"
-Requires-Dist: google-cloud-runtimeconfig==0.34.0; extra == "dev"
 Requires-Dist: hydra-core; extra == "dev"
 Requires-Dist: ipython; extra == "dev"
 Requires-Dist: mlflow-skinny; extra == "dev"
@@ -61,14 +57,6 @@ Requires-Dist: ts==0.5.1; extra == "dev"
 Requires-Dist: wheel; extra == "dev"
 Requires-Dist: lintrunner; extra == "dev"
 Requires-Dist: lintrunner-adapters; extra == "dev"
-Requires-Dist: grpcio==1.62.1; extra == "dev"
-Requires-Dist: grpcio-status==1.48.1; extra == "dev"
-Requires-Dist: googleapis-common-protos==1.63.0; extra == "dev"
-Requires-Dist: google-api-core==2.18.0; extra == "dev"
-Provides-Extra: gcp_batch
-Requires-Dist: google-cloud-batch>=0.5.0; extra == "gcp-batch"
-Requires-Dist: google-cloud-logging>=3.0.0; extra == "gcp-batch"
-Requires-Dist: google-cloud-runtimeconfig>=0.33.2; extra == "gcp-batch"
 Provides-Extra: kubernetes
 Requires-Dist: kubernetes>=11; extra == "kubernetes"
 
@@ -93,7 +81,6 @@ TorchX currently supports:
 * AWS Batch
 * Docker
 * Local
-* GCP Batch (prototype)
 
 Need a scheduler not listed? [Let us know!](https://github.com/pytorch/torchx/issues?q=is%3Aopen+is%3Aissue+label%3Ascheduler-request)
 
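The net effect of the metadata hunks is that the wheel no longer declares a gcp_batch extra (or its google-cloud/grpc dev pins) and drops the urllib3 pin. A quick standard-library check of what an installed wheel still declares, assuming torchx-nightly is installed in the current environment:

from importlib.metadata import metadata

md = metadata("torchx-nightly")
extras = md.get_all("Provides-Extra") or []
print(sorted(extras))          # "gcp_batch" is gone as of 2025.9.23
print("gcp_batch" in extras)   # False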
torchx_nightly-2025.9.23.dist-info/RECORD CHANGED
@@ -14,7 +14,7 @@ torchx/cli/cmd_base.py,sha256=SdqMtqi04CEqnzcgcS35DbDbsBeMxSgEhfynfpIkMGk,790
 torchx/cli/cmd_cancel.py,sha256=NKfOCu_44Lch9vliGSQ0Uv6BVqpUqj7Tob652TI-ua4,835
 torchx/cli/cmd_configure.py,sha256=1kTv0qbsbV44So74plAySwWu56pQrqjhfW_kbfdC3Rw,1722
 torchx/cli/cmd_describe.py,sha256=E5disbHoKTsqYKp2s3DaFW9GDLCCOgdOc3pQoHKoyCs,1283
-torchx/cli/cmd_list.py,sha256=4Y1ZOq-kqJbztoBt56hAW_InJEaJuDAjpKWgMhBw4II,1507
+torchx/cli/cmd_list.py,sha256=alkS9aIaDI8lX3W8uj8Vtr3IU3G2VeCuokKSd3zOFug,1409
 torchx/cli/cmd_log.py,sha256=v-EZYUDOcG95rEgTnrsmPJMUyxM9Mk8YFAJtUxtgViE,5475
 torchx/cli/cmd_run.py,sha256=TshvEMTxMRj5O0KhetzHepZUaAFq8R5nFgY8GC_Gl6g,18576
 torchx/cli/cmd_runopts.py,sha256=NWZiP8XpQjfTDJgays2c6MgL_8wxFoeDge6NstaZdKk,1302
@@ -32,7 +32,7 @@ torchx/components/structured_arg.py,sha256=8jMcd0rtUmzCKEQKJ_JYzxSkMMK9q0fYjkwAs
 torchx/components/train.py,sha256=vtrQXRcD7bIcbb3lSeyD9BBlIe1mv1WNW6rnLK9R0Mw,1259
 torchx/components/utils.py,sha256=QRBxBm1OnNhOhpPs0lKdbJ8_mNhWYMklY6cl1gPIw9A,9363
 torchx/components/integration_tests/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
-torchx/components/integration_tests/component_provider.py,sha256=cFNGqmclcZTJlOW_YGf5XEuGeWloTmcJEAh02Aob_PQ,3995
+torchx/components/integration_tests/component_provider.py,sha256=g-4ig1vtd5Vzgug0VAKRAFUt6KAV3TgQrBCrwRSJ7ZY,3981
 torchx/components/integration_tests/integ_tests.py,sha256=O8jd8Jq5O0mns7xzIFsHexBDHkIIAIfELQkWCzNPzRw,5165
 torchx/distributed/__init__.py,sha256=lobebigfujmRTe_SJw07_a9iohBxDhq2iiPsV1YcKjw,10247
 torchx/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,13 +56,12 @@ torchx/runner/events/handlers.py,sha256=ThHCIJW21BfBgB7b6ftyjASJmD1KdizpjuTtsyqn
 torchx/runtime/__init__.py,sha256=Wxje2BryzeQneFu5r6P9JJiEKG-_C9W1CcZ_JNrKT6g,593
 torchx/runtime/tracking/__init__.py,sha256=dYnAPnrXYREfPXkpHhdOFkcYIODWEbA13PdD-wLQYBo,3055
 torchx/runtime/tracking/api.py,sha256=SmUQyUKZqG3KlAhT7CJOGqRz1O274E4m63wQeOVq3CU,5472
-torchx/schedulers/__init__.py,sha256=hliMsZHZNOKue0uTHUWxvO0V7xsKApBxN4Wb_9L0Mz4,2253
+torchx/schedulers/__init__.py,sha256=_Wx6-X3FNh8RJR82UGgUwKg7V_VQYsAkrveDoSSk2xU,2195
 torchx/schedulers/api.py,sha256=lfxNhrEO6eYYqVuQzzj9sTXrZShuZkyYxJ1jPE-Lvpo,14561
 torchx/schedulers/aws_batch_scheduler.py,sha256=hFxYzSZEK2SVS5sEyQC5YvNI0JJUJUQsWORlYpj_h3M,28105
 torchx/schedulers/aws_sagemaker_scheduler.py,sha256=flN8GumKE2Dz4X_foAt6Jnvt-ZVojWs6pcyrHwB0hz0,20921
 torchx/schedulers/devices.py,sha256=RjVcu22ZRl_9OKtOtmA1A3vNXgu2qD6A9ST0L0Hsg4I,1734
 torchx/schedulers/docker_scheduler.py,sha256=xuK00-dB6o8TV1YaZox7O5P09LHB2KeQ6t4eiNtqMYQ,16781
-torchx/schedulers/gcp_batch_scheduler.py,sha256=JQuaEJVL_7NSa9AeUc_0Qo74XZNJk_kp6XwgunvlUKI,16281
 torchx/schedulers/ids.py,sha256=3E-_vwVYC-8Tv8kjuY9-W7TbOe_-Laqd8a65uIN3hQY,1798
 torchx/schedulers/kubernetes_mcad_scheduler.py,sha256=1tuzq3OutCMdSPqg_dNmCHt_wyuSFKG0-ywLc3qITJo,42949
 torchx/schedulers/kubernetes_scheduler.py,sha256=0_loGJ7WnxEr9dhgFt3Gw-7nVLirMDVN-MAFTCq7erE,28217
@@ -97,15 +96,15 @@ torchx/util/log_tee_helpers.py,sha256=wPyozmh9BOt_2d3Gxa0iNogwnjzwFitIIMBJOJ1arI
 torchx/util/modules.py,sha256=o4y_d07gTpJ4nIVBcoUVJ0JtXIHEsEC5kbgBM6NGpgA,2135
 torchx/util/session.py,sha256=r6M_nyzXgcbk1GgYGZ324F_ehRGCqjjdVk4YgKxMj8M,1214
 torchx/util/shlex.py,sha256=eXEKu8KC3zIcd8tEy9_s8Ds5oma8BORr-0VGWNpG2dk,463
-torchx/util/strings.py,sha256=GkLWCmYS89Uv6bWc5hH0XwvHy7oQmprv2U7axC4A2e8,678
+torchx/util/strings.py,sha256=7Ef1loz2IYMrzeJ6Lewywi5cBIc3X3g7lSPbT1Tn_z4,664
 torchx/util/types.py,sha256=E9dxAWQnsJkIDuHtg-poeOJ4etucSI_xP_Z5kNJX8uI,9229
 torchx/workspace/__init__.py,sha256=cZsKVvUWwDYcGhe6SCXQGBQfbk_yTnKEImOkI6xmu30,809
 torchx/workspace/api.py,sha256=Ct_75VU94fsH9Rf1WRe-wJGpVgl5O05S_Dq_t2ArJWA,11348
 torchx/workspace/dir_workspace.py,sha256=npNW_IjUZm_yS5r-8hrRkH46ndDd9a_eApT64m1S1T4,2268
 torchx/workspace/docker_workspace.py,sha256=PFu2KQNVC-0p2aKJ-W_BKA9ZOmXdCY2ABEkCExp3udQ,10269
-torchx_nightly-2025.9.21.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
-torchx_nightly-2025.9.21.dist-info/METADATA,sha256=0KvbX8m2uQZVgOBL_JiKB8nVyXDWBCVFRAypM61NWQU,5693
-torchx_nightly-2025.9.21.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-torchx_nightly-2025.9.21.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
-torchx_nightly-2025.9.21.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
-torchx_nightly-2025.9.21.dist-info/RECORD,,
+torchx_nightly-2025.9.23.dist-info/LICENSE,sha256=WVHfXhFC0Ia8LTKt_nJVYobdqTJVg_4J3Crrfm2A8KQ,1721
+torchx_nightly-2025.9.23.dist-info/METADATA,sha256=Dsh27u65MAyHzYM5eRSPfYEQZmnh9qfsvs1_0vWkhCo,5003
+torchx_nightly-2025.9.23.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+torchx_nightly-2025.9.23.dist-info/entry_points.txt,sha256=T328AMXeKI3JZnnxfkEew2ZcMN1oQDtkXjMz7lkV-P4,169
+torchx_nightly-2025.9.23.dist-info/top_level.txt,sha256=pxew3bc2gsiViS0zADs0jb6kC5v8o_Yy_85fhHj_J1A,7
+torchx_nightly-2025.9.23.dist-info/RECORD,,
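For readers unfamiliar with RECORD: each row is path, sha256 digest, and size in bytes (the standard wheel RECORD format), so the hash and size churn above is exactly the file-level footprint of this release's changes. A minimal stdlib parse of one row, using a path from the diff:

import csv
import io

row = "torchx/util/strings.py,sha256=7Ef1loz2IYMrzeJ6Lewywi5cBIc3X3g7lSPbT1Tn_z4,664"
path, digest, size = next(csv.reader(io.StringIO(row)))
# 664 bytes, 14 fewer than before: the length of the removed " and gcp_batch".
print(path, digest.removeprefix("sha256="), int(size))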
torchx/schedulers/gcp_batch_scheduler.py DELETED
@@ -1,497 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# pyre-strict
-
-"""
-
-This contains the TorchX GCP Batch scheduler which can be used to run TorchX
-components directly on GCP Batch.
-
-This scheduler is in prototype stage and may change without notice.
-
-Prerequisites
-==============
-
-You need to have a GCP project configured to use Batch by enabling and setting it up.
-See https://cloud.google.com/batch/docs/get-started#prerequisites
-
-"""
-
-from dataclasses import dataclass
-from datetime import datetime
-from typing import Any, Dict, Iterable, List, Optional, TYPE_CHECKING, TypedDict
-
-import torchx
-import yaml
-
-from torchx.schedulers.api import (
-    DescribeAppResponse,
-    ListAppResponse,
-    Scheduler,
-    Stream,
-)
-from torchx.schedulers.ids import make_unique
-from torchx.specs.api import (
-    AppDef,
-    AppDryRunInfo,
-    AppState,
-    macros,
-    Resource,
-    Role,
-    runopts,
-)
-from torchx.util.strings import normalize_str
-
-
-if TYPE_CHECKING:
-    from google.cloud import batch_v1
-
-
-JOB_STATE: Dict[str, AppState] = {
-    "STATE_UNSPECIFIED": AppState.UNKNOWN,
-    "QUEUED": AppState.SUBMITTED,
-    "SCHEDULED": AppState.PENDING,
-    "RUNNING": AppState.RUNNING,
-    "SUCCEEDED": AppState.SUCCEEDED,
-    "FAILED": AppState.FAILED,
-    "DELETION_IN_PROGRESS": AppState.UNKNOWN,
-}
-
-GPU_COUNT_TO_TYPE: Dict[int, str] = {
-    1: "a2-highgpu-1g",
-    2: "a2-highgpu-2g",
-    4: "a2-highgpu-4g",
-    8: "a2-highgpu-8g",
-    16: "a2-highgpu-16g",
-}
-
-GPU_TYPE_TO_COUNT: Dict[str, int] = {v: k for k, v in GPU_COUNT_TO_TYPE.items()}
-
-LABEL_VERSION: str = "torchx_version"
-LABEL_APP_NAME: str = "torchx_app_name"
-
-DEFAULT_LOC: str = "us-central1"
-
-# TODO Remove LOCATIONS list once Batch supports all locations
-# or when there is an API to query locations supported by Batch
-LOCATIONS: List[str] = [
-    DEFAULT_LOC,
-    "us-west1",
-    "us-east1",
-    "asia-southeast1",
-    "europe-north1",
-    "europe-west6",
-]
-
-BATCH_LOGGER_NAME = "batch_task_logs"
-
-
-@dataclass
-class GCPBatchJob:
-    name: str
-    project: str
-    location: str
-    job_def: "batch_v1.Job"
-
-    def __str__(self) -> str:
-        return yaml.dump(self.job_def)
-
-    def __repr__(self) -> str:
-        return str(self)
-
-
-class GCPBatchOpts(TypedDict, total=False):
-    project: Optional[str]
-    location: Optional[str]
-
-
-class GCPBatchScheduler(Scheduler[GCPBatchOpts, AppDef, AppDryRunInfo[GCPBatchJob]]):
-    """
-    GCPBatchScheduler is a TorchX scheduling interface to GCP Batch.
-
-    .. code-block:: bash
-
-        $ pip install torchx[gcp_batch]
-        $ torchx run --scheduler gcp_batch utils.echo --msg hello
-        # This launches a job with app handle like gcp_batch://torchx/project:location:app_id1234 and prints it
-        $ torchx status gcp_batch://torchx/project:location:app_id1234
-        ...
-
-    Authentication is loaded from the environment using the gcloud credential handling.
-
-    **Config Options**
-
-    .. runopts::
-        class: torchx.schedulers.gcp_batch_scheduler.create_scheduler
-
-    **Compatibility**
-
-    .. compatibility::
-        type: scheduler
-        features:
-            cancel: true
-            logs: true
-            describe: true
-            distributed: true
-            workspaces: false
-            mounts: false
-            elasticity: false
-
-    """
-
-    def __init__(
-        self,
-        session_name: str,
-        # pyre-fixme[2]: Parameter annotation cannot be `Any`.
-        client: Optional[Any] = None,
-    ) -> None:
-        # NOTE: make sure any new init options are supported in create_scheduler(...)
-        Scheduler.__init__(self, "gcp_batch", session_name)
-        # pyre-fixme[4]: Attribute annotation cannot be `Any`.
-        self.__client = client
-
-    @property
-    # pyre-fixme[3]: Return annotation cannot be `Any`.
-    def _client(self) -> Any:
-        from google.api_core import gapic_v1
-        from google.cloud import batch_v1
-
-        c = self.__client
-        if c is None:
-            client_info = gapic_v1.client_info.ClientInfo(
-                user_agent=f"TorchX/{torchx.__version__}"
-            )
-            c = self.__client = batch_v1.BatchServiceClient(client_info=client_info)
-        return c
-
-    def schedule(self, dryrun_info: AppDryRunInfo[GCPBatchJob]) -> str:
-        from google.cloud import batch_v1
-
-        req = dryrun_info.request
-        assert req is not None, f"{dryrun_info} missing request"
-
-        request = batch_v1.CreateJobRequest(
-            parent=f"projects/{req.project}/locations/{req.location}",
-            job=req.job_def,
-            job_id=req.name,
-        )
-
-        response = self._client.create_job(request=request)
-        return f"{req.project}:{req.location}:{req.name}"
-
-    def _app_to_job(self, app: AppDef) -> "batch_v1.Job":
-        from google.cloud import batch_v1
-
-        name = normalize_str(make_unique(app.name))
-
-        taskGroups = []
-        allocationPolicy = None
-
-        # 1. Convert role to task
-        # TODO implement retry_policy, mount conversion
-        # NOTE: Supports only one role for now as GCP Batch supports only one TaskGroup
-        # which is ok to start with as most components have only one role
-        for role_idx, role in enumerate(app.roles):
-            values = macros.Values(
-                img_root="",
-                app_id=name,
-                replica_id=str(0),
-                rank0_env=("BATCH_MAIN_NODE_HOSTNAME"),
-            )
-            role_dict = values.apply(role)
-            role_dict.env["TORCHX_ROLE_IDX"] = str(role_idx)
-            role_dict.env["TORCHX_ROLE_NAME"] = str(role.name)
-
-            resource = role_dict.resource
-            res = batch_v1.ComputeResource()
-            cpu = resource.cpu
-            if cpu <= 0:
-                cpu = 1
-            MILLI = 1000
-            res.cpu_milli = cpu * MILLI
-            memMB = resource.memMB
-            if memMB < 0:
-                raise ValueError(
-                    f"memMB should to be set to a positive value, got {memMB}"
-                )
-            res.memory_mib = memMB
-
-            # TODO support named resources
-            # Using v100 as default GPU type as a100 does not allow changing count for now
-            # TODO See if there is a better default GPU type
-            if resource.gpu > 0:
-                if resource.gpu not in GPU_COUNT_TO_TYPE:
-                    raise ValueError(
-                        f"gpu should to be set to one of these values: {GPU_COUNT_TO_TYPE.keys()}"
-                    )
-                machineType = GPU_COUNT_TO_TYPE[resource.gpu]
-                allocationPolicy = batch_v1.AllocationPolicy(
-                    instances=[
-                        batch_v1.AllocationPolicy.InstancePolicyOrTemplate(
-                            install_gpu_drivers=True,
-                            policy=batch_v1.AllocationPolicy.InstancePolicy(
-                                machine_type=machineType,
-                            ),
-                        )
-                    ],
-                )
-                print(f"Using GPUs of type: {machineType}")
-
-            # Configure host firewall rules to accept ingress communication
-            config_network_runnable = batch_v1.Runnable(
-                script=batch_v1.Runnable.Script(
-                    text="/sbin/iptables -A INPUT -j ACCEPT"
-                )
-            )
-
-            runnable = batch_v1.Runnable(
-                container=batch_v1.Runnable.Container(
-                    image_uri=role_dict.image,
-                    commands=[role_dict.entrypoint] + role_dict.args,
-                    entrypoint="",
-                    # Configure docker to use the host network stack to communicate with containers/other hosts in the same network
-                    options="--net host",
-                )
-            )
-
-            ts = batch_v1.TaskSpec(
-                runnables=[config_network_runnable, runnable],
-                environment=batch_v1.Environment(variables=role_dict.env),
-                max_retry_count=role_dict.max_retries,
-                compute_resource=res,
-            )
-
-            task_env = [
-                batch_v1.Environment(variables={"TORCHX_REPLICA_IDX": str(i)})
-                for i in range(role_dict.num_replicas)
-            ]
-
-            tg = batch_v1.TaskGroup(
-                task_spec=ts,
-                task_count=role_dict.num_replicas,
-                task_count_per_node=1,
-                task_environments=task_env,
-                require_hosts_file=True,
-            )
-            taskGroups.append(tg)
-
-        # 2. Convert AppDef to Job
-        job = batch_v1.Job(
-            name=name,
-            task_groups=taskGroups,
-            allocation_policy=allocationPolicy,
-            logs_policy=batch_v1.LogsPolicy(
-                destination=batch_v1.LogsPolicy.Destination.CLOUD_LOGGING,
-            ),
-            # NOTE: GCP Batch does not allow label names with "."
-            labels={
-                LABEL_VERSION: torchx.__version__.replace(".", "-"),
-                LABEL_APP_NAME: name,
-            },
-        )
-        return job
-
-    def _get_project(self) -> str:
-        from google.cloud import runtimeconfig
-
-        return runtimeconfig.Client().project
-
-    def _submit_dryrun(
-        self, app: AppDef, cfg: GCPBatchOpts
-    ) -> AppDryRunInfo[GCPBatchJob]:
-        proj = cfg.get("project")
-        if proj is None:
-            proj = self._get_project()
-        assert proj is not None and isinstance(proj, str), "project must be a str"
-
-        loc = cfg.get("location")
-        assert loc is not None and isinstance(loc, str), "location must be a str"
-
-        job = self._app_to_job(app)
-
-        # Convert JobDef + BatchOpts to GCPBatchJob
-        req = GCPBatchJob(
-            name=str(job.name),
-            project=proj,
-            location=loc,
-            job_def=job,
-        )
-
-        return AppDryRunInfo(req, repr)
-
-    def run_opts(self) -> runopts:
-        opts = runopts()
-        opts.add(
-            "project",
-            type_=str,
-            help="Name of the GCP project. Defaults to the configured GCP project in the environment",
-        )
-        opts.add(
-            "location",
-            type_=str,
-            default=DEFAULT_LOC,
-            help=f"Name of the location to schedule the job in. Defaults to {DEFAULT_LOC}",
-        )
-        return opts
-
-    def _app_id_to_job_full_name(self, app_id: str) -> str:
-        """
-        app_id format: f"{project}:{location}:{name}"
-        job_full_name format: f"projects/{project}/locations/{location}/jobs/{name}"
-        where 'name' was created uniquely for the job from the app name
-        """
-        app_id_splits = app_id.split(":")
-        if len(app_id_splits) != 3:
-            raise ValueError(f"app_id not in expected format: {app_id}")
-        return f"projects/{app_id_splits[0]}/locations/{app_id_splits[1]}/jobs/{app_id_splits[2]}"
-
-    def _get_job(self, app_id: str) -> "batch_v1.Job":
-        from google.cloud import batch_v1
-
-        job_name = self._app_id_to_job_full_name(app_id)
-        request = batch_v1.GetJobRequest(
-            name=job_name,
-        )
-        return self._client.get_job(request=request)
-
-    def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
-        job = self._get_job(app_id)
-        if job is None:
-            print(f"app not found: {app_id}")
-            return None
-
-        gpu = 0
-        if len(job.allocation_policy.instances) != 0:
-            gpu_type = job.allocation_policy.instances[0].policy.machine_type
-            gpu = GPU_TYPE_TO_COUNT[gpu_type]
-
-        roles = {}
-        for tg in job.task_groups:
-            env = tg.task_spec.environment.variables
-            role = env["TORCHX_ROLE_NAME"]
-            container = tg.task_spec.runnables[1].container
-            roles[role] = Role(
-                name=role,
-                num_replicas=tg.task_count,
-                image=container.image_uri,
-                entrypoint=container.commands[0],
-                args=list(container.commands[1:]),
-                resource=Resource(
-                    cpu=int(tg.task_spec.compute_resource.cpu_milli / 1000),
-                    memMB=tg.task_spec.compute_resource.memory_mib,
-                    gpu=gpu,
-                ),
-                env=dict(env),
-                max_retries=tg.task_spec.max_retry_count,
-            )
-
-        # Map job -> DescribeAppResponse
-        # TODO map role/replica status
-        desc = DescribeAppResponse(
-            app_id=app_id,
-            state=JOB_STATE[job.status.state.name],
-            roles=list(roles.values()),
-        )
-        return desc
-
-    def log_iter(
-        self,
-        app_id: str,
-        role_name: str = "",
-        k: int = 0,
-        regex: Optional[str] = None,
-        since: Optional[datetime] = None,
-        until: Optional[datetime] = None,
-        should_tail: bool = False,
-        streams: Optional[Stream] = None,
-    ) -> Iterable[str]:
-        if streams not in (None, Stream.COMBINED):
-            raise ValueError("GCPBatchScheduler only supports COMBINED log stream")
-
-        job = self._get_job(app_id)
-        if not job:
-            raise ValueError(f"app not found: {app_id}")
-
-        job_uid = job.uid
-        filters = [
-            f"labels.job_uid={job_uid}",
-            f"labels.task_id:{job_uid}-group0-{k}",
-        ]
-
-        if since is not None:
-            filters.append(f'timestamp>="{str(since.isoformat())}"')
-        else:
-            # gcloud logger.list by default only returns logs in the last 24 hours
-            # Since many ML jobs can run longer add timestamp filter to get all logs
-            filters.append(f'timestamp>="{str(datetime.fromtimestamp(0).isoformat())}"')
-
-        if until is not None:
-            filters.append(f'timestamp<="{str(until.isoformat())}"')
-        if regex is not None:
-            filters.append(f'textPayload =~ "{regex}"')
-        filter = " AND ".join(filters)
-        return self._batch_log_iter(filter)
-
-    def _batch_log_iter(self, filter: str) -> Iterable[str]:
-        from google.cloud import logging
-
-        logger = logging.Client().logger(BATCH_LOGGER_NAME)
-        for entry in logger.list_entries(filter_=filter):
-            yield entry.payload + "\n"
-
-    def _job_full_name_to_app_id(self, job_full_name: str) -> str:
-        """
-        job_full_name format: f"projects/{project}/locations/{location}/jobs/{name}"
-        app_id format: f"{project}:{location}:{name}"
-        where 'name' was created uniquely for the job from the app name
-        """
-        job_name_splits = job_full_name.split("/")
-        if len(job_name_splits) != 6:
-            raise ValueError(f"job full name not in expected format: {job_full_name}")
-        return f"{job_name_splits[1]}:{job_name_splits[3]}:{job_name_splits[5]}"
-
-    def list(self) -> List[ListAppResponse]:
-        all_jobs = []
-        proj = self._get_project()
-        for loc in LOCATIONS:
-            jobs = self._client.list_jobs(parent=f"projects/{proj}/locations/{loc}")
-            all_jobs += jobs
-        all_jobs.sort(key=lambda job: job.create_time.timestamp(), reverse=True)
-        return [
-            ListAppResponse(
-                app_id=self._job_full_name_to_app_id(job.name),
-                state=JOB_STATE[job.status.state.name],
-            )
-            for job in all_jobs
-        ]
-
-    def _validate(self, app: AppDef, scheduler: str, cfg: GCPBatchOpts) -> None:
-        # Skip validation step
-        pass
-
-    def _cancel_existing(self, app_id: str) -> None:
-        from google.cloud import batch_v1
-
-        job_name = self._app_id_to_job_full_name(app_id)
-        request = batch_v1.DeleteJobRequest(
-            name=job_name,
-            reason="Killed via TorchX",
-        )
-        self._client.delete_job(request=request)
-
-
-def create_scheduler(
-    session_name: str,
-    # pyre-fixme[2]: Parameter annotation cannot be `Any`.
-    client: Optional[Any] = None,
-    **kwargs: object,
-) -> GCPBatchScheduler:
-    return GCPBatchScheduler(
-        session_name=session_name,
-        client=client,
-    )
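The deleted scheduler's docstrings above fully specify its handle format; a worked example of the app_id <-> job-name round trip it used (illustrative only, since the module is gone as of this release):

# Round-trip the identifier formats documented in the deleted
# _app_id_to_job_full_name / _job_full_name_to_app_id methods above.
app_id = "my-project:us-central1:echo-abc123"
project, location, name = app_id.split(":")
job_full_name = f"projects/{project}/locations/{location}/jobs/{name}"
assert job_full_name == "projects/my-project/locations/us-central1/jobs/echo-abc123"
assert ":".join(job_full_name.split("/")[1::2]) == app_id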