tetra_rp-0.5.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tetra_rp/__init__.py +37 -0
- tetra_rp/client.py +59 -0
- tetra_rp/core/__init__.py +0 -0
- tetra_rp/core/api/__init__.py +5 -0
- tetra_rp/core/api/runpod.py +212 -0
- tetra_rp/core/pool/__init__.py +0 -0
- tetra_rp/core/pool/cluster_manager.py +177 -0
- tetra_rp/core/pool/dataclass.py +18 -0
- tetra_rp/core/pool/ex.py +38 -0
- tetra_rp/core/pool/job.py +22 -0
- tetra_rp/core/pool/worker.py +19 -0
- tetra_rp/core/resources/__init__.py +33 -0
- tetra_rp/core/resources/base.py +47 -0
- tetra_rp/core/resources/cloud.py +4 -0
- tetra_rp/core/resources/cpu.py +34 -0
- tetra_rp/core/resources/environment.py +41 -0
- tetra_rp/core/resources/gpu.py +53 -0
- tetra_rp/core/resources/live_serverless.py +32 -0
- tetra_rp/core/resources/resource_manager.py +80 -0
- tetra_rp/core/resources/serverless.py +476 -0
- tetra_rp/core/resources/template.py +94 -0
- tetra_rp/core/resources/utils.py +50 -0
- tetra_rp/core/utils/__init__.py +0 -0
- tetra_rp/core/utils/backoff.py +43 -0
- tetra_rp/core/utils/json.py +33 -0
- tetra_rp/core/utils/singleton.py +7 -0
- tetra_rp/logger.py +34 -0
- tetra_rp/protos/__init__.py +0 -0
- tetra_rp/protos/remote_execution.py +57 -0
- tetra_rp/stubs/__init__.py +5 -0
- tetra_rp/stubs/live_serverless.py +133 -0
- tetra_rp/stubs/registry.py +85 -0
- tetra_rp/stubs/serverless.py +30 -0
- tetra_rp-0.5.5.dist-info/METADATA +806 -0
- tetra_rp-0.5.5.dist-info/RECORD +37 -0
- tetra_rp-0.5.5.dist-info/WHEEL +5 -0
- tetra_rp-0.5.5.dist-info/top_level.txt +1 -0
tetra_rp/core/resources/serverless.py
@@ -0,0 +1,476 @@
import asyncio
import logging
import os
from typing import Any, Dict, List, Optional
from enum import Enum
from pydantic import (
    field_serializer,
    field_validator,
    model_validator,
    BaseModel,
    Field,
)

from runpod.endpoint.runner import Job

from ..api.runpod import RunpodGraphQLClient
from ..utils.backoff import get_backoff_delay

from .cloud import runpod
from .base import DeployableResource
from .template import PodTemplate, KeyValuePair
from .gpu import GpuGroup
from .cpu import CpuInstanceType
from .environment import EnvironmentVars


# Environment variables are loaded from the .env file
def get_env_vars() -> Dict[str, str]:
    """
    Returns the environment variables from the .env file.
    {
        "KEY": "VALUE",
    }
    """
    env_vars = EnvironmentVars()
    return env_vars.get_env()


log = logging.getLogger(__name__)


CONSOLE_BASE_URL = os.environ.get("CONSOLE_BASE_URL", "https://console.runpod.io")
CONSOLE_URL = f"{CONSOLE_BASE_URL}/serverless/user/endpoint/%s"


class ServerlessScalerType(Enum):
    QUEUE_DELAY = "QUEUE_DELAY"
    REQUEST_COUNT = "REQUEST_COUNT"


class CudaVersion(Enum):
    V11_8 = "11.8"
    V12_0 = "12.0"
    V12_1 = "12.1"
    V12_2 = "12.2"
    V12_3 = "12.3"
    V12_4 = "12.4"
    V12_5 = "12.5"
    V12_6 = "12.6"
    V12_7 = "12.7"
    V12_8 = "12.8"


class ServerlessResource(DeployableResource):
    """
    Base class for GPU serverless resource
    """

    _input_only = {"id", "cudaVersions", "env", "gpus", "flashboot", "imageName"}

    # === Input-only Fields ===
    cudaVersions: Optional[List[CudaVersion]] = []  # for allowedCudaVersions
    env: Optional[Dict[str, str]] = Field(default_factory=get_env_vars)
    flashboot: Optional[bool] = True
    gpus: Optional[List[GpuGroup]] = [GpuGroup.ANY]  # for gpuIds
    imageName: Optional[str] = ""  # for template.imageName

    # === Input Fields ===
    executionTimeoutMs: Optional[int] = None
    gpuCount: Optional[int] = 1
    idleTimeout: Optional[int] = 5
    instanceIds: Optional[List[CpuInstanceType]] = None
    locations: Optional[str] = None
    name: str
    networkVolumeId: Optional[str] = None
    scalerType: Optional[ServerlessScalerType] = ServerlessScalerType.QUEUE_DELAY
    scalerValue: Optional[int] = 4
    templateId: Optional[str] = None
    workersMax: Optional[int] = 3
    workersMin: Optional[int] = 0
    workersPFBTarget: Optional[int] = None

    # === Runtime Fields ===
    activeBuildid: Optional[str] = None
    aiKey: Optional[str] = None
    allowedCudaVersions: Optional[str] = None
    computeType: Optional[str] = None
    createdAt: Optional[str] = None  # TODO: use datetime
    gpuIds: Optional[str] = ""
    hubRelease: Optional[str] = None
    repo: Optional[str] = None
    template: Optional[PodTemplate] = None
    userId: Optional[str] = None

    def __str__(self) -> str:
        return f"{self.__class__.__name__}:{self.id}"

    @property
    def url(self) -> str:
        if not self.id:
            raise ValueError("Missing self.id")
        return CONSOLE_URL % self.id

    @property
    def endpoint(self) -> runpod.Endpoint:
        """
        Returns the Runpod endpoint object for this serverless resource.
        """
        if not self.id:
            raise ValueError("Missing self.id")
        return runpod.Endpoint(self.id)

    @field_serializer("scalerType")
    def serialize_scaler_type(
        self, value: Optional[ServerlessScalerType]
    ) -> Optional[str]:
        """Convert ServerlessScalerType enum to string."""
        return value.value if value is not None else None

    @field_serializer("instanceIds")
    def serialize_instance_ids(self, value: List[CpuInstanceType]) -> List[str]:
        """Convert CpuInstanceType enums to strings."""
        return [item.value if hasattr(item, "value") else str(item) for item in value]

    @field_validator("gpus")
    @classmethod
    def validate_gpus(cls, value: List[GpuGroup]) -> List[GpuGroup]:
        """Expand ANY to all GPU groups"""
        if value == [GpuGroup.ANY]:
            return GpuGroup.all()
        return value

    @model_validator(mode="after")
    def sync_input_fields(self):
        """Sync between temporary inputs and exported fields"""
        if self.flashboot:
            self.name += "-fb"

        if self.instanceIds:
            return self._sync_input_fields_cpu()
        else:
            return self._sync_input_fields_gpu()

    def _sync_input_fields_gpu(self):
        # GPU-specific fields
        if self.gpus:
            # Convert gpus list to gpuIds string
            self.gpuIds = ",".join(gpu.value for gpu in self.gpus)
        elif self.gpuIds:
            # Convert gpuIds string to gpus list (from backend responses)
            gpu_values = [v.strip() for v in self.gpuIds.split(",") if v.strip()]
            self.gpus = [GpuGroup(value) for value in gpu_values]

        if self.cudaVersions:
            # Convert cudaVersions list to allowedCudaVersions string
            self.allowedCudaVersions = ",".join(v.value for v in self.cudaVersions)
        elif self.allowedCudaVersions:
            # Convert allowedCudaVersions string to cudaVersions list (from backend responses)
            version_values = [
                v.strip() for v in self.allowedCudaVersions.split(",") if v.strip()
            ]
            self.cudaVersions = [CudaVersion(value) for value in version_values]

        return self

    def _sync_input_fields_cpu(self):
        # Override GPU-specific fields for CPU
        self.gpuCount = 0
        self.allowedCudaVersions = ""
        self.gpuIds = ""

        return self

    def is_deployed(self) -> bool:
        """
        Checks if the serverless resource is deployed and available.
        """
        try:
            if not self.id:
                return False

            response = self.endpoint.health()
            return response is not None
        except Exception as e:
            log.error(f"Error checking {self}: {e}")
            return False

    async def deploy(self) -> "DeployableResource":
        """
        Deploys the serverless resource using the provided configuration.
        Returns a DeployableResource object.
        """
        try:
            # If the resource is already deployed, return it
            if self.is_deployed():
                log.debug(f"{self} exists")
                return self

            async with RunpodGraphQLClient() as client:
                payload = self.model_dump(exclude=self._input_only, exclude_none=True)
                result = await client.create_endpoint(payload)

            if endpoint := self.__class__(**result):
                return endpoint

            raise ValueError("Deployment failed, no endpoint was returned.")

        except Exception as e:
            log.error(f"{self} failed to deploy: {e}")
            raise

    async def is_ready_for_requests(self, give_up_threshold=10) -> bool:
        """
        Asynchronously checks if the serverless resource is ready to handle
        requests by polling its health endpoint.

        Args:
            give_up_threshold (int, optional): The maximum number of polling
                attempts before giving up and raising an error. Defaults to 10.

        Returns:
            bool: True if the serverless resource is ready for requests.

        Raises:
            ValueError: If the serverless resource is not deployed.
            RuntimeError: If the health status is THROTTLED, UNHEALTHY, or UNKNOWN
                after exceeding the give_up_threshold.
        """
        if not self.is_deployed():
            raise ValueError("Serverless is not deployed")

        log.debug(f"{self} | API /health")

        current_pace = 0
        attempt = 0

        # Poll for health status
        while True:
            await asyncio.sleep(current_pace)

            health = await asyncio.to_thread(self.endpoint.health)
            health = ServerlessHealth(**health)

            if health.is_ready:
                return True
            else:
                # nothing changed, increase the gap
                attempt += 1
                indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
                if indicator:
                    log.info(f"{self} | {indicator}")

                status = health.workers.status
                if status in [
                    Status.THROTTLED,
                    Status.UNHEALTHY,
                    Status.UNKNOWN,
                ]:
                    log.debug(f"{self} | Health {status.value}")

                    if attempt >= give_up_threshold:
                        # Give up
                        raise RuntimeError(f"Health {status.value}")

                # Adjust polling pace appropriately
                current_pace = get_backoff_delay(attempt)

    async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
        """
        Executes a serverless endpoint request with the payload.
        Returns a JobOutput object.
        """
        if not self.id:
            raise ValueError("Serverless is not deployed")

        def _fetch_job():
            return self.endpoint.rp_client.post(
                f"{self.id}/runsync", payload, timeout=60
            )

        try:
            # log.debug(f"[{log_group}] Payload: {payload}")

            # Poll until requests can be sent
            await self.is_ready_for_requests()

            log.info(f"{self} | API /run_sync")
            response = await asyncio.to_thread(_fetch_job)
            return JobOutput(**response)

        except Exception as e:
            health = await asyncio.to_thread(self.endpoint.health)
            health = ServerlessHealth(**health)
            log.info(f"{self} | Health {health.workers.status}")
            log.error(f"{self} | Exception: {e}")
            raise

    async def run(self, payload: Dict[str, Any]) -> "JobOutput":
        """
        Executes a serverless endpoint async request with the payload.
        Returns a JobOutput object.
        """
        if not self.id:
            raise ValueError("Serverless is not deployed")

        job: Optional[Job] = None

        try:
            # log.debug(f"[{self}] Payload: {payload}")

            # Poll until requests can be sent
            await self.is_ready_for_requests()

            # Create a job using the endpoint
            log.info(f"{self} | API /run")
            job = await asyncio.to_thread(self.endpoint.run, request_input=payload)

            log_subgroup = f"Job:{job.job_id}"

            log.info(f"{self} | Started {log_subgroup}")

            current_pace = 0
            attempt = 0
            job_status = Status.UNKNOWN
            last_status = job_status

            # Poll for job status
            while True:
                await asyncio.sleep(current_pace)

                if await self.is_ready_for_requests():
                    # Check job status
                    job_status = await asyncio.to_thread(job.status)

                    if last_status == job_status:
                        # nothing changed, increase the gap
                        attempt += 1
                        indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
                        if indicator:
                            log.info(f"{log_subgroup} | {indicator}")
                    else:
                        # status changed, reset the gap
                        log.info(f"{log_subgroup} | Status: {job_status}")
                        attempt = 0

                    last_status = job_status

                # Adjust polling pace appropriately
                current_pace = get_backoff_delay(attempt)

                if job_status in ("COMPLETED", "FAILED", "CANCELLED"):
                    response = await asyncio.to_thread(job._fetch_job)
                    return JobOutput(**response)

        except Exception as e:
            if job and job.job_id:
                log.info(f"{self} | Cancelling job {job.job_id}")
                await asyncio.to_thread(job.cancel)

            log.error(f"{self} | Exception: {e}")
            raise


class ServerlessEndpoint(ServerlessResource):
    """
    Represents a serverless endpoint distinct from a live serverless.
    Inherits from ServerlessResource.
    """

    @model_validator(mode="after")
    def set_serverless_template(self):
        if not any([self.imageName, self.template, self.templateId]):
            raise ValueError(
                "Either imageName, template, or templateId must be provided"
            )

        if not self.templateId and not self.template:
            self.template = PodTemplate(
                name=self.resource_id,
                imageName=self.imageName,
                env=KeyValuePair.from_dict(self.env or get_env_vars()),
            )

        elif self.template:
            self.template.name = f"{self.resource_id}__{self.template.resource_id}"
            if self.imageName:
                self.template.imageName = self.imageName
            if self.env:
                self.template.env = KeyValuePair.from_dict(self.env)

        return self


class CpuServerlessEndpoint(ServerlessEndpoint):
    """
    Convenience class for CPU serverless endpoint.
    Represents a CPU-only serverless endpoint distinct from a live serverless.
    Inherits from ServerlessEndpoint.
    """

    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]


class JobOutput(BaseModel):
    id: str
    workerId: str
    status: str
    delayTime: int
    executionTime: int
    output: Optional[Any] = None
    error: Optional[str] = ""

    def model_post_init(self, __context):
        log_group = f"Worker:{self.workerId}"
        log.info(f"{log_group} | Delay Time: {self.delayTime} ms")
        log.info(f"{log_group} | Execution Time: {self.executionTime} ms")


class Status(str, Enum):
    READY = "READY"
    INITIALIZING = "INITIALIZING"
    THROTTLED = "THROTTLED"
    UNHEALTHY = "UNHEALTHY"
    UNKNOWN = "UNKNOWN"


class WorkersHealth(BaseModel):
    idle: int
    initializing: int
    ready: int
    running: int
    throttled: int
    unhealthy: int

    @property
    def status(self) -> Status:
        if self.ready or self.idle or self.running:
            return Status.READY

        if self.initializing:
            return Status.INITIALIZING

        if self.throttled:
            return Status.THROTTLED

        if self.unhealthy:
            return Status.UNHEALTHY

        return Status.UNKNOWN


class JobsHealth(BaseModel):
    completed: int
    failed: int
    inProgress: int
    inQueue: int
    retried: int


class ServerlessHealth(BaseModel):
    workers: WorkersHealth
    jobs: JobsHealth

    @property
    def is_ready(self) -> bool:
        return self.workers.status == Status.READY
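The health models at the end of the file are plain Pydantic; illustratively (with made-up numbers), a /health payload with only initializing workers classifies like this:

from tetra_rp.core.resources.serverless import ServerlessHealth, Status

sample = {
    "workers": {"idle": 0, "initializing": 2, "ready": 0,
                "running": 0, "throttled": 0, "unhealthy": 0},
    "jobs": {"completed": 10, "failed": 0, "inProgress": 1,
             "inQueue": 3, "retried": 0},
}
health = ServerlessHealth(**sample)
assert health.workers.status == Status.INITIALIZING
assert health.is_ready is False  # no ready/idle/running workers yet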
tetra_rp/core/resources/template.py
@@ -0,0 +1,94 @@
import requests
from typing import Dict, List, Optional, Any
from pydantic import BaseModel, model_validator
from .base import BaseResource


class KeyValuePair(BaseModel):
    key: str
    value: str

    @classmethod
    def from_dict(cls, data: Dict[str, str]) -> "List[KeyValuePair]":
        """
        Create a list of KeyValuePair instances from a dictionary.
        """
        if not isinstance(data, dict):
            raise ValueError("Input must be a dictionary.")

        return [cls(key=key, value=value) for key, value in data.items()]


class PodTemplate(BaseResource):
    advancedStart: Optional[bool] = False
    config: Optional[Dict[str, Any]] = {}
    containerDiskInGb: Optional[int] = 10
    containerRegistryAuthId: Optional[str] = ""
    dockerArgs: Optional[str] = ""
    env: Optional[List[KeyValuePair]] = []
    imageName: Optional[str] = ""
    name: Optional[str] = ""
    ports: Optional[str] = ""
    startScript: Optional[str] = ""

    @model_validator(mode="after")
    def sync_input_fields(self):
        self.name = f"{self.name}__{self.resource_id}"
        return self


def update_system_dependencies(
    template_id, token, system_dependencies, base_entry_cmd=None
):
    """
    Updates a Runpod template with system dependencies installed via apt-get,
    and appends the app start command.

    Args:
        template_id (str): Runpod template ID.
        token (str): Runpod API token.
        system_dependencies (List[str]): List of apt packages to install.
        base_entry_cmd (List[str]): The default command to run the app, e.g. ["uv", "run", "handler.py"]

    Returns:
        dict: API response JSON or error info.
    """

    # Compose apt-get install command if any packages specified
    apt_cmd = ""
    if system_dependencies:
        joined_pkgs = " ".join(system_dependencies)
        apt_cmd = f"apt-get update && apt-get install -y {joined_pkgs} && "

    # Default start command if not provided
    app_cmd = base_entry_cmd or ["uv", "run", "handler.py"]
    app_cmd_str = " ".join(app_cmd)

    # Full command to run in entrypoint shell
    full_cmd = f"{apt_cmd}exec {app_cmd_str}"

    payload = {
        # other required fields like disk, env, image, etc. should be fetched or passed in real usage
        "dockerEntrypoint": ["/bin/bash", "-c", full_cmd],
        "dockerStartCmd": [],
        # placeholder values, replace as needed or fetch from current template state
        "containerDiskInGb": 50,
        "containerRegistryAuthId": "",
        "env": {},
        "imageName": "your-image-name",
        "isPublic": False,
        "name": "your-template-name",
        "ports": ["8888/http", "22/tcp"],
        "readme": "",
        "volumeInGb": 20,
        "volumeMountPath": "/workspace",
    }

    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    url = f"https://rest.runpod.io/v1/templates/{template_id}/update"
    response = requests.post(url, json=payload, headers=headers)

    try:
        return response.json()
    except Exception:
        return {"error": "Invalid JSON response", "text": response.text}
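A hypothetical call sketch for update_system_dependencies (every identifier below is a placeholder; note the function currently posts placeholder template fields such as imageName and disk sizes, as its own comments warn):

from tetra_rp.core.resources.template import update_system_dependencies

result = update_system_dependencies(
    template_id="abc123",           # placeholder template ID
    token="YOUR_RUNPOD_API_TOKEN",  # placeholder API token
    system_dependencies=["ffmpeg", "git"],
    base_entry_cmd=["uv", "run", "handler.py"],
)
# The composed entrypoint becomes:
#   /bin/bash -c "apt-get update && apt-get install -y ffmpeg git && exec uv run handler.py"
print(result)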
tetra_rp/core/resources/utils.py
@@ -0,0 +1,50 @@
from typing import Callable, Any, List, Union
from pydantic import BaseModel
from .gpu import GpuType, GpuTypeDetail
from .serverless import ServerlessEndpoint


"""
Define the mapping for the methods and their return types
Only include methods from runpod.*
"""
RUNPOD_TYPED_OPERATIONS = {
    "get_gpus": List[GpuType],
    "get_gpu": GpuTypeDetail,
    "get_endpoints": List[ServerlessEndpoint],
}


def inquire(method: Callable, *args, **kwargs) -> Union[List[Any], Any]:
    """
    This function dynamically determines the return type of the provided method
    based on a predefined mapping (`RUNPOD_TYPED_OPERATIONS`) and validates the
    result using Pydantic models if applicable.

    Refer to `RUNPOD_TYPED_OPERATIONS` for the mapping.

    Example:
    ----------
    >>> import runpod
    >>> inquire(runpod.get_gpus)
    [
        GpuType(id='NVIDIA A100 80GB', displayName='A100 80GB', memoryInGb=80),
        GpuType(id='NVIDIA A100 40GB', displayName='A100 40GB', memoryInGb=40),
        GpuType(id='NVIDIA A10', displayName='A10', memoryInGb=24)
    ]
    """
    method_name = method.__name__
    return_type = RUNPOD_TYPED_OPERATIONS.get(method_name)

    raw_result = method(*args, **kwargs)

    if hasattr(return_type, "__origin__") and return_type.__origin__ is list:
        # List case
        model_type = return_type.__args__[0]
        if issubclass(model_type, BaseModel):
            return [model_type.model_validate(item) for item in raw_result]
    elif isinstance(return_type, type) and issubclass(return_type, BaseModel):
        # Single object case
        return return_type.model_validate(raw_result)
    else:
        raise ValueError(f"Unsupported return type for method '{method_name}'")
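A hedged sketch of inquire() in use, mirroring the docstring example; it assumes the runpod SDK is installed, an API key is configured, and that runpod.get_gpu accepts a GPU id argument:

import runpod

from tetra_rp.core.resources.utils import inquire

gpus = inquire(runpod.get_gpus)               # validated into List[GpuType]
detail = inquire(runpod.get_gpu, gpus[0].id)  # validated into GpuTypeDetail
print(detail)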
tetra_rp/core/utils/backoff.py
@@ -0,0 +1,43 @@
import math
import random
from enum import Enum


class BackoffStrategy(str, Enum):
    EXPONENTIAL = "exponential"
    LINEAR = "linear"
    LOGARITHMIC = "logarithmic"


def get_backoff_delay(
    attempt: int,
    base: float = 0.1,
    max_seconds: float = 10.0,
    jitter: float = 0.2,
    strategy: BackoffStrategy = BackoffStrategy.EXPONENTIAL,
) -> float:
    """
    Returns a backoff delay in seconds based on the number of attempts and strategy.

    Parameters:
    - attempt (int): The number of failed attempts or polls.
    - base (float): The base delay time in seconds.
    - max_seconds (float): The maximum delay.
    - jitter (float): Random jitter as a fraction (e.g., 0.2 = ±20%). Prevents the thundering herd.
    - strategy (BackoffStrategy): The backoff curve to apply.

    Returns:
    - float: The delay in seconds.
    """
    if strategy == BackoffStrategy.EXPONENTIAL:
        delay = base * (2**attempt)
    elif strategy == BackoffStrategy.LINEAR:
        delay = base + (attempt * base)
    elif strategy == BackoffStrategy.LOGARITHMIC:
        delay = base * math.log2(attempt + 2)
    else:
        raise ValueError(f"Unsupported backoff strategy: {strategy}")

    # Clamp to max and apply jitter
    delay = min(delay, max_seconds)
    return delay * random.uniform(1 - jitter, 1 + jitter)
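A quick illustration of the default curve (each value jitters by ±20% per call): with base=0.1 the exponential delay doubles per attempt until clamped at max_seconds=10.0.

from tetra_rp.core.utils.backoff import get_backoff_delay

for attempt in range(6):
    print(attempt, round(get_backoff_delay(attempt), 2))
# roughly 0.1, 0.2, 0.4, 0.8, 1.6, 3.2 seconds, each ±20%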
tetra_rp/core/utils/json.py
@@ -0,0 +1,33 @@
from enum import Enum
from typing import Any
from pydantic import BaseModel


def normalize_for_json(obj: Any) -> Any:
    """
    Recursively normalizes an object for JSON serialization.

    This function handles various data types and ensures that objects
    are converted into JSON-serializable formats. It supports the following:
    - `BaseModel` instances: Converts them to dictionaries using `model_dump()`.
    - `Enum` members: Converts them to their `value`.
    - Dictionaries: Recursively normalizes their values.
    - Lists: Recursively normalizes their elements.
    - Tuples: Recursively normalizes their elements and returns a tuple.
    - Other types: Returns the object as is.

    Args:
        obj (Any): The object to normalize.

    Returns:
        Any: A JSON-serializable representation of the input object.
    """
    if isinstance(obj, BaseModel):
        return normalize_for_json(obj.model_dump())
    elif isinstance(obj, Enum):
        return obj.value
    elif isinstance(obj, dict):
        return {k: normalize_for_json(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return type(obj)(normalize_for_json(i) for i in obj)
    else:
        return obj
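An illustrative round-trip (not from the package) mixing a Pydantic model, an Enum, and a tuple, then dumping to JSON; the Color and Point types are made up for the example.

import json
from enum import Enum

from pydantic import BaseModel

from tetra_rp.core.utils.json import normalize_for_json


class Color(Enum):
    RED = "red"


class Point(BaseModel):
    x: int
    y: int


data = {"point": Point(x=1, y=2), "tags": ("a", "b"), "color": Color.RED}
print(json.dumps(normalize_for_json(data)))
# {"point": {"x": 1, "y": 2}, "tags": ["a", "b"], "color": "red"}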