tetra-rp 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tetra_rp/client.py CHANGED
@@ -1,17 +1,19 @@
+ import inspect
  import logging
  from functools import wraps
- from typing import List
- from .core.resources import ServerlessResource, ResourceManager
- from .stubs import stub_resource
+ from typing import List, Optional

+ from .core.resources import ResourceManager, ServerlessResource
+ from .execute_class import create_remote_class
+ from .stubs import stub_resource

  log = logging.getLogger(__name__)


  def remote(
      resource_config: ServerlessResource,
-     dependencies: List[str] = None,
-     system_dependencies: List[str] = None,
+     dependencies: Optional[List[str]] = None,
+     system_dependencies: Optional[List[str]] = None,
      **extra,
  ):
      """
@@ -24,8 +26,6 @@ def remote(
              to be provisioned or used.
          dependencies (List[str], optional): A list of pip package names to be installed in the remote
              environment before executing the function. Defaults to None.
-         mount_volume (NetworkVolume, optional): Configuration for creating and mounting a network volume.
-             Should contain 'size', 'datacenter_id', and 'name' keys. Defaults to None.
          extra (dict, optional): Additional parameters for the execution of the resource. Defaults to an empty dict.

      Returns:
@@ -45,17 +45,26 @@ def remote(
      ```
      """

-     def decorator(func):
-         @wraps(func)
-         async def wrapper(*args, **kwargs):
-             resource_manager = ResourceManager()
-             remote_resource = await resource_manager.get_or_deploy_resource(
-                 resource_config
+     def decorator(func_or_class):
+         if inspect.isclass(func_or_class):
+             # Handle class decoration
+             return create_remote_class(
+                 func_or_class, resource_config, dependencies, system_dependencies, extra
              )
+         else:
+             # Handle function decoration (unchanged)
+             @wraps(func_or_class)
+             async def wrapper(*args, **kwargs):
+                 resource_manager = ResourceManager()
+                 remote_resource = await resource_manager.get_or_deploy_resource(
+                     resource_config
+                 )

-             stub = stub_resource(remote_resource, **extra)
-             return await stub(func, dependencies, system_dependencies, *args, **kwargs)
+                 stub = stub_resource(remote_resource, **extra)
+                 return await stub(
+                     func_or_class, dependencies, system_dependencies, *args, **kwargs
+                 )

-             return wrapper
+             return wrapper

      return decorator
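
With this change the decorator accepts classes as well as functions. A minimal usage sketch, assuming the import paths shown in this diff; the endpoint configuration and the decorated names are illustrative, not taken from the package:

```python
import asyncio

from tetra_rp.client import remote
from tetra_rp.core.resources.live_serverless import LiveServerless

gpu = LiveServerless(name="example-endpoint")  # illustrative configuration


@remote(resource_config=gpu, dependencies=["numpy"])
def add(a, b):  # function path: wrapped by `wrapper` above
    return a + b


@remote(resource_config=gpu, dependencies=["numpy"])
class Counter:  # class path: handed to create_remote_class
    def __init__(self, start):
        self.value = start

    def bump(self, by=1):
        self.value += by
        return self.value


async def main():
    print(await add(1, 2))        # executes remotely
    counter = Counter(10)         # returns a RemoteClassWrapper instance
    print(await counter.bump(5))  # each method call is an async proxy


# asyncio.run(main())  # requires a deployed Runpod endpoint and API key
```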
tetra_rp/core/resources/live_serverless.py CHANGED
@@ -3,9 +3,13 @@ import os
  from pydantic import model_validator
  from .serverless import ServerlessEndpoint

-
- TETRA_GPU_IMAGE = os.environ.get("TETRA_GPU_IMAGE", "runpod/tetra-rp:dev")
- TETRA_CPU_IMAGE = os.environ.get("TETRA_CPU_IMAGE", "runpod/tetra-rp-cpu:dev")
+ TETRA_IMAGE_TAG = os.environ.get("TETRA_IMAGE_TAG", "latest")
+ TETRA_GPU_IMAGE = os.environ.get(
+     "TETRA_GPU_IMAGE", f"runpod/tetra-rp:{TETRA_IMAGE_TAG}"
+ )
+ TETRA_CPU_IMAGE = os.environ.get(
+     "TETRA_CPU_IMAGE", f"runpod/tetra-rp-cpu:{TETRA_IMAGE_TAG}"
+ )


  class LiveServerless(ServerlessEndpoint):
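
The image defaults now resolve in two steps: an explicit TETRA_GPU_IMAGE or TETRA_CPU_IMAGE wins outright, otherwise TETRA_IMAGE_TAG (default "latest") is interpolated into the stock image names. A small sketch of the resulting precedence:

```python
import os

# Mirrors the lookup order above: full-image override first, then tag override.
tag = os.environ.get("TETRA_IMAGE_TAG", "latest")
gpu_image = os.environ.get("TETRA_GPU_IMAGE", f"runpod/tetra-rp:{tag}")

# TETRA_IMAGE_TAG=1.2.3             -> "runpod/tetra-rp:1.2.3"
# TETRA_GPU_IMAGE=myorg/custom:dev  -> "myorg/custom:dev" (tag is ignored)
# neither set                       -> "runpod/tetra-rp:latest"
```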
tetra_rp/core/utils/constants.py ADDED
@@ -0,0 +1,10 @@
+ """
+ Constants for utility modules and caching configurations.
+
+ This module contains configurable constants used across the tetra-rp codebase
+ to ensure consistency and easy maintenance.
+ """
+
+ # Cache key generation constants
+ HASH_TRUNCATE_LENGTH = 16  # Length to truncate hash values for cache keys
+ UUID_FALLBACK_LENGTH = 8  # Length to truncate UUID values for fallback keys
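
These lengths feed directly into the cache keys built in execute_class.py below; for instance, a 64-character SHA-256 hex digest is shortened to its first 16 characters. A quick sketch:

```python
import hashlib

HASH_TRUNCATE_LENGTH = 16  # same value as the constant above

digest = hashlib.sha256(b"class Foo: ...").hexdigest()  # 64 hex characters
short = digest[:HASH_TRUNCATE_LENGTH]                   # 16-character key fragment
assert len(short) == 16
```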
tetra_rp/core/utils/lru_cache.py ADDED
@@ -0,0 +1,75 @@
+ """
+ LRU Cache implementation using OrderedDict for memory-efficient caching with automatic eviction.
+
+ This module provides a Least Recently Used (LRU) cache implementation that automatically
+ manages memory by evicting the least recently used items when the cache exceeds its
+ maximum size limit. It maintains O(1) access time and provides a dict-like interface.
+ Thread-safe for concurrent access.
+ """
+
+ import threading
+ from collections import OrderedDict
+ from typing import Any, Dict, Optional
+
+
+ class LRUCache:
+     """
+     A Least Recently Used (LRU) cache implementation using OrderedDict.
+
+     Automatically evicts the least recently used items when the cache exceeds
+     the maximum size limit. Provides dict-like interface with O(1) operations.
+     Thread-safe for concurrent access using RLock.
+
+     Args:
+         max_size: Maximum number of items to store in cache (default: 1000)
+     """
+
+     def __init__(self, max_size: int = 1000):
+         self.max_size = max_size
+         self.cache = OrderedDict()
+         self._lock = threading.RLock()
+
+     def get(self, key: str) -> Optional[Dict[str, Any]]:
+         """Get item from cache, moving it to end (most recent) if found."""
+         with self._lock:
+             if key in self.cache:
+                 self.cache.move_to_end(key)
+                 return self.cache[key]
+             return None
+
+     def set(self, key: str, value: Dict[str, Any]) -> None:
+         """Set item in cache, evicting oldest if at capacity."""
+         with self._lock:
+             if key in self.cache:
+                 self.cache.move_to_end(key)
+             else:
+                 if len(self.cache) >= self.max_size:
+                     self.cache.popitem(last=False)  # Remove oldest
+             self.cache[key] = value
+
+     def clear(self) -> None:
+         """Clear all items from cache."""
+         with self._lock:
+             self.cache.clear()
+
+     def __contains__(self, key: str) -> bool:
+         """Check if key exists in cache."""
+         with self._lock:
+             return key in self.cache
+
+     def __len__(self) -> int:
+         """Return number of items in cache."""
+         with self._lock:
+             return len(self.cache)
+
+     def __getitem__(self, key: str) -> Dict[str, Any]:
+         """Get item using bracket notation, moving to end if found."""
+         with self._lock:
+             if key in self.cache:
+                 self.cache.move_to_end(key)
+                 return self.cache[key]
+             raise KeyError(key)
+
+     def __setitem__(self, key: str, value: Dict[str, Any]) -> None:
+         """Set item using bracket notation."""
+         self.set(key, value)
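
A short sketch of the eviction behavior at capacity (import path taken from the RECORD entries further below):

```python
from tetra_rp.core.utils.lru_cache import LRUCache

cache = LRUCache(max_size=2)
cache["a"] = {"v": 1}
cache["b"] = {"v": 2}
cache.get("a")         # touches "a", so "b" is now least recently used
cache["c"] = {"v": 3}  # at capacity: evicts "b", the oldest entry

assert "a" in cache and "c" in cache and "b" not in cache
assert len(cache) == 2
```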
tetra_rp/execute_class.py ADDED
@@ -0,0 +1,316 @@
+ """
+ Class execution module for remote class instantiation and method calls.
+
+ This module provides functionality to create and execute remote class instances,
+ with automatic caching of class serialization data to improve performance and
+ prevent memory leaks through LRU eviction.
+ """
+
+ import base64
+ import hashlib
+ import inspect
+ import logging
+ import textwrap
+ import uuid
+ from typing import List, Optional, Type
+
+ import cloudpickle
+
+ from .core.resources import ResourceManager, ServerlessResource
+ from .core.utils.constants import HASH_TRUNCATE_LENGTH, UUID_FALLBACK_LENGTH
+ from .core.utils.lru_cache import LRUCache
+ from .protos.remote_execution import FunctionRequest
+ from .stubs import stub_resource
+
+ log = logging.getLogger(__name__)
+
+ # Global in-memory cache for serialized class data with LRU eviction
+ _SERIALIZED_CLASS_CACHE = LRUCache(max_size=1000)
+
+
+ def serialize_constructor_args(args, kwargs):
+     """Serialize constructor arguments for caching."""
+     serialized_args = [
+         base64.b64encode(cloudpickle.dumps(arg)).decode("utf-8") for arg in args
+     ]
+     serialized_kwargs = {
+         k: base64.b64encode(cloudpickle.dumps(v)).decode("utf-8")
+         for k, v in kwargs.items()
+     }
+     return serialized_args, serialized_kwargs
+
+
+ def get_or_cache_class_data(
+     cls: Type, args: tuple, kwargs: dict, cache_key: str
+ ) -> str:
+     """Get class code from cache or extract and cache it."""
+     if cache_key not in _SERIALIZED_CLASS_CACHE:
+         # Cache miss - extract and cache class code
+         clean_class_code = extract_class_code_simple(cls)
+
+         try:
+             serialized_args, serialized_kwargs = serialize_constructor_args(
+                 args, kwargs
+             )
+
+             # Cache the serialized data
+             _SERIALIZED_CLASS_CACHE.set(
+                 cache_key,
+                 {
+                     "class_code": clean_class_code,
+                     "constructor_args": serialized_args,
+                     "constructor_kwargs": serialized_kwargs,
+                 },
+             )
+
+             log.debug(f"Cached class data for {cls.__name__} with key: {cache_key}")
+
+         except (TypeError, AttributeError, OSError) as e:
+             log.warning(
+                 f"Could not serialize constructor arguments for {cls.__name__}: {e}"
+             )
+             log.warning(
+                 f"Skipping constructor argument caching for {cls.__name__} due to unserializable arguments"
+             )
+
+             # Store minimal cache entry to avoid repeated attempts
+             _SERIALIZED_CLASS_CACHE.set(
+                 cache_key,
+                 {
+                     "class_code": clean_class_code,
+                     "constructor_args": None,  # Signal that args couldn't be cached
+                     "constructor_kwargs": None,
+                 },
+             )
+
+         return clean_class_code
+     else:
+         # Cache hit - retrieve cached data
+         cached_data = _SERIALIZED_CLASS_CACHE.get(cache_key)
+         log.debug(
+             f"Retrieved cached class data for {cls.__name__} with key: {cache_key}"
+         )
+         return cached_data["class_code"]
+
+
+ def extract_class_code_simple(cls: Type) -> str:
+     """Extract clean class code without decorators and proper indentation"""
+     try:
+         # Get source code
+         source = inspect.getsource(cls)
+
+         # Split into lines
+         lines = source.split("\n")
+
+         # Find the class definition line (starts with 'class' and contains ':')
+         class_start_idx = -1
+         for i, line in enumerate(lines):
+             stripped = line.strip()
+             if stripped.startswith("class ") and ":" in stripped:
+                 class_start_idx = i
+                 break
+
+         if class_start_idx == -1:
+             raise ValueError("Could not find class definition")
+
+         # Take lines from class definition onwards (ignore everything before)
+         class_lines = lines[class_start_idx:]
+
+         # Remove empty lines at the end
+         while class_lines and not class_lines[-1].strip():
+             class_lines.pop()
+
+         # Join back and dedent to remove any leading indentation
+         class_code = "\n".join(class_lines)
+         class_code = textwrap.dedent(class_code)
+
+         # Validate the code by trying to compile it
+         compile(class_code, "<string>", "exec")
+
+         log.debug(f"Successfully extracted class code for {cls.__name__}")
+         return class_code
+
+     except Exception as e:
+         log.warning(f"Could not extract class code for {cls.__name__}: {e}")
+         log.warning("Falling back to basic class structure")
+
+         # Enhanced fallback: try to preserve method signatures
+         fallback_methods = []
+         for name, method in inspect.getmembers(cls, predicate=inspect.isfunction):
+             try:
+                 sig = inspect.signature(method)
+                 fallback_methods.append(f"    def {name}{sig}:")
+                 fallback_methods.append("        pass")
+                 fallback_methods.append("")
+             except (TypeError, ValueError, OSError) as e:
+                 log.warning(f"Could not extract method signature for {name}: {e}")
+                 fallback_methods.append(f"    def {name}(self, *args, **kwargs):")
+                 fallback_methods.append("        pass")
+                 fallback_methods.append("")
+
+         fallback_code = f"""class {cls.__name__}:
+     def __init__(self, *args, **kwargs):
+         pass
+
+ {chr(10).join(fallback_methods)}"""
+
+         return fallback_code
+
+
+ def get_class_cache_key(
+     cls: Type, constructor_args: tuple, constructor_kwargs: dict
+ ) -> str:
+     """Generate a cache key for class serialization based on class source and constructor args.
+
+     Args:
+         cls: The class type to generate a key for
+         constructor_args: Positional arguments passed to class constructor
+         constructor_kwargs: Keyword arguments passed to class constructor
+
+     Returns:
+         A unique cache key string, or a UUID-based fallback if serialization fails
+
+     Note:
+         Falls back to UUID-based key if constructor arguments cannot be serialized,
+         which disables caching benefits but maintains functionality.
+     """
+     try:
+         # Get class source code for hashing
+         class_source = extract_class_code_simple(cls)
+
+         # Create hash of class source
+         class_hash = hashlib.sha256(class_source.encode()).hexdigest()
+
+         # Create hash of constructor arguments
+         args_data = cloudpickle.dumps((constructor_args, constructor_kwargs))
+         args_hash = hashlib.sha256(args_data).hexdigest()
+
+         # Combine hashes for final cache key
+         cache_key = f"{cls.__name__}_{class_hash[:HASH_TRUNCATE_LENGTH]}_{args_hash[:HASH_TRUNCATE_LENGTH]}"
+
+         log.debug(f"Generated cache key for {cls.__name__}: {cache_key}")
+         return cache_key
+
+     except (TypeError, AttributeError, OSError) as e:
+         log.warning(f"Could not generate cache key for {cls.__name__}: {e}")
+         # Fallback to basic key without caching benefits
+         return f"{cls.__name__}_{uuid.uuid4().hex[:UUID_FALLBACK_LENGTH]}"
+
+
+ def create_remote_class(
+     cls: Type,
+     resource_config: ServerlessResource,
+     dependencies: Optional[List[str]],
+     system_dependencies: Optional[List[str]],
+     extra: dict,
+ ):
+     """
+     Create a remote class wrapper.
+     """
+     # Validate inputs
+     if not inspect.isclass(cls):
+         raise TypeError(f"Expected a class, got {type(cls).__name__}")
+     if not hasattr(cls, "__name__"):
+         raise ValueError("Class must have a __name__ attribute")
+
+     class RemoteClassWrapper:
+         def __init__(self, *args, **kwargs):
+             self._class_type = cls
+             self._resource_config = resource_config
+             self._dependencies = dependencies or []
+             self._system_dependencies = system_dependencies or []
+             self._extra = extra
+             self._constructor_args = args
+             self._constructor_kwargs = kwargs
+             self._instance_id = (
+                 f"{cls.__name__}_{uuid.uuid4().hex[:UUID_FALLBACK_LENGTH]}"
+             )
+             self._initialized = False
+
+             # Generate cache key and get class code
+             self._cache_key = get_class_cache_key(cls, args, kwargs)
+             self._clean_class_code = get_or_cache_class_data(
+                 cls, args, kwargs, self._cache_key
+             )
+
+             log.debug(f"Created remote class wrapper for {cls.__name__}")
+
+         async def _ensure_initialized(self):
+             """Ensure the remote instance is created."""
+             if self._initialized:
+                 return
+
+             # Get remote resource
+             resource_manager = ResourceManager()
+             remote_resource = await resource_manager.get_or_deploy_resource(
+                 self._resource_config
+             )
+             self._stub = stub_resource(remote_resource, **self._extra)
+
+             # Create the remote instance by calling a method (which will trigger instance creation)
+             # We'll do this on first method call
+             self._initialized = True
+
+         def __getattr__(self, name):
+             """Dynamically create method proxies for all class methods."""
+             if name.startswith("_"):
+                 raise AttributeError(
+                     f"'{self.__class__.__name__}' object has no attribute '{name}'"
+                 )
+
+             async def method_proxy(*args, **kwargs):
+                 await self._ensure_initialized()
+
+                 # Get cached data
+                 cached_data = _SERIALIZED_CLASS_CACHE.get(self._cache_key)
+
+                 # Serialize method arguments (these change per call, so no caching)
+                 method_args = [
+                     base64.b64encode(cloudpickle.dumps(arg)).decode("utf-8")
+                     for arg in args
+                 ]
+                 method_kwargs = {
+                     k: base64.b64encode(cloudpickle.dumps(v)).decode("utf-8")
+                     for k, v in kwargs.items()
+                 }
+
+                 # Handle constructor args - use cached if available, else serialize fresh
+                 if cached_data["constructor_args"] is not None:
+                     # Use cached constructor args
+                     constructor_args = cached_data["constructor_args"]
+                     constructor_kwargs = cached_data["constructor_kwargs"]
+                 else:
+                     # Constructor args couldn't be cached due to serialization issues
+                     # Serialize them fresh for each method call (fallback behavior)
+                     constructor_args = [
+                         base64.b64encode(cloudpickle.dumps(arg)).decode("utf-8")
+                         for arg in self._constructor_args
+                     ]
+                     constructor_kwargs = {
+                         k: base64.b64encode(cloudpickle.dumps(v)).decode("utf-8")
+                         for k, v in self._constructor_kwargs.items()
+                     }
+
+                 request = FunctionRequest(
+                     execution_type="class",
+                     class_name=self._class_type.__name__,
+                     class_code=cached_data["class_code"],
+                     method_name=name,
+                     args=method_args,
+                     kwargs=method_kwargs,
+                     constructor_args=constructor_args,
+                     constructor_kwargs=constructor_kwargs,
+                     dependencies=self._dependencies,
+                     system_dependencies=self._system_dependencies,
+                     instance_id=self._instance_id,
+                     create_new_instance=not hasattr(
+                         self, "_stub"
+                     ),  # Create new only on first call
+                 )
+
+                 # Execute via stub
+                 return await self._stub.execute_class_method(request)  # type: ignore
+
+             return method_proxy
+
+     return RemoteClassWrapper
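
To make the caching flow concrete, a hedged sketch of the module's helpers; the class and arguments are illustrative, _SERIALIZED_CLASS_CACHE is a private module global, and this assumes the code runs from a file so inspect.getsource can find the class:

```python
from tetra_rp.execute_class import (
    _SERIALIZED_CLASS_CACHE,
    get_class_cache_key,
    get_or_cache_class_data,
)


class Greeter:
    def __init__(self, name):
        self.name = name

    def greet(self):
        return f"hello {self.name}"


# Key combines the class name with truncated hashes of its source and args.
key = get_class_cache_key(Greeter, ("world",), {})

# First call is a cache miss: the source is extracted, compiled, and stored.
code = get_or_cache_class_data(Greeter, ("world",), {}, key)
assert key in _SERIALIZED_CLASS_CACHE

# A second wrapper with identical constructor args is a cache hit.
assert get_or_cache_class_data(Greeter, ("world",), {}, key) == code
```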
tetra_rp/protos/remote_execution.py CHANGED
@@ -1,15 +1,18 @@
  # TODO: generate using betterproto
-
  from abc import ABC, abstractmethod
- from typing import List, Dict, Optional
- from pydantic import BaseModel, Field
+ from typing import Dict, List, Optional
+
+ from pydantic import BaseModel, Field, model_validator


  class FunctionRequest(BaseModel):
-     function_name: str = Field(
+     # MADE OPTIONAL - can be None for class-only execution
+     function_name: Optional[str] = Field(
+         default=None,
          description="Name of the function to execute",
      )
-     function_code: str = Field(
+     function_code: Optional[str] = Field(
+         default=None,
          description="Source code of the function to execute",
      )
      args: List = Field(
@@ -29,8 +32,67 @@ class FunctionRequest(BaseModel):
          description="Optional list of system dependencies to install before executing the function",
      )

+     # NEW FIELDS FOR CLASS SUPPORT
+     execution_type: str = Field(
+         default="function", description="Type of execution: 'function' or 'class'"
+     )
+     class_name: Optional[str] = Field(
+         default=None,
+         description="Name of the class to instantiate (for class execution)",
+     )
+     class_code: Optional[str] = Field(
+         default=None,
+         description="Source code of the class to instantiate (for class execution)",
+     )
+     constructor_args: Optional[List] = Field(
+         default_factory=list,
+         description="List of base64-encoded cloudpickle-serialized constructor arguments",
+     )
+     constructor_kwargs: Optional[Dict] = Field(
+         default_factory=dict,
+         description="Dictionary of base64-encoded cloudpickle-serialized constructor keyword arguments",
+     )
+     method_name: str = Field(
+         default="__call__",
+         description="Name of the method to call on the class instance",
+     )
+     instance_id: Optional[str] = Field(
+         default=None,
+         description="Unique identifier for the class instance (for persistence)",
+     )
+     create_new_instance: bool = Field(
+         default=True,
+         description="Whether to create a new instance or reuse existing one",
+     )
+
+     @model_validator(mode="after")
+     def validate_execution_requirements(self) -> "FunctionRequest":
+         """Validate that required fields are provided based on execution_type"""
+         if self.execution_type == "function":
+             if self.function_name is None:
+                 raise ValueError(
+                     'function_name is required when execution_type is "function"'
+                 )
+             if self.function_code is None:
+                 raise ValueError(
+                     'function_code is required when execution_type is "function"'
+                 )
+
+         elif self.execution_type == "class":
+             if self.class_name is None:
+                 raise ValueError(
+                     'class_name is required when execution_type is "class"'
+                 )
+             if self.class_code is None:
+                 raise ValueError(
+                     'class_code is required when execution_type is "class"'
+                 )
+
+         return self
+

  class FunctionResponse(BaseModel):
+     # EXISTING FIELDS (unchanged)
      success: bool = Field(
          description="Indicates if the function execution was successful",
      )
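
A short sketch of how the new validator gates the two execution types (field values are illustrative):

```python
from tetra_rp.protos.remote_execution import FunctionRequest

# Class execution: class_name and class_code must both be present.
req = FunctionRequest(
    execution_type="class",
    class_name="Greeter",
    class_code="class Greeter:\n    def greet(self):\n        return 'hi'",
    method_name="greet",
    args=[],
    kwargs={},
)

# Omitting class_code trips the model validator; pydantic raises a
# ValidationError wrapping the ValueError above.
try:
    FunctionRequest(execution_type="class", class_name="Greeter", args=[], kwargs={})
except Exception as exc:
    print(type(exc).__name__)  # ValidationError
```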
@@ -47,6 +109,15 @@ class FunctionResponse(BaseModel):
          description="Captured standard output from the function execution",
      )

+     # NEW FIELDS FOR CLASS SUPPORT
+     instance_id: Optional[str] = Field(
+         default=None, description="ID of the class instance that was used/created"
+     )
+     instance_info: Optional[Dict] = Field(
+         default=None,
+         description="Metadata about the class instance (creation time, call count, etc.)",
+     )
+

  class RemoteExecutorStub(ABC):
      """Abstract base class for remote execution."""
tetra_rp/stubs/registry.py CHANGED
@@ -1,13 +1,13 @@
  import logging
  from functools import singledispatch
- from .live_serverless import LiveServerlessStub
- from .serverless import ServerlessEndpointStub
+
  from ..core.resources import (
      CpuServerlessEndpoint,
      LiveServerless,
      ServerlessEndpoint,
  )
-
+ from .live_serverless import LiveServerlessStub
+ from .serverless import ServerlessEndpointStub

  log = logging.getLogger(__name__)

@@ -22,20 +22,29 @@ def stub_resource(resource, **extra):

  @stub_resource.register(LiveServerless)
  def _(resource, **extra):
+     stub = LiveServerlessStub(resource)
+
+     # Function execution
      async def stubbed_resource(
          func, dependencies, system_dependencies, *args, **kwargs
      ) -> dict:
          if args == (None,):
-             # cleanup: when the function is called with no args
              args = []

-         stub = LiveServerlessStub(resource)
          request = stub.prepare_request(
              func, dependencies, system_dependencies, *args, **kwargs
          )
          response = await stub.ExecuteFunction(request)
          return stub.handle_response(response)

+     # Class method execution
+     async def execute_class_method(request):
+         response = await stub.ExecuteFunction(request)
+         return stub.handle_response(response)
+
+     # Attach the method to the function
+     stubbed_resource.execute_class_method = execute_class_method
+
      return stubbed_resource
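
Because execute_class_method is attached as an attribute of the returned coroutine function, the class path in execute_class.py can reuse the same stub without a second singledispatch registration. A hedged sketch of the two call paths (the resource, function, and request objects are assumed to exist):

```python
from tetra_rp.stubs import stub_resource


async def demo(live_serverless_resource, func, request):
    stub = stub_resource(live_serverless_resource)  # dispatches on resource type

    # Function path: the stub itself is awaited with the function and deps.
    result = await stub(func, [], [], 1, 2)

    # Class path: rides on the attribute attached in the diff above.
    response = await stub.execute_class_method(request)
    return result, response
```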
{tetra_rp-0.7.0.dist-info → tetra_rp-0.9.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: tetra_rp
- Version: 0.7.0
+ Version: 0.9.0
  Summary: A Python library for distributed inference and serving of machine learning models
  Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
  License: MIT
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
  Requires-Python: <3.14,>=3.9
  Description-Content-Type: text/markdown
  Requires-Dist: cloudpickle>=3.1.1
- Requires-Dist: runpod~=1.7.9
+ Requires-Dist: runpod
  Requires-Dist: python-dotenv>=1.0.0

  # Tetra: Serverless computing for AI workloads
@@ -801,6 +801,6 @@ def fetch_data(url):
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

  <p align="center">
-   <a href="https://github.com/yourusername/tetra">Tetra</a> •
+   <a href="https://github.com/runpod/tetra-rp">Tetra</a> •
    <a href="https://runpod.io">Runpod</a>
  </p>
{tetra_rp-0.7.0.dist-info → tetra_rp-0.9.0.dist-info}/RECORD RENAMED
@@ -1,15 +1,10 @@
  tetra_rp/__init__.py,sha256=-1S5sYIKtnUV8V1HlSIbX1yZwiUrsO8J5b3ZEIR_phU,687
- tetra_rp/client.py,sha256=5zerW5tTUnTSe75cRGgTBhqsKNXoCVWgb3Kzh9tJvPA,2209
+ tetra_rp/client.py,sha256=rAMMmn4ejAayFXJMZzx7dG_8Y65tCEMI6wSSKgur4zQ,2500
+ tetra_rp/execute_class.py,sha256=HoH-qWDA7X6yGvQMwmHn5-MKxbLWHEDEHsuat5dzl2U,11912
  tetra_rp/logger.py,sha256=gk5-PWp3k_GQ5DxndsRkBCX0jarp_3lgZ1oiTFuThQg,1125
  tetra_rp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  tetra_rp/core/api/__init__.py,sha256=oldrEKMwxYoBPLvPfVlaFS3wfUtTTxCN6-HzlpTh6vE,124
  tetra_rp/core/api/runpod.py,sha256=sux4q6xg2PDRKJI5kLkcW4i8UISZUOmQxsdf0g6wgpw,9711
- tetra_rp/core/pool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tetra_rp/core/pool/cluster_manager.py,sha256=KJxEp_044HjnbOhfIdiXZbks_bFDYE1KgKeR5W9VvbY,6007
- tetra_rp/core/pool/dataclass.py,sha256=YngS328_NTewY8Etitj4k7MmdM5GWqqE_OMbytrVNlw,338
- tetra_rp/core/pool/ex.py,sha256=AZOrn9t_X5ycMl-tDg7-jcIURj_9kVmzn9_da8h1TFI,1273
- tetra_rp/core/pool/job.py,sha256=4bisW_ZwiQ2-qD5l0y9SbHcO4EQvSKimmBBU1fpI_YE,567
- tetra_rp/core/pool/worker.py,sha256=N4cOnf8MiDcPFH2XSMmSnnWMACZYUNnKWVhOx2aSxvM,478
  tetra_rp/core/resources/__init__.py,sha256=UhIwo1Y6-tw5qsULamR296sQiztuz-oWrSTreqfmFSw,814
  tetra_rp/core/resources/base.py,sha256=UJeDiFN45aO1n5SBcxn56ohLhj-AWHoj0KO7mF4yJ_o,1440
  tetra_rp/core/resources/cloud.py,sha256=XJOWPfzYlDVJGHxgffcfpEaOKrWhGdi7AzTlaGuYj0o,70
@@ -17,7 +12,7 @@ tetra_rp/core/resources/constants.py,sha256=F1gPqFaXcCmfrbUSO9PQtUBv984TxFc3pySg
  tetra_rp/core/resources/cpu.py,sha256=YIE-tKolSU3JJzpPB7ey-PbRdqKWsJZ_Ad4h2OYaaiA,1231
  tetra_rp/core/resources/environment.py,sha256=FC9kJCa8YLSar75AKUKqJYnNLrUdjZj8ZTOrspBrS00,1267
  tetra_rp/core/resources/gpu.py,sha256=2jIIMr8PNnlIAP8ZTKO8Imx-rdxXp2rbdSHJeVfjawk,1858
- tetra_rp/core/resources/live_serverless.py,sha256=6r4I4TEx9AmZ0-OJvE86qrY0S7BEx9t_P2zwHVdtbew,1074
+ tetra_rp/core/resources/live_serverless.py,sha256=A3JRdCYwHR2KN_OlmTLcv-m_ObxNhBhc5CnUzXOpOtc,1177
  tetra_rp/core/resources/network_volume.py,sha256=5_gwJlxt77VHs7T0d41l3IMZR0LhdoyQhroXCYfFF7w,3274
  tetra_rp/core/resources/resource_manager.py,sha256=kUVZDblfUzaG78S8FwOzu4rN6QSegUgQNK3fJ_X7l0w,2834
  tetra_rp/core/resources/serverless.py,sha256=RYH-gl_edEguGOlxR669Hfi_rXII4OEaYzlB2PhzOhI,15753
@@ -25,15 +20,17 @@ tetra_rp/core/resources/template.py,sha256=UkflJXZFWIbQkLuUt4oRLAjn-yIpw9_mT2X1c
  tetra_rp/core/resources/utils.py,sha256=mgXfgz_NuHN_IC7TzMNdH9II-LMjxcDCG7syDTcPiGs,1721
  tetra_rp/core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  tetra_rp/core/utils/backoff.py,sha256=1pfa0smFNpib8nztcIgBbtrVvQeECKh-aNOfL2TztgU,1324
+ tetra_rp/core/utils/constants.py,sha256=Dm4XiO5zTzfdqOSeYVfAjaf2LyHnIEVmbOi_s_k1J_E,375
  tetra_rp/core/utils/json.py,sha256=q0r7aEdfh8kKVeHGeh9fBDfuhHYNopSreislAMB6HhM,1163
+ tetra_rp/core/utils/lru_cache.py,sha256=drwKg-DfLbeBRGTzuxKqNKMQq0EuZV15LMTZIOyZuVk,2618
  tetra_rp/core/utils/singleton.py,sha256=JRli0HhBfq4P9mBUOg1TZUUwMvIenRqWdymX3qFMm2k,210
  tetra_rp/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tetra_rp/protos/remote_execution.py,sha256=I4641-Dlzj4O7AuZbVei8O9aV2VNrRTcE8r7Fm0e-V8,1901
+ tetra_rp/protos/remote_execution.py,sha256=F4uwobnp5q-lX3lR7NCAB23J6OzlzcsB35cezwuoSnI,4638
  tetra_rp/stubs/__init__.py,sha256=ozKsHs8q0T7o2qhQEquub9hqomh1Htys53mMraaRu2E,72
  tetra_rp/stubs/live_serverless.py,sha256=o1NH5XEwUD-27NXJsEGO0IwnuDp8iXwUiw5nZtaZZOI,4199
- tetra_rp/stubs/registry.py,sha256=V4m3CeXl8j1pguHuuflxqpWeBgVDQ93YkhxJbElyP7Q,2599
+ tetra_rp/stubs/registry.py,sha256=dmbyC7uBp04_sXsG2wJCloFfFRzYjYQ-naEBKhTRo-U,2839
  tetra_rp/stubs/serverless.py,sha256=BM_a5Ml5VADBYu2WRNmo9qnicP8NnXDGl5ywifulbD0,947
- tetra_rp-0.7.0.dist-info/METADATA,sha256=jcXAGoiAFJVTYosJV2SGMw8L14UkUw2PHNKKH5mXkR8,28055
- tetra_rp-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- tetra_rp-0.7.0.dist-info/top_level.txt,sha256=bBay7JTDwJXsTYvVjrwno9hnF-j0q272lk65f2AcPjU,9
- tetra_rp-0.7.0.dist-info/RECORD,,
+ tetra_rp-0.9.0.dist-info/METADATA,sha256=qkRvg25koaP7AHCTpdd9mbZU1GGpBYebK5Gu3aHxics,28045
+ tetra_rp-0.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ tetra_rp-0.9.0.dist-info/top_level.txt,sha256=bBay7JTDwJXsTYvVjrwno9hnF-j0q272lk65f2AcPjU,9
+ tetra_rp-0.9.0.dist-info/RECORD,,
File without changes
tetra_rp/core/pool/cluster_manager.py DELETED
@@ -1,177 +0,0 @@
- import time
- from worker import Worker
- from job import Job
-
- from dataclass import WorkerStatus, JobStatus
-
- import logging
- import inspect
-
-
- def setup_logging(level=logging.INFO, fmt=None):
-     if fmt is None:
-         fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-     logging.basicConfig(level=level, format=fmt)
-
-
- def get_logger(name=None):
-     """
-     Returns a logger. If no name is provided, it infers the caller's module name.
-     """
-     if name is None:
-         # Get the caller's module name.
-         frame = inspect.stack()[1]
-         module = inspect.getmodule(frame[0])
-         name = module.__name__ if module else "__main__"
-     return logging.getLogger(name)
-
-
- logger = get_logger(__name__)
-
-
- class ClusterManager:
-     """
-     Manages workers and jobs, currently in memory:
-     - Runpod for provisioning
-     - Real remote execution
-     - Data base for the
-     """
-
-     def __init__(self):
-         self.workers = {}  # Worker ID -> Worker
-         self.jobs = {}  # Job ID -> Job
-
-     # ----------------- Worker Management -----------------
-     # ------------------------------------------------------
-     def add_worker(self, resource_config: dict):
-         """
-         Add a new worker to the cluster
-         """
-         # here will go the logic to create a worker and add it to the cluster: RUNPOD LOGIC will be added here.
-         worker = Worker(resource_config)
-         self.workers[worker.worker_id] = worker
-
-         logger.info(f"Added worker {worker.worker_id} to the cluster")
-         return worker.worker_id
-
-     def remove_worker(self, worker_id):
-         """
-         Remove a worker from the cluster
-         """
-         worker = self.workers.get(worker_id)
-         if not worker:
-             logger.error(f"Worker {worker_id} not found")
-             return False
-         if worker.status == WorkerStatus.RUNNING:
-             logger.error(f"Worker {worker_id} is still running")
-             return False
-         del self.workers[worker_id]
-         logger.info(f"Removed worker {worker_id} from the cluster")
-         return True
-
-     def list_workers(self):
-         """
-         List all workers in the cluster
-         """
-         return list(self.workers.values())
-
-     # ----------------- Job Management -----------------
-     # ---------------------------------------------------
-
-     def submit_job(self, resource_config: dict):
-         """
-         Submit a new job to the cluster (queued), then attempt to schedule it.
-         """
-         job = Job(resource_config)
-         self.jobs[job.job_id] = job
-         logger.info(f"Submitted job {job.job_id} to the cluster")
-         # attempt to schedule the job
-         self.schedule_job(job)
-         return job.job_id
-
-     def schedule_job(self, job: Job):
-         """
-         Find a suitable worker for the job. If none, the job remains queued.
-         If we want auto-provisioning, we can add logic here to create a worker when none is available.
-         """
-         if job.status != JobStatus.QUEUED:
-             logger.error(f"Job {job.job_id} is not pending")
-             return False
-
-         # Find worker candidate
-         candidate = self.find_idle_worker(job.resource_config)
-         if candidate:
-             self.assign_job_to_worker(job, candidate)
-         else:
-             logger.info(f"No worker available for job {job.job_id}")
-             # we can either provision a new worker from here and then schedule the job from here.
-
-     def find_idle_worker(self, resource_config: dict):
-         """
-         Find an idle worker that can run the job
-         """
-         for w in self.workers.values():
-             if w.status == WorkerStatus.IDLE:
-                 # check the resource config
-                 if w.resource_config == resource_config:
-                     continue
-                 return w
-         return None
-
-     def assign_job_to_worker(self, job: Job, worker: Worker):
-         """
-         Mark the job as running and the worker as running, and 'execute' the job.
-         In a real system, we would send a remote command to the worker (e.g., gRPC) to execute the job.
-         """
-         job.worker_id = worker.worker_id
-         job.status = JobStatus.RUNNING
-         worker.status = WorkerStatus.RUNNING
-         worker.current_job_id = job.job_id
-         logger.info(f"Assigned job {job.job_id} to worker {worker.worker_id}")
-         self._execute_job(job, worker)
-
-     def _execute_job(self, job: Job, worker: Worker):
-         """
-         Simulate the remote execution. Right now, we just sleep for 1s.
-         In production, what we can do is:
-         - Open a gRPC connection to the worker
-         - pass the job details
-         - wait for the completion callback
-         """
-         try:
-             logger.info(f"Executing job {job.job_id} on worker {worker.worker_id}")
-             time.sleep(
-                 1
-             )  # Here we can add the actual execution logic, currently it mimics the execution.
-
-             # mark the job as completed
-             job.status = JobStatus.COMPLETED
-             job.result = "Job completed successfully"
-             logger.info(f"[Cluster Manager] Job {job.job_id} completed successfully")
-         except Exception as e:
-             job.status = JobStatus.FAILED
-             job.result = f"Job failed: {str(e)}"
-             logger.error(f"[Cluster Manager] Job {job.job_id} failed: {str(e)}")
-         finally:
-             worker.status = WorkerStatus.IDLE
-             worker.current_job_id = None
-
-     def get_job_status(self, job_id):
-         """
-         Get the job details
-         """
-         job = self.jobs.get(job_id)
-         if not job:
-             logger.error(f"Job {job_id} not found")
-             return None
-         return job
-
-     # this function has retry logic but it's currently fuzzy, we might have to change it.
-
-     def retry_queued_jobs(self):
-         """
-         Retry all queued jobs
-         """
-         for job in self.jobs.values():
-             if job.status == JobStatus.QUEUED:
-                 self.schedule_job(job)
tetra_rp/core/pool/dataclass.py DELETED
@@ -1,18 +0,0 @@
- from enum import Enum
-
-
- class WorkerStatus(Enum):
-     """Enum representing the status of a worker"""
-
-     IDLE = "idle"
-     RUNNING = "running"
-     OFFLINE = "offline"
-
-
- class JobStatus(Enum):
-     """Enum representing the status of a job"""
-
-     QUEUED = "queued"
-     RUNNING = "running"
-     COMPLETED = "completed"
-     FAILED = "failed"
tetra_rp/core/pool/ex.py DELETED
@@ -1,38 +0,0 @@
- from cluster_manager import ClusterManager
-
-
- if __name__ == "__main__":
-     cm = ClusterManager()
-
-     # 1) Submit a job with no existing workers (use resource_config dict)
-     job_id = cm.submit_job(
-         resource_config={"gpu": "H100", "memory": 16, "network_volume": 50}
-     )
-     print(
-         "Job status:", cm.get_job_status(job_id)
-     )  # should be QUEUED, no suitable worker
-
-     # 2) Add a worker that doesn't match the GPU
-     w1 = cm.add_worker(
-         resource_config={"gpu": "H100", "memory": 16, "network_volume": 50}
-     )
-     # Re-try scheduling
-     cm.retry_queued_jobs()
-     print("Job status (still queued):", cm.get_job_status(job_id))
-
-     # 3) Add a matching worker
-     w2 = cm.add_worker(
-         resource_config={"gpu": "H100", "memory": 16, "network_volume": 50}
-     )
-     # Re-try scheduling
-     cm.retry_queued_jobs()
-     print("Job status (should complete):", cm.get_job_status(job_id))
-
-     # 4) Submit another job that requires less resources
-     job_id2 = cm.submit_job(resource_config={"memory": 8, "network_volume": 10})
-     # Should be assigned to w1 if it's idle
-     print("Job2 final status:", cm.get_job_status(job_id2))
-
-     # 5) Show final state of workers
-     for worker in cm.list_workers():
-         print("Worker:", worker)
tetra_rp/core/pool/job.py DELETED
@@ -1,22 +0,0 @@
- import uuid
- from dataclass import JobStatus
-
-
- class Job:
-     """Represents a 'job' in the system
-
-     In a real system, this might contain the function to run,
-     arguments, and reference to data or code.
-     """
-
-     def __init__(self, resource_config: dict):
-         self.job_id = str(uuid.uuid4())[:8]
-         self.resource_config = resource_config
-         self.status = JobStatus.QUEUED
-
-         self.worker_id = None
-         self.result = None
-         self.error = None
-
-     def __repr__(self):
-         return f"Job(job_id={self.job_id}, status={self.status})"
tetra_rp/core/pool/worker.py DELETED
@@ -1,19 +0,0 @@
- import uuid
- from dataclass import WorkerStatus
-
-
- class Worker:
-     """Represents a single worker in the pool
-
-     For now we store resources in memory
-     """
-
-     def __init__(self, resource_config: dict):
-         self.worker_id = str(uuid.uuid4())[:8]
-         self.resource_config = resource_config
-         self.status = WorkerStatus.IDLE
-
-         self.current_job_id = None
-
-     def __repr__(self):
-         return f"Worker(worker_id={self.worker_id}, status={self.status})"