tetra-rp 0.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tetra-rp might be problematic.

Files changed (66)
  1. tetra_rp/__init__.py +43 -0
  2. tetra_rp/cli/__init__.py +0 -0
  3. tetra_rp/cli/commands/__init__.py +1 -0
  4. tetra_rp/cli/commands/build.py +534 -0
  5. tetra_rp/cli/commands/deploy.py +370 -0
  6. tetra_rp/cli/commands/init.py +119 -0
  7. tetra_rp/cli/commands/resource.py +191 -0
  8. tetra_rp/cli/commands/run.py +100 -0
  9. tetra_rp/cli/main.py +85 -0
  10. tetra_rp/cli/utils/__init__.py +1 -0
  11. tetra_rp/cli/utils/conda.py +127 -0
  12. tetra_rp/cli/utils/deployment.py +172 -0
  13. tetra_rp/cli/utils/ignore.py +139 -0
  14. tetra_rp/cli/utils/skeleton.py +184 -0
  15. tetra_rp/cli/utils/skeleton_template/.env.example +3 -0
  16. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  17. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  18. tetra_rp/cli/utils/skeleton_template/README.md +256 -0
  19. tetra_rp/cli/utils/skeleton_template/main.py +43 -0
  20. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  21. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  22. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +20 -0
  23. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +38 -0
  24. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +20 -0
  25. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +62 -0
  26. tetra_rp/client.py +128 -0
  27. tetra_rp/config.py +29 -0
  28. tetra_rp/core/__init__.py +0 -0
  29. tetra_rp/core/api/__init__.py +6 -0
  30. tetra_rp/core/api/runpod.py +319 -0
  31. tetra_rp/core/exceptions.py +50 -0
  32. tetra_rp/core/resources/__init__.py +37 -0
  33. tetra_rp/core/resources/base.py +47 -0
  34. tetra_rp/core/resources/cloud.py +4 -0
  35. tetra_rp/core/resources/constants.py +4 -0
  36. tetra_rp/core/resources/cpu.py +146 -0
  37. tetra_rp/core/resources/environment.py +41 -0
  38. tetra_rp/core/resources/gpu.py +68 -0
  39. tetra_rp/core/resources/live_serverless.py +62 -0
  40. tetra_rp/core/resources/network_volume.py +148 -0
  41. tetra_rp/core/resources/resource_manager.py +145 -0
  42. tetra_rp/core/resources/serverless.py +463 -0
  43. tetra_rp/core/resources/serverless_cpu.py +162 -0
  44. tetra_rp/core/resources/template.py +94 -0
  45. tetra_rp/core/resources/utils.py +50 -0
  46. tetra_rp/core/utils/__init__.py +0 -0
  47. tetra_rp/core/utils/backoff.py +43 -0
  48. tetra_rp/core/utils/constants.py +10 -0
  49. tetra_rp/core/utils/file_lock.py +260 -0
  50. tetra_rp/core/utils/json.py +33 -0
  51. tetra_rp/core/utils/lru_cache.py +75 -0
  52. tetra_rp/core/utils/singleton.py +21 -0
  53. tetra_rp/core/validation.py +44 -0
  54. tetra_rp/execute_class.py +319 -0
  55. tetra_rp/logger.py +34 -0
  56. tetra_rp/protos/__init__.py +0 -0
  57. tetra_rp/protos/remote_execution.py +148 -0
  58. tetra_rp/stubs/__init__.py +5 -0
  59. tetra_rp/stubs/live_serverless.py +155 -0
  60. tetra_rp/stubs/registry.py +117 -0
  61. tetra_rp/stubs/serverless.py +30 -0
  62. tetra_rp-0.17.1.dist-info/METADATA +976 -0
  63. tetra_rp-0.17.1.dist-info/RECORD +66 -0
  64. tetra_rp-0.17.1.dist-info/WHEEL +5 -0
  65. tetra_rp-0.17.1.dist-info/entry_points.txt +2 -0
  66. tetra_rp-0.17.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,976 @@
1
+ Metadata-Version: 2.4
2
+ Name: tetra_rp
3
+ Version: 0.17.1
4
+ Summary: A Python library for distributed inference and serving of machine learning models
5
+ Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: <3.14,>=3.9
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: cloudpickle>=3.1.1
14
+ Requires-Dist: runpod
15
+ Requires-Dist: python-dotenv>=1.0.0
16
+ Requires-Dist: pydantic>=2.0.0
17
+ Requires-Dist: rich>=14.0.0
18
+ Requires-Dist: typer>=0.12.0
19
+ Requires-Dist: questionary>=2.0.0
20
+ Requires-Dist: pathspec>=0.11.0
21
+
22
+ # Flash: Serverless computing for AI workloads
23
+
24
+ Runpod Flash is a Python SDK that streamlines the development and deployment of AI workflows on Runpod's [Serverless infrastructure](http://docs.runpod.io/serverless/overview). Write Python functions locally, and Flash handles the infrastructure: provisioning GPUs and CPUs, managing dependencies, and transferring data, so you can focus on building AI applications.
25
+
26
+ You can find a repository of prebuilt Flash examples at [runpod/flash-examples](https://github.com/runpod/flash-examples).
27
+
28
+ > [!Note]
29
+ > **New feature - Consolidated template management:** `PodTemplate` overrides now seamlessly integrate with `ServerlessResource` defaults, providing more consistent resource configuration and reducing deployment complexity.
30
+
31
+ ## Table of contents
32
+
33
+ - [Overview](#overview)
34
+ - [Get started](#get-started)
35
+ - [Create Flash API endpoints](#create-flash-api-endpoints)
36
+ - [Key concepts](#key-concepts)
37
+ - [How it works](#how-it-works)
38
+ - [Advanced features](#advanced-features)
39
+ - [Configuration](#configuration)
40
+ - [Workflow examples](#workflow-examples)
41
+ - [Use cases](#use-cases)
42
+ - [Limitations](#limitations)
43
+ - [Contributing](#contributing)
44
+ - [Troubleshooting](#troubleshooting)
45
+
46
+ ## Overview
47
+
48
+ There are two basic modes for using Flash. You can:
49
+
50
+ - Build and run standalone Python scripts using the `@remote` decorator.
51
+ - Create Flash API endpoints with FastAPI (using the same script syntax).
52
+
53
+ Follow the steps in the next section to install Flash and create your first script before learning how to [create Flash API endpoints](#create-flash-api-endpoints).
54
+
55
+ To learn more about how Flash works, see [Key concepts](#key-concepts).
56
+
57
+ ## Get started
58
+
59
+ Before you can use Flash, you'll need:
60
+
61
+ - Python 3.9 or later (up to 3.13) installed on your local machine.
62
+ - A Runpod account with an API key ([sign up here](https://runpod.io/console)).
63
+ - Basic knowledge of Python and async programming.
64
+
65
+ ### Step 1: Install Flash
66
+
67
+ ```bash
68
+ pip install tetra_rp
69
+ ```
70
+
71
+ ### Step 2: Set your API key
72
+
73
+ Generate an API key from the [Runpod account settings](https://docs.runpod.io/get-started/api-keys) page and set it as an environment variable:
74
+
75
+ ```bash
76
+ export RUNPOD_API_KEY=[YOUR_API_KEY]
77
+ ```
78
+
79
+ Or save it in a `.env` file in your project directory:
80
+
81
+ ```bash
82
+ echo "RUNPOD_API_KEY=[YOUR_API_KEY]" > .env
83
+ ```
84
+
85
+ ### Step 3: Create your first Flash function
86
+
87
+ Add the following code to a new Python file:
88
+
89
+ ```python
90
+ import asyncio
91
+ from tetra_rp import remote, LiveServerless
92
+ from dotenv import load_dotenv
93
+
94
+ # Uncomment if using a .env file
95
+ # load_dotenv()
96
+
97
+ # Configure GPU resources
98
+ gpu_config = LiveServerless(name="flash-quickstart")
99
+
100
+ @remote(
101
+ resource_config=gpu_config,
102
+ dependencies=["torch", "numpy"]
103
+ )
104
+ def gpu_compute(data):
105
+ import torch
106
+ import numpy as np
107
+
108
+ # This runs on a GPU in Runpod's cloud
109
+ tensor = torch.tensor(data, device="cuda")
110
+ result = tensor.sum().item()
111
+
112
+ return {
113
+ "result": result,
114
+ "device": torch.cuda.get_device_name(0)
115
+ }
116
+
117
+ async def main():
118
+ # This runs locally
119
+ result = await gpu_compute([1, 2, 3, 4, 5])
120
+ print(f"Sum: {result['result']}")
121
+ print(f"Computed on: {result['device']}")
122
+
123
+ if __name__ == "__main__":
124
+ asyncio.run(main())
125
+ ```
126
+
127
+ Run the example:
128
+
129
+ ```bash
130
+ python your_script.py
131
+ ```
132
+
133
+ The first time you run the script, it will take significantly longer than subsequent runs (about one minute for the first run vs. one second for later runs), because your endpoint must be initialized.
134
+
135
+ When it's finished, you should see output similar to this:
136
+
137
+ ```bash
138
+ 2025-11-19 12:35:15,109 | INFO | Created endpoint: rb50waqznmn2kg - flash-quickstart-fb
139
+ 2025-11-19 12:35:15,112 | INFO | URL: https://console.runpod.io/serverless/user/endpoint/rb50waqznmn2kg
140
+ 2025-11-19 12:35:15,114 | INFO | LiveServerless:rb50waqznmn2kg | API /run
141
+ 2025-11-19 12:35:15,655 | INFO | LiveServerless:rb50waqznmn2kg | Started Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2
142
+ 2025-11-19 12:35:15,762 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | Status: IN_QUEUE
143
+ 2025-11-19 12:35:16,301 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | .
144
+ 2025-11-19 12:35:17,756 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | ..
145
+ 2025-11-19 12:35:22,610 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | ...
146
+ 2025-11-19 12:35:37,163 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | ....
147
+ 2025-11-19 12:35:59,248 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | .....
148
+ 2025-11-19 12:36:09,983 | INFO | Job:b0b341e7-e460-4305-9acd-fc2dfd1bd65c-u2 | Status: COMPLETED
149
+ 2025-11-19 12:36:10,068 | INFO | Worker:icmkdgnrmdf8gz | Delay Time: 51842 ms
150
+ 2025-11-19 12:36:10,068 | INFO | Worker:icmkdgnrmdf8gz | Execution Time: 1533 ms
151
+ 2025-11-19 17:36:07,485 | INFO | Installing Python dependencies: ['torch', 'numpy']
152
+ Sum: 15
153
+ Computed on: NVIDIA GeForce RTX 4090
154
+ ```
155
+
156
+ ## Create Flash API endpoints
157
+
158
+ > [!Note]
159
+ > **Flash API endpoints are currently only available for local testing:** Using `flash run` will start the API server on your local machine. Future updates will add the ability to build and deploy API servers for production deployments.
160
+
161
+ You can use Flash to deploy and serve API endpoints that compute responses using GPU and CPU Serverless workers. These endpoints will run scripts using the same Python remote decorators [demonstrated above](#get-started).
162
+
163
+ ### Step 1: Initialize a new project
164
+
165
+ Use the `flash init` command to generate a structured project template with a preconfigured FastAPI application entry point.
166
+
167
+ Run this command to initialize a new project directory:
168
+
169
+ ```bash
170
+ flash init my_project
171
+ ```
172
+
173
+ You can also initialize your current directory:
174
+ ```
175
+ flash init
176
+ ```
177
+
178
+ ### Step 2: Explore the project template
179
+
180
+ This is the structure of the project template created by `flash init`:
181
+
182
+ ```txt
183
+ my_project/
184
+ ├── main.py              # FastAPI application entry point
185
+ ├── workers/
186
+ │   ├── gpu/             # GPU worker example
187
+ │   │   ├── __init__.py  # FastAPI router
188
+ │   │   └── endpoint.py  # GPU script with @remote decorated function
189
+ │   └── cpu/             # CPU worker example
190
+ │       ├── __init__.py  # FastAPI router
191
+ │       └── endpoint.py  # CPU script with @remote decorated function
192
+ ├── .env                 # Environment variable template
193
+ ├── .gitignore           # Git ignore patterns
194
+ ├── .flashignore         # Flash deployment ignore patterns
195
+ ├── requirements.txt     # Python dependencies
196
+ └── README.md            # Project documentation
197
+ ```
198
+
199
+ This template includes:
200
+
201
+ - A FastAPI application entry point and routers.
202
+ - Templates for Python dependencies, `.env`, `.gitignore`, etc.
203
+ - Flash scripts (`endpoint.py`) for both GPU and CPU workers, which include:
204
+ - Pre-configured worker scaling limits using the `LiveServerless()` object.
205
+ - A `@remote` decorated function that returns a response from a worker.
206
+
207
+ When you start the FastAPI server, it creates API endpoints at `/gpu/hello` and `/cpu/hello`, which call the remote functions defined in their respective `endpoint.py` files.
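+
+ For illustration, here is a rough sketch of how that wiring fits together, condensed into a single listing. The real template splits this across `workers/gpu/endpoint.py`, `workers/gpu/__init__.py`, and `main.py`, and its exact contents may differ; the resource name and request model below are placeholders:
+
+ ```python
+ from fastapi import APIRouter, FastAPI
+ from pydantic import BaseModel
+ from tetra_rp import remote, LiveServerless
+
+ # --- workers/gpu/endpoint.py: the remote function ---
+ gpu_config = LiveServerless(name="my-project-gpu")  # name is illustrative
+
+ @remote(resource_config=gpu_config, dependencies=["torch"])
+ def gpu_hello(message: str):
+     import torch
+     return {"reply": f"GPU received: {message}", "cuda_available": torch.cuda.is_available()}
+
+ # --- workers/gpu/__init__.py: the FastAPI router that exposes it ---
+ router = APIRouter()
+
+ class HelloRequest(BaseModel):
+     message: str
+
+ @router.post("/hello")
+ async def hello(req: HelloRequest):
+     # Awaiting the @remote function dispatches the work to a Serverless worker
+     return await gpu_hello(req.message)
+
+ # --- main.py: the application entry point ---
+ app = FastAPI(title="my_project")
+ app.include_router(router, prefix="/gpu", tags=["GPU Workers"])
+ ```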
208
+
209
+ ### Step 3: Install Python dependencies
210
+
211
+ After initializing the project, navigate into the project directory:
212
+
213
+ ```bash
214
+ cd my_project
215
+ ```
216
+
217
+ Install required dependencies:
218
+
219
+ ```bash
220
+ pip install -r requirements.txt
221
+ ```
222
+
223
+ ### Step 4: Configure your API key
224
+
225
+ Open the `.env` template file in a text editor and add your [Runpod API key](https://docs.runpod.io/get-started/api-keys):
226
+
227
+ ```bash
228
+ # Use your text editor of choice, e.g.
229
+ cursor .env
230
+ ```
231
+
232
+ Remove the `#` symbol from the beginning of the `RUNPOD_API_KEY` line and replace `your_api_key_here` with your actual Runpod API key:
233
+
234
+ ```txt
235
+ # RUNPOD_API_KEY=your_api_key_here
236
+ # PORT=80
237
+ # LOG_LEVEL=INFO
238
+ ```
239
+
240
+ Save the file and close it.
241
+
242
+ ### Step 5: Start the local API server
243
+
244
+ Use `flash run` to start the API server:
245
+
246
+ ```bash
247
+ flash run
248
+ ```
249
+
250
+ Open a new terminal tab or window and test your GPU API using cURL:
251
+
252
+ ```bash
253
+ curl -X POST http://localhost:8888/gpu/hello \
254
+ -H "Content-Type: application/json" \
255
+ -d '{"message": "Hello from the GPU!"}'
256
+ ```
257
+
258
+ If you switch back to the terminal tab where you used `flash run`, you'll see the details of the job's progress.
259
+
260
+ ### Step 6: Open the API explorer
261
+
262
+ Besides starting the API server, `flash run` also starts an interactive API explorer. Point your web browser at [http://localhost:8888/docs](http://localhost:8888/docs) to explore the API.
263
+
264
+ To run remote functions in the explorer:
265
+
266
+ 1. Expand one of the functions under **GPU Workers** or **CPU Workers**.
267
+ 2. Click **Try it out** and then **Execute**.
268
+
269
+ You'll get a response from your workers right in the explorer.
270
+
271
+ ### Step 7: Customize your API
272
+
273
+ To customize your API endpoint and functionality:
274
+
275
+ 1. Add/edit remote functions in your `endpoint.py` files.
276
+ 2. Test the scripts individually by running `python endpoint.py`.
277
+ 3. Configure your FastAPI routers by editing the `__init__.py` files.
278
+ 4. Add any new endpoints to your `main.py` file.
279
+
280
+ ## Key concepts
281
+
282
+ ### Remote functions
283
+
284
+ The Flash `@remote` decorator marks functions for execution on Runpod's infrastructure. Everything inside the decorated function runs remotely, while code outside runs locally.
285
+
286
+ ```python
287
+ @remote(resource_config=config, dependencies=["pandas"])
288
+ def process_data(data):
289
+ # This code runs remotely
290
+ import pandas as pd
291
+ df = pd.DataFrame(data)
292
+ return df.describe().to_dict()
293
+
294
+ async def main():
295
+ # This code runs locally
296
+ result = await process_data(my_data)
297
+ ```
298
+
299
+ ### Resource configuration
300
+
301
+ Flash provides fine-grained control over hardware allocation through configuration objects:
302
+
303
+ ```python
304
+ from tetra_rp import LiveServerless, GpuGroup, CpuInstanceType, PodTemplate
305
+
306
+ # GPU configuration
307
+ gpu_config = LiveServerless(
308
+ name="ml-inference",
309
+ gpus=[GpuGroup.AMPERE_80], # A100 80GB
310
+ workersMax=5,
311
+ template=PodTemplate(containerDiskInGb=100) # Extra disk space
312
+ )
313
+
314
+ # CPU configuration
315
+ cpu_config = LiveServerless(
316
+ name="data-processor",
317
+ instanceIds=[CpuInstanceType.CPU5C_4_16], # 4 vCPU, 16GB RAM
318
+ workersMax=3
319
+ )
320
+ ```
321
+
322
+ ### Dependency management
323
+
324
+ Specify Python packages in the decorator, and Flash installs them automatically:
325
+
326
+ ```python
327
+ @remote(
328
+ resource_config=gpu_config,
329
+ dependencies=["transformers==4.36.0", "torch", "pillow"]
330
+ )
331
+ def generate_image(prompt):
332
+ # Import inside the function
333
+ from transformers import pipeline
334
+ import torch
335
+ from PIL import Image
336
+
337
+ # Your code here
338
+ ```
339
+
340
+ ### Parallel execution
341
+
342
+ Run multiple remote functions concurrently using Python's async capabilities:
343
+
344
+ ```python
345
+ # Process multiple items in parallel
346
+ results = await asyncio.gather(
347
+ process_item(item1),
348
+ process_item(item2),
349
+ process_item(item3)
350
+ )
351
+ ```
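+
+ For a complete, runnable version of this pattern, the `gpu_compute` function from the quickstart above can be fanned out over several inputs:
+
+ ```python
+ import asyncio
+ # reuses gpu_compute from the quickstart example above
+
+ async def main():
+     # Each call runs remotely; asyncio.gather awaits them concurrently
+     results = await asyncio.gather(
+         gpu_compute([1, 2, 3]),
+         gpu_compute([4, 5, 6]),
+         gpu_compute([7, 8, 9]),
+     )
+     print([r["result"] for r in results])
+
+ if __name__ == "__main__":
+     asyncio.run(main())
+ ```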
352
+
353
+ ## How it works
354
+
355
+ Flash orchestrates workflow execution through a multi-step process:
356
+
357
+ 1. **Function identification**: The `@remote` decorator marks functions for remote execution, enabling Flash to distinguish between local and remote operations.
358
+ 2. **Dependency analysis**: Flash automatically analyzes function dependencies to construct an optimal execution order, ensuring data flows correctly between sequential and parallel operations.
359
+ 3. **Resource provisioning and execution**: For each remote function, Flash:
360
+ - Dynamically provisions endpoint and worker resources on Runpod's infrastructure.
361
+ - Serializes and securely transfers input data to the remote worker.
362
+ - Executes the function on the remote infrastructure with the specified GPU or CPU resources.
363
+ - Returns results to your local environment for further processing.
364
+ 4. **Data orchestration**: Results flow seamlessly between functions according to your local Python code structure, maintaining the same programming model whether functions run locally or remotely.
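+
+ To make the serialization in step 3 concrete: Flash depends on `cloudpickle`, and the round trip is conceptually similar to the sketch below. This is purely illustrative of the general approach, not Flash's actual wire format:
+
+ ```python
+ import base64
+ import cloudpickle
+
+ def square(x):
+     return x * x
+
+ # "Client" side: serialize the function and its arguments into a text-safe payload
+ payload = base64.b64encode(cloudpickle.dumps((square, (7,)))).decode()
+
+ # "Worker" side: decode the payload, rebuild the function, and execute it
+ fn, args = cloudpickle.loads(base64.b64decode(payload))
+ print(fn(*args))  # 49
+ ```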
365
+
366
+
367
+ ## Advanced features
368
+
369
+ ### Custom Docker images
370
+
371
+ `LiveServerless` resources use a fixed Docker image that's optimized for the Flash runtime and supports full remote code execution. For specialized environments that require a custom Docker image, use `ServerlessEndpoint` or `CpuServerlessEndpoint`:
372
+
373
+ ```python
374
+ from tetra_rp import ServerlessEndpoint
375
+
376
+ custom_gpu = ServerlessEndpoint(
377
+ name="custom-ml-env",
378
+ imageName="pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime",
379
+ gpus=[GpuGroup.AMPERE_80]
380
+ )
381
+ ```
382
+
383
+ Unlike `LiveServerless`, these endpoints only support dictionary payloads in the form of `{"input": {...}}` (similar to a traditional [Serverless endpoint request](https://docs.runpod.io/serverless/endpoints/send-requests)), and cannot execute arbitrary Python functions remotely.
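+
+ Calling such an endpoint therefore looks like any other Serverless request. A minimal sketch using `requests` (the endpoint ID is a placeholder, and the exact input schema depends on the handler in your image):
+
+ ```python
+ import os
+ import requests
+
+ ENDPOINT_ID = "abc123xyz"  # hypothetical; use the ID printed when the endpoint is deployed
+
+ response = requests.post(
+     f"https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync",
+     headers={"Authorization": f"Bearer {os.environ['RUNPOD_API_KEY']}"},
+     json={"input": {"prompt": "hello"}},  # dictionary payload handled by your image
+     timeout=120,
+ )
+ print(response.json())
+ ```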
384
+
385
+ ### Persistent storage with network volumes
386
+
387
+ Attach [network volumes](https://docs.runpod.io/storage/network-volumes) for persistent storage across workers and endpoints:
388
+
389
+ ```python
390
+ config = LiveServerless(
391
+ name="model-server",
392
+ networkVolumeId="vol_abc123", # Your volume ID
393
+ template=PodTemplate(containerDiskInGb=100)
394
+ )
395
+ ```
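+
+ Inside a worker, the volume is exposed as a filesystem path (on Runpod Serverless this is conventionally `/runpod-volume`; check your endpoint if it differs). A sketch of caching a download across runs, assuming that mount path:
+
+ ```python
+ from tetra_rp import remote, LiveServerless
+
+ config = LiveServerless(name="model-server", networkVolumeId="vol_abc123")
+
+ @remote(resource_config=config, dependencies=["requests"])
+ def cached_download(url: str):
+     import requests
+     from pathlib import Path
+
+     cache_dir = Path("/runpod-volume/models")  # assumed default mount path
+     cache_dir.mkdir(parents=True, exist_ok=True)
+     target = cache_dir / url.split("/")[-1]
+
+     if not target.exists():  # only the first run downloads; later runs reuse the volume
+         target.write_bytes(requests.get(url, timeout=60).content)
+
+     return {"path": str(target), "size_bytes": target.stat().st_size}
+ ```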
396
+
397
+ ### Environment variables
398
+
399
+ Pass configuration to remote functions as environment variables on the endpoint:
400
+
401
+ ```python
402
+ config = LiveServerless(
403
+ name="api-worker",
404
+ env={"HF_TOKEN": "your_token", "MODEL_ID": "gpt2"}
405
+ )
406
+ ```
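+
+ Inside the remote function, these values should be available as ordinary process environment variables on the worker. A minimal sketch:
+
+ ```python
+ from tetra_rp import remote, LiveServerless
+
+ config = LiveServerless(
+     name="api-worker",
+     env={"HF_TOKEN": "your_token", "MODEL_ID": "gpt2"},
+ )
+
+ @remote(resource_config=config)
+ def show_config():
+     import os
+
+     # Values from the endpoint's `env` mapping are read like any other environment variable
+     return {
+         "model_id": os.environ.get("MODEL_ID"),
+         "hf_token_set": "HF_TOKEN" in os.environ,
+     }
+ ```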
407
+
408
+ ## Configuration
409
+
410
+ ### GPU configuration parameters
411
+
412
+ The following parameters can be used with `LiveServerless` (full remote code execution) and `ServerlessEndpoint` (dictionary payload only) to configure your Runpod GPU endpoints:
413
+
414
+ | Parameter | Description | Default | Example Values |
415
+ |--------------------|-------------------------------------------------|---------------|-------------------------------------|
416
+ | `name` | (Required) Name for your endpoint | `""` | `"stable-diffusion-server"` |
417
+ | `gpus` | GPU pool IDs that can be used by workers | `[GpuGroup.ANY]` | `[GpuGroup.ADA_24]` for RTX 4090 |
418
+ | `gpuCount` | Number of GPUs per worker | 1 | 1, 2, 4 |
419
+ | `workersMin` | Minimum number of workers | 0 | Set to 1 for persistence |
420
+ | `workersMax` | Maximum number of workers | 3 | Higher for more concurrency |
421
+ | `idleTimeout` | Minutes before scaling down | 5 | 10, 30, 60 |
422
+ | `env` | Environment variables | `None` | `{"HF_TOKEN": "xyz"}` |
423
+ | `networkVolumeId` | Persistent storage ID | `None` | `"vol_abc123"` |
424
+ | `executionTimeoutMs` | Max execution time (ms) | 0 (no limit) | 600000 (10 min) |
425
+ | `scalerType` | Scaling strategy | `QUEUE_DELAY` | `REQUEST_COUNT` |
426
+ | `scalerValue` | Scaling parameter value | 4 | 1-10 range typical |
427
+ | `locations` | Preferred datacenter locations | `None` | `"us-east,eu-central"` |
428
+ | `imageName` | Custom Docker image (`ServerlessEndpoint` only) | Fixed for LiveServerless | `"pytorch/pytorch:latest"`, `"my-registry/custom:v1.0"` |
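+
+ For example, several of these parameters combined on a single endpoint (values are illustrative):
+
+ ```python
+ from tetra_rp import LiveServerless, GpuGroup, PodTemplate
+
+ config = LiveServerless(
+     name="llm-inference",            # required
+     gpus=[GpuGroup.ADA_24],          # RTX 4090 pool
+     gpuCount=1,
+     workersMin=1,                    # keep one worker warm
+     workersMax=5,
+     idleTimeout=10,                  # scale down after 10 idle minutes
+     executionTimeoutMs=600000,       # 10-minute cap per job
+     template=PodTemplate(containerDiskInGb=50),
+     env={"MODEL_ID": "gpt2"},
+ )
+ ```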
429
+
430
+ ### CPU configuration parameters
431
+
432
+ The same GPU configuration parameters above apply to `LiveServerless` (full remote code execution) and `CpuServerlessEndpoint` (dictionary payload only), with these additional CPU-specific parameters:
433
+
434
+ | Parameter | Description | Default | Example Values |
435
+ |--------------------|-------------------------------------------------|---------------|-------------------------------------|
436
+ | `instanceIds` | CPU Instance Types (forces a CPU endpoint type) | `None` | `[CpuInstanceType.CPU5C_2_4]` |
437
+ | `imageName` | Custom Docker image (`CpuServerlessEndpoint` only) | Fixed for `LiveServerless` | `"python:3.11-slim"`, `"my-registry/custom:v1.0"` |
438
+
439
+ ### Resource class comparison
440
+
441
+ | Feature | LiveServerless | ServerlessEndpoint | CpuServerlessEndpoint |
442
+ |---------|----------------|-------------------|----------------------|
443
+ | **Remote code execution** | ✅ Full Python function execution | ❌ Dictionary payload only | ❌ Dictionary payload only |
444
+ | **Custom Docker images** | ❌ Fixed optimized images | ✅ Any Docker image | ✅ Any Docker image |
445
+ | **Use case** | Dynamic remote functions | Traditional API endpoints | Traditional CPU endpoints |
446
+ | **Function returns** | Any Python object | Dictionary only | Dictionary only |
447
+ | **@remote decorator** | Full functionality | Limited to payload passing | Limited to payload passing |
448
+
449
+ ### Available GPU types
450
+
451
+ Some common GPU groups available through `GpuGroup`:
452
+
453
+ - `GpuGroup.ANY` - Any available GPU (default)
454
+ - `GpuGroup.ADA_24` - NVIDIA GeForce RTX 4090
455
+ - `GpuGroup.AMPERE_80` - NVIDIA A100 80GB
456
+ - `GpuGroup.AMPERE_48` - NVIDIA A40, RTX A6000
457
+ - `GpuGroup.AMPERE_24` - NVIDIA RTX A5000, L4, RTX 3090
458
+
459
+
460
+ ### Available CPU instance types
461
+
462
+ - `CpuInstanceType.CPU3G_1_4` - (cpu3g-1-4) 3rd gen general purpose, 1 vCPU, 4GB RAM
463
+ - `CpuInstanceType.CPU3G_2_8` - (cpu3g-2-8) 3rd gen general purpose, 2 vCPU, 8GB RAM
464
+ - `CpuInstanceType.CPU3G_4_16` - (cpu3g-4-16) 3rd gen general purpose, 4 vCPU, 16GB RAM
465
+ - `CpuInstanceType.CPU3G_8_32` - (cpu3g-8-32) 3rd gen general purpose, 8 vCPU, 32GB RAM
466
+ - `CpuInstanceType.CPU3C_1_2` - (cpu3c-1-2) 3rd gen compute-optimized, 1 vCPU, 2GB RAM
467
+ - `CpuInstanceType.CPU3C_2_4` - (cpu3c-2-4) 3rd gen compute-optimized, 2 vCPU, 4GB RAM
468
+ - `CpuInstanceType.CPU3C_4_8` - (cpu3c-4-8) 3rd gen compute-optimized, 4 vCPU, 8GB RAM
469
+ - `CpuInstanceType.CPU3C_8_16` - (cpu3c-8-16) 3rd gen compute-optimized, 8 vCPU, 16GB RAM
470
+ - `CpuInstanceType.CPU5C_1_2` - (cpu5c-1-2) 5th gen compute-optimized, 1 vCPU, 2GB RAM
471
+ - `CpuInstanceType.CPU5C_2_4` - (cpu5c-2-4) 5th gen compute-optimized, 2 vCPU, 4GB RAM
472
+ - `CpuInstanceType.CPU5C_4_8` - (cpu5c-4-8) 5th gen compute-optimized, 4 vCPU, 8GB RAM
473
+ - `CpuInstanceType.CPU5C_8_16` - (cpu5c-8-16) 5th gen compute-optimized, 8 vCPU, 16GB RAM
474
+
475
+ ## Workflow examples
476
+
477
+ ### Basic GPU workflow
478
+
479
+ ```python
480
+ import asyncio
481
+ from tetra_rp import remote, LiveServerless
482
+
483
+ # Simple GPU configuration
484
+ gpu_config = LiveServerless(name="example-gpu-server")
485
+
486
+ @remote(
487
+ resource_config=gpu_config,
488
+ dependencies=["torch", "numpy"]
489
+ )
490
+ def gpu_compute(data):
491
+ import torch
492
+ import numpy as np
493
+
494
+ # Convert to tensor and perform computation on GPU
495
+ tensor = torch.tensor(data, device="cuda")
496
+ result = tensor.sum().item()
497
+
498
+ # Get GPU info
499
+ gpu_info = torch.cuda.get_device_properties(0)
500
+
501
+ return {
502
+ "result": result,
503
+ "gpu_name": gpu_info.name,
504
+ "cuda_version": torch.version.cuda
505
+ }
506
+
507
+ async def main():
508
+ result = await gpu_compute([1, 2, 3, 4, 5])
509
+ print(f"Result: {result['result']}")
510
+ print(f"Computed on: {result['gpu_name']} with CUDA {result['cuda_version']}")
511
+
512
+ if __name__ == "__main__":
513
+ asyncio.run(main())
514
+ ```
515
+
516
+ ### Advanced GPU workflow with template configuration
517
+
518
+ ```python
519
+ import asyncio
520
+ from tetra_rp import remote, LiveServerless, GpuGroup, PodTemplate
521
+ import base64
522
+
523
+ # Advanced GPU configuration with consolidated template overrides
524
+ sd_config = LiveServerless(
525
+ gpus=[GpuGroup.AMPERE_80], # A100 80GB GPUs
526
+ name="example_image_gen_server",
527
+ template=PodTemplate(containerDiskInGb=100), # Large disk for models
528
+ workersMax=3,
529
+ idleTimeout=10
530
+ )
531
+
532
+ @remote(
533
+ resource_config=sd_config,
534
+ dependencies=["diffusers", "transformers", "torch", "accelerate", "safetensors"]
535
+ )
536
+ def generate_image(prompt, width=512, height=512):
537
+ import torch
538
+ from diffusers import StableDiffusionPipeline
539
+ import io
540
+ import base64
541
+
542
+ # Load pipeline (benefits from large container disk)
543
+ pipeline = StableDiffusionPipeline.from_pretrained(
544
+ "runwayml/stable-diffusion-v1-5",
545
+ torch_dtype=torch.float16
546
+ )
547
+ pipeline = pipeline.to("cuda")
548
+
549
+ # Generate image
550
+ image = pipeline(prompt=prompt, width=width, height=height).images[0]
551
+
552
+ # Convert to base64 for return
553
+ buffered = io.BytesIO()
554
+ image.save(buffered, format="PNG")
555
+ img_str = base64.b64encode(buffered.getvalue()).decode()
556
+
557
+ return {"image": img_str, "prompt": prompt}
558
+
559
+ async def main():
560
+ result = await generate_image("A serene mountain landscape at sunset")
561
+ print(f"Generated image for: {result['prompt']}")
562
+ # Save image locally if needed
563
+ # img_data = base64.b64decode(result["image"])
564
+ # with open("output.png", "wb") as f:
565
+ # f.write(img_data)
566
+
567
+ if __name__ == "__main__":
568
+ asyncio.run(main())
569
+ ```
570
+
571
+ ### Basic CPU workflow
572
+
573
+ ```python
574
+ import asyncio
575
+ from tetra_rp import remote, LiveServerless, CpuInstanceType
576
+
577
+ # Simple CPU configuration
578
+ cpu_config = LiveServerless(
579
+ name="example-cpu-server",
580
+ instanceIds=[CpuInstanceType.CPU3G_2_8], # 2 vCPU, 8GB RAM
581
+ )
582
+
583
+ @remote(
584
+ resource_config=cpu_config,
585
+ dependencies=["pandas", "numpy"]
586
+ )
587
+ def cpu_data_processing(data):
588
+ import pandas as pd
589
+ import numpy as np
590
+ import platform
591
+
592
+ # Process data using CPU
593
+ df = pd.DataFrame(data)
594
+
595
+ return {
596
+ "row_count": len(df),
597
+ "column_count": len(df.columns) if not df.empty else 0,
598
+ "mean_values": df.select_dtypes(include=[np.number]).mean().to_dict(),
599
+ "system_info": platform.processor(),
600
+ "platform": platform.platform()
601
+ }
602
+
603
+ async def main():
604
+ sample_data = [
605
+ {"name": "Alice", "age": 30, "score": 85},
606
+ {"name": "Bob", "age": 25, "score": 92},
607
+ {"name": "Charlie", "age": 35, "score": 78}
608
+ ]
609
+
610
+ result = await cpu_data_processing(sample_data)
611
+ print(f"Processed {result['row_count']} rows on {result['platform']}")
612
+ print(f"Mean values: {result['mean_values']}")
613
+
614
+ if __name__ == "__main__":
615
+ asyncio.run(main())
616
+ ```
617
+
618
+ ### Advanced CPU workflow with template configuration
619
+
620
+ ```python
621
+ import asyncio
622
+ import numpy as np
623
+ from tetra_rp import remote, LiveServerless, CpuInstanceType, PodTemplate
624
+
625
+ # Advanced CPU configuration with template overrides
626
+ data_processing_config = LiveServerless(
627
+ name="advanced-cpu-processor",
628
+ instanceIds=[CpuInstanceType.CPU5C_4_16, CpuInstanceType.CPU3C_4_8], # Fallback options
629
+ template=PodTemplate(
630
+ containerDiskInGb=20, # Extra disk space for data processing
631
+ env=[{"key": "PYTHONPATH", "value": "/workspace"}] # Custom environment
632
+ ),
633
+ workersMax=5,
634
+ idleTimeout=15,
635
+ env={"PROCESSING_MODE": "batch", "DEBUG": "false"} # Additional env vars
636
+ )
637
+
638
+ @remote(
639
+ resource_config=data_processing_config,
640
+ dependencies=["pandas", "numpy", "scipy", "scikit-learn"]
641
+ )
642
+ def advanced_data_analysis(dataset, analysis_type="full"):
643
+ import pandas as pd
644
+ import numpy as np
645
+ from sklearn.preprocessing import StandardScaler
646
+ from sklearn.decomposition import PCA
647
+ import platform
648
+
649
+ # Create DataFrame
650
+ df = pd.DataFrame(dataset)
651
+
652
+ # Perform analysis based on type
653
+ results = {
654
+ "platform": platform.platform(),
655
+ "dataset_shape": df.shape,
656
+ "memory_usage": df.memory_usage(deep=True).sum()
657
+ }
658
+
659
+ if analysis_type == "full":
660
+ # Advanced statistical analysis
661
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
662
+ if len(numeric_cols) > 0:
663
+ # Standardize data
664
+ scaler = StandardScaler()
665
+ scaled_data = scaler.fit_transform(df[numeric_cols])
666
+
667
+ # PCA analysis
668
+ pca = PCA(n_components=min(len(numeric_cols), 3))
669
+ pca_result = pca.fit_transform(scaled_data)
670
+
671
+ results.update({
672
+ "correlation_matrix": df[numeric_cols].corr().to_dict(),
673
+ "pca_explained_variance": pca.explained_variance_ratio_.tolist(),
674
+ "pca_shape": pca_result.shape
675
+ })
676
+
677
+ return results
678
+
679
+ async def main():
680
+ # Generate sample dataset
681
+ sample_data = [
682
+ {"feature1": np.random.randn(), "feature2": np.random.randn(),
683
+ "feature3": np.random.randn(), "category": f"cat_{i%3}"}
684
+ for i in range(1000)
685
+ ]
686
+
687
+ result = await advanced_data_analysis(sample_data, "full")
688
+ print(f"Processed dataset with shape: {result['dataset_shape']}")
689
+ print(f"Memory usage: {result['memory_usage']} bytes")
690
+ print(f"PCA explained variance: {result.get('pca_explained_variance', 'N/A')}")
691
+
692
+ if __name__ == "__main__":
693
+ asyncio.run(main())
694
+ ```
695
+
696
+ ### Hybrid GPU/CPU workflow
697
+
698
+ ```python
699
+ import asyncio
+ import numpy as np
700
+ from tetra_rp import remote, LiveServerless, GpuGroup, CpuInstanceType, PodTemplate
701
+
702
+ # GPU configuration for model inference
703
+ gpu_config = LiveServerless(
704
+ name="ml-inference-gpu",
705
+ gpus=[GpuGroup.AMPERE_24], # RTX 3090/A5000
706
+ template=PodTemplate(containerDiskInGb=50), # Space for models
707
+ workersMax=2
708
+ )
709
+
710
+ # CPU configuration for data preprocessing
711
+ cpu_config = LiveServerless(
712
+ name="data-preprocessor",
713
+ instanceIds=[CpuInstanceType.CPU5C_4_16], # 4 vCPU, 16GB RAM
714
+ template=PodTemplate(
715
+ containerDiskInGb=30,
716
+ env=[{"key": "NUMPY_NUM_THREADS", "value": "4"}]
717
+ ),
718
+ workersMax=3
719
+ )
720
+
721
+ @remote(
722
+ resource_config=cpu_config,
723
+ dependencies=["pandas", "numpy", "scikit-learn"]
724
+ )
725
+ def preprocess_data(raw_data):
726
+ import pandas as pd
727
+ import numpy as np
728
+ from sklearn.preprocessing import StandardScaler
729
+
730
+ # Data cleaning and preprocessing
731
+ df = pd.DataFrame(raw_data)
732
+
733
+ # Handle missing values
734
+ df = df.fillna(df.mean(numeric_only=True))
735
+
736
+ # Normalize numeric features
737
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
738
+ if len(numeric_cols) > 0:
739
+ scaler = StandardScaler()
740
+ df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
741
+
742
+ return {
743
+ "processed_data": df.to_dict('records'),
744
+ "shape": df.shape,
745
+ "columns": list(df.columns)
746
+ }
747
+
748
+ @remote(
749
+ resource_config=gpu_config,
750
+ dependencies=["torch", "transformers", "numpy"]
751
+ )
752
+ def run_inference(processed_data):
753
+ import torch
754
+ import numpy as np
755
+
756
+ # Simulate ML model inference on GPU
757
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
758
+
759
+ # Convert to tensor
760
+ data_array = np.array([list(item.values()) for item in processed_data["processed_data"]])
761
+ tensor = torch.tensor(data_array, dtype=torch.float32).to(device)
762
+
763
+ # Simple neural network simulation
764
+ with torch.no_grad():
765
+ # Simulate model computation
766
+ result = torch.nn.functional.softmax(tensor.mean(dim=1), dim=0)
767
+ predictions = result.cpu().numpy().tolist()
768
+
769
+ return {
770
+ "predictions": predictions,
771
+ "device_used": str(device),
772
+ "input_shape": tensor.shape
773
+ }
774
+
775
+ async def ml_pipeline(raw_dataset):
776
+ """Complete ML pipeline: CPU preprocessing -> GPU inference"""
777
+ print("Step 1: Preprocessing data on CPU...")
778
+ preprocessed = await preprocess_data(raw_dataset)
779
+ print(f"Preprocessed data shape: {preprocessed['shape']}")
780
+
781
+ print("Step 2: Running inference on GPU...")
782
+ results = await run_inference(preprocessed)
783
+ print(f"Inference completed on: {results['device_used']}")
784
+
785
+ return {
786
+ "preprocessing": preprocessed,
787
+ "inference": results
788
+ }
789
+
790
+ async def main():
791
+ # Sample dataset
792
+ raw_data = [
793
+ {"feature1": np.random.randn(), "feature2": np.random.randn(),
794
+ "feature3": np.random.randn(), "label": i % 2}
795
+ for i in range(100)
796
+ ]
797
+
798
+ # Run the complete pipeline
799
+ results = await ml_pipeline(raw_data)
800
+
801
+ print("\nPipeline Results:")
802
+ print(f"Data processed: {results['preprocessing']['shape']}")
803
+ print(f"Predictions generated: {len(results['inference']['predictions'])}")
804
+ print(f"GPU device: {results['inference']['device_used']}")
805
+
806
+ if __name__ == "__main__":
807
+ asyncio.run(main())
808
+ ```
809
+
810
+ ### Multi-stage ML pipeline example
811
+
812
+ ```python
813
+ import os
814
+ import asyncio
815
+ from tetra_rp import remote, LiveServerless
816
+
817
+ # Configure Runpod resources
818
+ runpod_config = LiveServerless(name="multi-stage-pipeline-server")
819
+
820
+ # Feature extraction on GPU
821
+ @remote(
822
+ resource_config=runpod_config,
823
+ dependencies=["torch", "transformers"]
824
+ )
825
+ def extract_features(texts):
826
+ import torch
827
+ from transformers import AutoTokenizer, AutoModel
828
+
829
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
830
+ model = AutoModel.from_pretrained("bert-base-uncased")
831
+ model.to("cuda")
832
+
833
+ features = []
834
+ for text in texts:
835
+ inputs = tokenizer(text, return_tensors="pt").to("cuda")
836
+ with torch.no_grad():
837
+ outputs = model(**inputs)
838
+ features.append(outputs.last_hidden_state[:, 0].cpu().numpy().tolist()[0])
839
+
840
+ return features
841
+
842
+ # Classification on GPU
843
+ @remote(
844
+ resource_config=runpod_config,
845
+ dependencies=["torch", "sklearn"]
846
+ )
847
+ def classify(features, labels=None):
848
+ import torch
849
+ import numpy as np
850
+ from sklearn.linear_model import LogisticRegression
851
+
852
+ features_np = np.array(features[1:] if labels is None and isinstance(features, list) and len(features)>0 and isinstance(features[0], dict) else features)
853
+
854
+ if labels is not None:
855
+ labels_np = np.array(labels)
856
+ classifier = LogisticRegression()
857
+ classifier.fit(features_np, labels_np)
858
+
859
+ coefficients = {
860
+ "coef": classifier.coef_.tolist(),
861
+ "intercept": classifier.intercept_.tolist(),
862
+ "classes": classifier.classes_.tolist()
863
+ }
864
+ return coefficients
865
+ else:
866
+ coefficients = features[0]
867
+
868
+ classifier = LogisticRegression()
869
+ classifier.coef_ = np.array(coefficients["coef"])
870
+ classifier.intercept_ = np.array(coefficients["intercept"])
871
+ classifier.classes_ = np.array(coefficients["classes"])
872
+
873
+ # Predict
874
+ predictions = classifier.predict(features_np)
875
+ probabilities = classifier.predict_proba(features_np)
876
+
877
+ return {
878
+ "predictions": predictions.tolist(),
879
+ "probabilities": probabilities.tolist()
880
+ }
881
+
882
+ # Complete pipeline
883
+ async def text_classification_pipeline(train_texts, train_labels, test_texts):
884
+ train_features = await extract_features(train_texts)
885
+ test_features = await extract_features(test_texts)
886
+
887
+ model_coeffs = await classify(train_features, train_labels)
888
+
889
+ # For inference, pass model coefficients along with test features
890
+ # The classify function expects a list where the first element is the model (coeffs)
891
+ # and subsequent elements are features for prediction.
892
+ predictions = await classify([model_coeffs] + test_features)
893
+
894
+ return predictions
895
+ ```
896
+
897
+ ### More examples
898
+
899
+ You can find many more examples in the [flash-examples repository](https://github.com/runpod/flash-examples).
900
+
901
+ ## Use cases
902
+
903
+ Flash is well-suited for a diverse range of AI and data processing workloads:
904
+
905
+ - **Multi-modal AI pipelines**: Orchestrate unified workflows combining text, image, and audio models with GPU acceleration.
906
+ - **Distributed model training**: Scale training operations across multiple GPU workers for faster model development.
907
+ - **AI research experimentation**: Rapidly prototype and test complex model combinations without infrastructure overhead.
908
+ - **Production inference systems**: Deploy sophisticated multi-stage inference pipelines for real-world applications.
909
+ - **Data processing workflows**: Efficiently process large datasets using CPU workers for general computation and GPU workers for accelerated tasks.
910
+ - **Hybrid GPU/CPU workflows**: Optimize cost and performance by combining CPU preprocessing with GPU inference.
911
+
912
+ ## Limitations
913
+
914
+ - Serverless deployments using Flash are currently restricted to the `EU-RO-1` datacenter.
915
+ - Flash is designed primarily for local development and live-testing workflows.
916
+ - While Flash supports provisioning traditional Serverless endpoints (non-Live endpoints), the interface for interacting with these resources will change in upcoming releases. For now, focus on using `LiveServerless` for the most stable development experience, as it provides full remote code execution without requiring custom Docker images.
917
+ - As you work through the Flash examples repository, you'll accumulate multiple endpoints in your Runpod account. These endpoints persist until manually deleted through the Runpod console. A `flash undeploy` command is in development to streamline cleanup, but for now, regular manual deletion of unused endpoints is recommended to avoid unnecessary charges.
918
+ - Finally, be aware of your account's maximum worker capacity limits. Flash can rapidly scale workers across multiple endpoints, and you may hit capacity constraints faster than with traditional deployment patterns. If you find yourself consistently reaching worker limits, contact Runpod support to increase your account's capacity allocation.
919
+
920
+ ## Contributing
921
+
922
+ We welcome contributions to Flash! Whether you're fixing bugs, adding features, or improving documentation, your help makes this project better.
923
+
924
+ ### Development setup
925
+
926
+ 1. Fork and clone the repository.
927
+ 2. Set up your development environment following the project guidelines.
928
+ 3. Make your changes following our coding standards.
929
+ 4. Test your changes thoroughly.
930
+ 5. Submit a pull request.
931
+
932
+ ### Release process
933
+
934
+ This project uses an automated release system built on Release Please. For detailed information about how releases work, including conventional commits, versioning, and the CI/CD pipeline, see our [Release System Documentation](RELEASE_SYSTEM.md).
935
+
936
+ **Quick reference for contributors:**
937
+ - Use conventional commits: `feat:`, `fix:`, `docs:`, etc.
938
+ - CI automatically runs quality checks on all PRs.
939
+ - Release PRs are created automatically when changes are merged to main.
940
+ - Releases are published to PyPI automatically when release PRs are merged.
941
+
942
+ ## Troubleshooting
943
+
944
+ ### Authentication errors
945
+
946
+ Verify your API key is set correctly:
947
+
948
+ ```bash
949
+ echo $RUNPOD_API_KEY # Should show your key
950
+ ```
951
+
952
+ ### Import errors in remote functions
953
+
954
+ Remember to import packages inside remote functions:
955
+
956
+ ```python
957
+ @remote(dependencies=["requests"])
958
+ def fetch_data(url):
959
+ import requests # Import here, not at top of file
960
+ return requests.get(url).json()
961
+ ```
962
+
963
+ ### Performance optimization
964
+
965
+ - Set `workersMin=1` to keep workers warm and avoid cold starts.
966
+ - Use `idleTimeout` to balance cost and responsiveness.
967
+ - Choose appropriate GPU types for your workload.
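+
+ A minimal sketch of a configuration tuned for low latency (values are illustrative):
+
+ ```python
+ from tetra_rp import LiveServerless, GpuGroup
+
+ warm_config = LiveServerless(
+     name="low-latency-inference",
+     gpus=[GpuGroup.ADA_24],  # pick a GPU class sized for your workload
+     workersMin=1,            # one worker stays warm, avoiding cold starts (it accrues charges while idle)
+     workersMax=3,
+     idleTimeout=30,          # extra workers linger 30 minutes before scaling down
+ )
+ ```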
968
+
969
+ ## License
970
+
971
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
972
+
973
+ <p align="center">
974
+ <a href="https://github.com/runpod/tetra-rp">Flash</a> •
975
+ <a href="https://runpod.io">Runpod</a>
976
+ </p>