swarms 7.7.2__py3-none-any.whl → 7.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarms/prompts/ag_prompt.py +51 -19
- swarms/prompts/agent_system_prompts.py +13 -4
- swarms/prompts/multi_agent_collab_prompt.py +18 -0
- swarms/prompts/prompt.py +6 -10
- swarms/schemas/__init__.py +0 -3
- swarms/structs/__init__.py +2 -4
- swarms/structs/agent.py +201 -160
- swarms/structs/aop.py +8 -1
- swarms/structs/auto_swarm_builder.py +271 -210
- swarms/structs/conversation.py +22 -65
- swarms/structs/hiearchical_swarm.py +94 -123
- swarms/structs/hybrid_hiearchical_peer_swarm.py +1 -1
- swarms/structs/ma_utils.py +96 -0
- swarms/structs/mixture_of_agents.py +20 -103
- swarms/structs/multi_agent_router.py +32 -95
- swarms/structs/multi_model_gpu_manager.py +1447 -0
- swarms/structs/output_types.py +3 -16
- swarms/structs/stopping_conditions.py +30 -0
- swarms/structs/swarm_arange.py +18 -15
- swarms/structs/swarm_router.py +56 -4
- swarms/structs/swarming_architectures.py +576 -185
- swarms/telemetry/main.py +1 -7
- swarms/tools/mcp_client.py +209 -53
- swarms/tools/mcp_integration.py +1 -53
- swarms/utils/generate_keys.py +64 -0
- swarms/utils/history_output_formatter.py +2 -0
- {swarms-7.7.2.dist-info → swarms-7.7.4.dist-info}/METADATA +98 -263
- {swarms-7.7.2.dist-info → swarms-7.7.4.dist-info}/RECORD +31 -34
- swarms/schemas/agent_input_schema.py +0 -149
- swarms/structs/agents_available.py +0 -87
- swarms/structs/graph_swarm.py +0 -612
- swarms/structs/queue_swarm.py +0 -193
- swarms/structs/swarm_builder.py +0 -395
- swarms/structs/swarm_output_type.py +0 -23
- {swarms-7.7.2.dist-info → swarms-7.7.4.dist-info}/LICENSE +0 -0
- {swarms-7.7.2.dist-info → swarms-7.7.4.dist-info}/WHEEL +0 -0
- {swarms-7.7.2.dist-info → swarms-7.7.4.dist-info}/entry_points.txt +0 -0
--- /dev/null
+++ b/swarms/structs/multi_model_gpu_manager.py
@@ -0,0 +1,1447 @@
+"""
+GPU Model Manager
+================
+
+A production-grade utility for managing multiple PyTorch or Hugging Face models
+across available GPUs. This module automatically calculates model memory requirements,
+allocates models to appropriate GPUs, and provides a unified interface for running
+inference tasks across all loaded models.
+
+Features:
+- Dynamic model memory calculation
+- Optimal GPU memory allocation
+- Multi-processing support for parallel model execution
+- Customizable task execution for specific models
+- Comprehensive logging and error handling
+"""
+
+import os
+import queue
+import sys
+import time
+import json
+import uuid
+import torch
+import multiprocessing
+from typing import Dict, List, Union, Optional, Any
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from loguru import logger
+import numpy as np
+from contextlib import contextmanager
+
+# Try to import transformers, but don't fail if not available
+try:
+    import transformers
+    from transformers import AutoModel, AutoTokenizer
+
+    TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    TRANSFORMERS_AVAILABLE = False
+    logger.warning(
+        "Transformers package not found. HuggingFace models will not be supported."
+    )
+
+
+class ModelType(Enum):
+    """Enum defining supported model types."""
+
+    PYTORCH = "pytorch"
+    HUGGINGFACE = "huggingface"
+    UNKNOWN = "unknown"
+
+
+class GPUAllocationStrategy(Enum):
+    """Enum defining GPU allocation strategies."""
+
+    FILL_GPU = "fill_gpu"  # Fill each GPU before moving to next
+    DISTRIBUTE = "distribute"  # Distribute models evenly across GPUs
+    MEMORY_OPTIMIZED = "memory_optimized"  # Optimize for memory usage
+
+
+@dataclass
+class ModelMetadata:
+    """Data class for storing model metadata."""
+
+    name: str
+    model_type: ModelType
+    memory_required: float  # in GB
+    model: Any
+    device: Optional[torch.device] = None
+    process: Optional[multiprocessing.Process] = None
+    loaded: bool = False
+
+
+@dataclass
+class GPUMetadata:
+    """Data class for storing GPU metadata."""
+
+    id: int
+    device: torch.device
+    total_memory: float  # in GB
+    available_memory: float  # in GB
+    models: List[str] = None
+
+    def __post_init__(self):
+        if self.models is None:
+            self.models = []
+
+
+class ModelMemoryCalculator:
+    """Utility class for calculating model memory requirements."""
+
+    @staticmethod
+    def get_pytorch_model_size(model: torch.nn.Module) -> float:
+        """
+        Calculate the memory size of a PyTorch model in GB.
+
+        Args:
+            model: PyTorch model object
+
+        Returns:
+            Memory size in GB
+        """
+        try:
+            # Get model size in parameters
+            model_parameters = sum(
+                p.numel() for p in model.parameters()
+            )
+
+            # Calculate size based on dtype (default to float32)
+            if any(
+                p.dtype == torch.float16 for p in model.parameters()
+            ):
+                bytes_per_param = 2  # float16
+            elif any(
+                p.dtype == torch.bfloat16 for p in model.parameters()
+            ):
+                bytes_per_param = 2  # bfloat16
+            elif any(
+                p.dtype == torch.float64 for p in model.parameters()
+            ):
+                bytes_per_param = 8  # float64
+            else:
+                bytes_per_param = 4  # float32
+
+            # Calculate raw model size in bytes
+            model_size_bytes = model_parameters * bytes_per_param
+
+            # Add 20% for optimizer states, gradients, and other overhead
+            model_size_bytes_with_overhead = model_size_bytes * 1.2
+
+            # Convert to GB
+            model_size_gb = model_size_bytes_with_overhead / (1024**3)
+
+            # Add a safety margin of 10%
+            model_size_gb_with_safety = model_size_gb * 1.1
+
+            return model_size_gb_with_safety
+
+        except Exception as e:
+            logger.error(
+                f"Error calculating PyTorch model size: {str(e)}"
+            )
+            # Fallback estimation
+            return 2.0  # Default estimation if calculation fails
+
+    @staticmethod
+    def get_huggingface_model_size(
+        model_or_path: Union[str, Any]
+    ) -> float:
+        """
+        Calculate the memory size of a Hugging Face model in GB.
+        Works with either model path or loaded model.
+
+        Args:
+            model_or_path: Hugging Face model object or path to model
+
+        Returns:
+            Memory size in GB
+        """
+        if not TRANSFORMERS_AVAILABLE:
+            logger.error(
+                "Transformers package not available. Cannot calculate Hugging Face model size."
+            )
+            return 5.0  # Default fallback
+
+        try:
+            # If it's a path, we'll try to estimate without loading
+            if isinstance(model_or_path, str):
+                path = Path(model_or_path)
+                if path.exists():
+                    # Check for model info in config
+                    config_path = path / "config.json"
+                    if config_path.exists():
+                        with open(config_path, "r") as f:
+                            config = json.load(f)
+                            if "n_params" in config:
+                                n_params = config["n_params"]
+                                # Estimate with overhead
+                                model_size_gb = (
+                                    n_params * 4 * 1.5
+                                ) / (1024**3)
+                                return model_size_gb
+
+                    # Alternatively, estimate from model files
+                    pytorch_files = list(path.glob("*.bin"))
+                    if pytorch_files:
+                        total_size = sum(
+                            f.stat().st_size for f in pytorch_files
+                        )
+                        model_size_gb = (total_size * 1.5) / (
+                            1024**3
+                        )  # 50% overhead
+                        return model_size_gb
+
+                # If we can't estimate, load the model and calculate
+                logger.info(
+                    f"Loading model from {model_or_path} to calculate memory requirements..."
+                )
+                model = AutoModel.from_pretrained(model_or_path)
+                return ModelMemoryCalculator.get_pytorch_model_size(
+                    model
+                )
+            else:
+                # If we already have the model loaded, calculate directly
+                return ModelMemoryCalculator.get_pytorch_model_size(
+                    model_or_path
+                )
+
+        except Exception as e:
+            logger.error(
+                f"Error calculating Hugging Face model size: {str(e)}"
+            )
+            return 5.0  # Default estimation if calculation fails
+
+
+class GPUManager:
+    """Manages available GPUs and their memory."""
+
+    def __init__(self):
+        """Initialize the GPU manager."""
+        self.gpus: List[GPUMetadata] = []
+        self._initialize_gpus()
+
+    def _initialize_gpus(self) -> None:
+        """
+        Initialize available GPUs and collect their metadata.
+        """
+        if not torch.cuda.is_available():
+            logger.warning("No CUDA-capable devices detected.")
+            return
+
+        num_gpus = torch.cuda.device_count()
+        logger.info(f"Found {num_gpus} CUDA-capable devices.")
+
+        for gpu_id in range(num_gpus):
+            device = torch.device(f"cuda:{gpu_id}")
+
+            # Get total memory
+            total_memory = torch.cuda.get_device_properties(
+                gpu_id
+            ).total_memory
+            total_memory_gb = total_memory / (1024**3)
+
+            # Get available memory
+            torch.cuda.set_device(device)
+            torch.cuda.empty_cache()
+            available_memory = torch.cuda.mem_get_info(device)[0]
+            available_memory_gb = available_memory / (1024**3)
+
+            # Create GPU metadata
+            gpu_metadata = GPUMetadata(
+                id=gpu_id,
+                device=device,
+                total_memory=total_memory_gb,
+                available_memory=available_memory_gb,
+            )
+
+            self.gpus.append(gpu_metadata)
+            logger.info(
+                f"GPU {gpu_id}: {total_memory_gb:.2f} GB total, {available_memory_gb:.2f} GB available"
+            )
+
+    def update_gpu_memory_info(self) -> None:
+        """
+        Update the available memory information for all GPUs.
+        """
+        if not self.gpus:
+            logger.warning(
+                "No GPUs available to update memory information."
+            )
+            return
+
+        for gpu in self.gpus:
+            torch.cuda.set_device(gpu.device)
+            torch.cuda.empty_cache()
+            available_memory = torch.cuda.mem_get_info(gpu.device)[0]
+            gpu.available_memory = available_memory / (1024**3)
+            logger.debug(
+                f"Updated GPU {gpu.id}: {gpu.available_memory:.2f} GB available"
+            )
+
+
+class ModelGrid:
+    """
+    Main class for managing multiple models across available GPUs.
+
+    This class handles:
+    - Loading and unloading models
+    - Allocating models to appropriate GPUs based on memory requirements
+    - Running inference tasks on specific models
+    - Managing model lifecycle through multiple processes
+    """
+
+    def __init__(
+        self,
+        allocation_strategy: GPUAllocationStrategy = GPUAllocationStrategy.MEMORY_OPTIMIZED,
+        memory_buffer: float = 0.5,  # GB buffer to leave on each GPU
+        max_cpu_models: int = 0,  # Maximum models to keep on CPU if no GPU space
+        use_multiprocessing: bool = True,
+        log_level: str = "INFO",
+    ):
+        """
+        Initialize the model manager.
+
+        Args:
+            allocation_strategy: Strategy for allocating models to GPUs
+            memory_buffer: Memory buffer to leave on each GPU (in GB)
+            max_cpu_models: Maximum number of models to keep on CPU if no GPU space
+            use_multiprocessing: Whether to use multiprocessing for model execution
+            log_level: Logging level
+        """
+        # Set log level
+        logger.remove()
+        logger.add(sys.stderr, level=log_level)
+        logger.add(
+            "gpu_model_manager.log",
+            rotation="100 MB",
+            retention="1 week",
+            level=log_level,
+        )
+
+        self.models: Dict[str, ModelMetadata] = {}
+        self.gpu_manager = GPUManager()
+        self.allocation_strategy = allocation_strategy
+        self.memory_buffer = memory_buffer
+        self.max_cpu_models = max_cpu_models
+        self.use_multiprocessing = use_multiprocessing
+
+        # Initialize locks and queues for multiprocessing
+        self.manager = (
+            multiprocessing.Manager() if use_multiprocessing else None
+        )
+        self.task_queues: Dict[str, Any] = (
+            self.manager.dict() if use_multiprocessing else {}
+        )
+        self.result_queues: Dict[str, Any] = (
+            self.manager.dict() if use_multiprocessing else {}
+        )
+        self.model_locks: Dict[str, Any] = {}
+
+        logger.info(
+            f"ModelGrid initialized with {len(self.gpu_manager.gpus)} GPUs"
+        )
+        logger.info(
+            f"Using allocation strategy: {allocation_strategy.value}"
+        )
+
+    def add_model(
+        self,
+        model_name: str,
+        model: Any,
+        model_type: Optional[ModelType] = None,
+        memory_override: Optional[float] = None,
+    ) -> bool:
+        """
+        Add a model to the manager.
+
+        Args:
+            model_name: Unique name for the model
+            model: The model object or path
+            model_type: Type of the model (will be auto-detected if not provided)
+            memory_override: Override the automatic memory calculation (in GB)
+
+        Returns:
+            Success status
+        """
+        if model_name in self.models:
+            logger.warning(
+                f"Model '{model_name}' already exists. Use update_model to replace it."
+            )
+            return False
+
+        # Auto-detect model type if not provided
+        if model_type is None:
+            if isinstance(model, str):
+                if os.path.exists(model) and TRANSFORMERS_AVAILABLE:
+                    model_type = ModelType.HUGGINGFACE
+                else:
+                    model_type = ModelType.UNKNOWN
+            elif isinstance(model, torch.nn.Module):
+                model_type = ModelType.PYTORCH
+            elif TRANSFORMERS_AVAILABLE and isinstance(
+                model, transformers.PreTrainedModel
+            ):
+                model_type = ModelType.HUGGINGFACE
+            else:
+                model_type = ModelType.UNKNOWN
+
+        # Calculate memory requirements
+        if memory_override is not None:
+            memory_required = memory_override
+        else:
+            if model_type == ModelType.PYTORCH:
+                memory_required = (
+                    ModelMemoryCalculator.get_pytorch_model_size(
+                        model
+                    )
+                )
+            elif model_type == ModelType.HUGGINGFACE:
+                memory_required = (
+                    ModelMemoryCalculator.get_huggingface_model_size(
+                        model
+                    )
+                )
+            else:
+                logger.warning(
+                    f"Unknown model type for '{model_name}'. Using default memory estimation."
+                )
+                memory_required = 2.0  # Default estimation
+
+        # Create model metadata
+        model_metadata = ModelMetadata(
+            name=model_name,
+            model_type=model_type,
+            memory_required=memory_required,
+            model=model,
+            loaded=False,
+        )
+
+        self.models[model_name] = model_metadata
+        logger.info(
+            f"Added model '{model_name}' ({model_type.value}) with {memory_required:.2f} GB memory requirement"
+        )
+
+        # Initialize multiprocessing resources for this model
+        if self.use_multiprocessing:
+            self.task_queues[model_name] = self.manager.Queue()
+            self.result_queues[model_name] = self.manager.Queue()
+            self.model_locks[model_name] = self.manager.Lock()
+
+        return True
+
+    def remove_model(self, model_name: str) -> bool:
+        """
+        Remove a model from the manager.
+
+        Args:
+            model_name: Name of the model to remove
+
+        Returns:
+            Success status
+        """
+        if model_name not in self.models:
+            logger.warning(f"Model '{model_name}' does not exist.")
+            return False
+
+        model_metadata = self.models[model_name]
+
+        # Terminate the model process if running
+        if (
+            model_metadata.process is not None
+            and model_metadata.process.is_alive()
+        ):
+            logger.info(
+                f"Terminating process for model '{model_name}'"
+            )
+            model_metadata.process.terminate()
+            model_metadata.process.join(timeout=5)
+            if model_metadata.process.is_alive():
+                logger.warning(
+                    f"Process for model '{model_name}' did not terminate gracefully. Killing..."
+                )
+                model_metadata.process.kill()
+
+        # Remove from GPU if loaded
+        if (
+            model_metadata.loaded
+            and model_metadata.device is not None
+        ):
+            gpu_id = model_metadata.device.index
+            for gpu in self.gpu_manager.gpus:
+                if gpu.id == gpu_id and model_name in gpu.models:
+                    gpu.models.remove(model_name)
+                    logger.info(
+                        f"Removed model '{model_name}' from GPU {gpu_id}"
+                    )
+
+            # Update GPU memory info
+            self.gpu_manager.update_gpu_memory_info()
+
+        # Clean up multiprocessing resources
+        if self.use_multiprocessing:
+            if model_name in self.task_queues:
+                del self.task_queues[model_name]
+            if model_name in self.result_queues:
+                del self.result_queues[model_name]
+            if model_name in self.model_locks:
+                del self.model_locks[model_name]
+
+        # Remove model metadata
+        del self.models[model_name]
+
+        logger.info(f"Removed model '{model_name}'")
+        return True
+
+    def _find_best_gpu_for_model(
+        self, model_metadata: ModelMetadata
+    ) -> Optional[GPUMetadata]:
+        """
+        Find the best GPU for a given model based on the allocation strategy.
+
+        Args:
+            model_metadata: Metadata for the model
+
+        Returns:
+            Best GPU metadata or None if no suitable GPU found
+        """
+        model_memory = (
+            model_metadata.memory_required + self.memory_buffer
+        )
+
+        # Update GPU memory info before allocation
+        self.gpu_manager.update_gpu_memory_info()
+
+        # Find available GPUs that can fit the model
+        available_gpus = [
+            gpu
+            for gpu in self.gpu_manager.gpus
+            if gpu.available_memory >= model_memory
+        ]
+
+        if not available_gpus:
+            logger.warning(
+                f"No GPU with sufficient memory for model '{model_metadata.name}' "
+                f"(requires {model_memory:.2f} GB)"
+            )
+            return None
+
+        # Apply allocation strategy
+        if self.allocation_strategy == GPUAllocationStrategy.FILL_GPU:
+            # Sort by number of models (ascending) and then by available memory (descending)
+            return sorted(
+                available_gpus,
+                key=lambda g: (len(g.models), -g.available_memory),
+            )[0]
+
+        elif (
+            self.allocation_strategy
+            == GPUAllocationStrategy.DISTRIBUTE
+        ):
+            # Sort by number of models (ascending)
+            return sorted(
+                available_gpus, key=lambda g: len(g.models)
+            )[0]
+
+        elif (
+            self.allocation_strategy
+            == GPUAllocationStrategy.MEMORY_OPTIMIZED
+        ):
+            # Sort by available memory (ascending) but ensure it fits
+            return sorted(
+                available_gpus, key=lambda g: g.available_memory
+            )[0]
+
+        # Default fallback
+        return available_gpus[0]
+
+    def allocate_all_models(self) -> Dict[str, Optional[int]]:
+        """
+        Allocate all models to GPUs based on the allocation strategy.
+
+        Returns:
+            Dict mapping model names to allocated GPU IDs (or None if on CPU)
+        """
+        # Sort models by memory requirement (descending)
+        sorted_models = sorted(
+            self.models.values(),
+            key=lambda m: m.memory_required,
+            reverse=True,
+        )
+
+        allocations = {}
+
+        for model_metadata in sorted_models:
+            best_gpu = self._find_best_gpu_for_model(model_metadata)
+
+            if best_gpu is not None:
+                # Allocate model to GPU
+                gpu_id = best_gpu.id
+                model_metadata.device = best_gpu.device
+                best_gpu.models.append(model_metadata.name)
+                best_gpu.available_memory -= (
+                    model_metadata.memory_required
+                    + self.memory_buffer
+                )
+
+                allocations[model_metadata.name] = gpu_id
+                logger.info(
+                    f"Allocated model '{model_metadata.name}' to GPU {gpu_id} "
+                    f"({best_gpu.available_memory:.2f} GB remaining)"
+                )
+            else:
+                # No suitable GPU found, keep model on CPU if allowed
+                if (
+                    len(
+                        [m for m in allocations.values() if m is None]
+                    )
+                    < self.max_cpu_models
+                ):
+                    model_metadata.device = None
+                    allocations[model_metadata.name] = None
+                    logger.info(
+                        f"Keeping model '{model_metadata.name}' on CPU (no suitable GPU)"
+                    )
+                else:
+                    logger.warning(
+                        f"Cannot allocate model '{model_metadata.name}'. "
+                        f"No GPU space available and max_cpu_models limit reached."
+                    )
+
+        return allocations
+
+    def load_model(self, model_name: str) -> bool:
+        """
+        Load a specific model to its allocated device.
+
+        Args:
+            model_name: Name of the model to load
+
+        Returns:
+            Success status
+        """
+        if model_name not in self.models:
+            logger.warning(f"Model '{model_name}' does not exist.")
+            return False
+
+        model_metadata = self.models[model_name]
+
+        # Skip if already loaded
+        if model_metadata.loaded:
+            logger.info(f"Model '{model_name}' is already loaded.")
+            return True
+
+        # Allocate to GPU if not already allocated
+        if model_metadata.device is None:
+            best_gpu = self._find_best_gpu_for_model(model_metadata)
+            if best_gpu is not None:
+                model_metadata.device = best_gpu.device
+                best_gpu.models.append(model_name)
+                best_gpu.available_memory -= (
+                    model_metadata.memory_required
+                    + self.memory_buffer
+                )
+                logger.info(
+                    f"Allocated model '{model_name}' to GPU {best_gpu.id} "
+                    f"({best_gpu.available_memory:.2f} GB remaining)"
+                )
+
+        try:
+            device_str = (
+                "cpu"
+                if model_metadata.device is None
+                else str(model_metadata.device)
+            )
+            logger.info(
+                f"Loading model '{model_name}' to {device_str}"
+            )
+
+            # Load based on model type
+            if model_metadata.model_type == ModelType.PYTORCH:
+                if isinstance(model_metadata.model, torch.nn.Module):
+                    model_metadata.model.to(
+                        model_metadata.device or "cpu"
+                    )
+                else:
+                    logger.error(
+                        f"Model '{model_name}' is not a valid PyTorch module."
+                    )
+                    return False
+
+            elif model_metadata.model_type == ModelType.HUGGINGFACE:
+                if TRANSFORMERS_AVAILABLE:
+                    if isinstance(model_metadata.model, str):
+                        # Load from path
+                        logger.info(
+                            f"Loading HuggingFace model from {model_metadata.model}"
+                        )
+                        loaded_model = AutoModel.from_pretrained(
+                            model_metadata.model
+                        )
+                        loaded_model.to(
+                            model_metadata.device or "cpu"
+                        )
+                        model_metadata.model = loaded_model
+                    elif isinstance(
+                        model_metadata.model,
+                        transformers.PreTrainedModel,
+                    ):
+                        # Move existing model to device
+                        model_metadata.model.to(
+                            model_metadata.device or "cpu"
+                        )
+                    else:
+                        logger.error(
+                            f"Model '{model_name}' is not a valid HuggingFace model."
+                        )
+                        return False
+                else:
+                    logger.error(
+                        "Transformers package not available. Cannot load HuggingFace model."
+                    )
+                    return False
+            else:
+                logger.error(
+                    f"Unknown model type for '{model_name}'."
+                )
+                return False
+
+            model_metadata.loaded = True
+
+            # Start model process if using multiprocessing
+            if self.use_multiprocessing:
+                self._start_model_process(model_name)
+
+            logger.info(f"Successfully loaded model '{model_name}'")
+            return True
+
+        except Exception as e:
+            logger.error(
+                f"Error loading model '{model_name}': {str(e)}"
+            )
+            # Try to clean up GPU allocation if failed
+            if model_metadata.device is not None:
+                gpu_id = model_metadata.device.index
+                for gpu in self.gpu_manager.gpus:
+                    if gpu.id == gpu_id and model_name in gpu.models:
+                        gpu.models.remove(model_name)
+                        gpu.available_memory += (
+                            model_metadata.memory_required
+                            + self.memory_buffer
+                        )
+                model_metadata.device = None
+
+            self.gpu_manager.update_gpu_memory_info()
+            return False
+
+    def unload_model(self, model_name: str) -> bool:
+        """
+        Unload a specific model from its device.
+
+        Args:
+            model_name: Name of the model to unload
+
+        Returns:
+            Success status
+        """
+        if model_name not in self.models:
+            logger.warning(f"Model '{model_name}' does not exist.")
+            return False
+
+        model_metadata = self.models[model_name]
+
+        # Skip if not loaded
+        if not model_metadata.loaded:
+            logger.info(f"Model '{model_name}' is not loaded.")
+            return True
+
+        try:
+            # Stop model process if using multiprocessing
+            if (
+                self.use_multiprocessing
+                and model_metadata.process is not None
+            ):
+                logger.info(
+                    f"Stopping process for model '{model_name}'"
+                )
+                model_metadata.process.terminate()
+                model_metadata.process.join(timeout=5)
+                if model_metadata.process.is_alive():
+                    logger.warning(
+                        f"Process for model '{model_name}' did not terminate gracefully. Killing..."
+                    )
+                    model_metadata.process.kill()
+                model_metadata.process = None
+
+            # Move model to CPU and clean up
+            if (
+                model_metadata.device is not None
+                and model_metadata.device.type == "cuda"
+            ):
+                logger.info(
+                    f"Unloading model '{model_name}' from {model_metadata.device}"
+                )
+
+                # Update GPU allocation
+                gpu_id = model_metadata.device.index
+                for gpu in self.gpu_manager.gpus:
+                    if gpu.id == gpu_id and model_name in gpu.models:
+                        gpu.models.remove(model_name)
+                        gpu.available_memory += (
+                            model_metadata.memory_required
+                            + self.memory_buffer
+                        )
+
+                # Move model to CPU if it's a PyTorch module
+                if isinstance(model_metadata.model, torch.nn.Module):
+                    model_metadata.model.to("cpu")
+
+                # Clear CUDA cache
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+
+            model_metadata.device = None
+            model_metadata.loaded = False
+
+            # Update GPU memory info
+            self.gpu_manager.update_gpu_memory_info()
+
+            logger.info(f"Successfully unloaded model '{model_name}'")
+            return True
+
+        except Exception as e:
+            logger.error(
+                f"Error unloading model '{model_name}': {str(e)}"
+            )
+            return False
+
+    def load_all_models(self) -> Dict[str, bool]:
+        """
+        Load all models to their allocated devices.
+
+        Returns:
+            Dict mapping model names to load success status
+        """
+        # First allocate all models
+        self.allocate_all_models()
+
+        # Then load each model
+        results = {}
+        for model_name in self.models:
+            results[model_name] = self.load_model(model_name)
+
+        return results
+
+    def unload_all_models(self) -> Dict[str, bool]:
+        """
+        Unload all models from their devices.
+
+        Returns:
+            Dict mapping model names to unload success status
+        """
+        results = {}
+        for model_name in self.models:
+            results[model_name] = self.unload_model(model_name)
+
+        return results
+
+    def _start_model_process(self, model_name: str) -> bool:
+        """
+        Start a dedicated process for a model.
+
+        Args:
+            model_name: Name of the model
+
+        Returns:
+            Success status
+        """
+        if not self.use_multiprocessing:
+            logger.warning(
+                "Multiprocessing is disabled. Cannot start model process."
+            )
+            return False
+
+        if model_name not in self.models:
+            logger.warning(f"Model '{model_name}' does not exist.")
+            return False
+
+        model_metadata = self.models[model_name]
+
+        if (
+            model_metadata.process is not None
+            and model_metadata.process.is_alive()
+        ):
+            logger.info(
+                f"Process for model '{model_name}' is already running."
+            )
+            return True
+
+        try:
+            # Create a new process for the model
+            process = multiprocessing.Process(
+                target=self._model_process_worker,
+                args=(
+                    model_name,
+                    model_metadata.model_type,
+                    self.task_queues[model_name],
+                    self.result_queues[model_name],
+                    (
+                        model_metadata.device.index
+                        if model_metadata.device is not None
+                        else None
+                    ),
+                ),
+                daemon=True,
+            )
+
+            process.start()
+            model_metadata.process = process
+
+            logger.info(
+                f"Started process for model '{model_name}' (PID: {process.pid})"
+            )
+            return True
+
+        except Exception as e:
+            logger.error(
+                f"Error starting process for model '{model_name}': {str(e)}"
+            )
+            return False
+
+    def _model_process_worker(
+        self,
+        model_name: str,
+        model_type: ModelType,
+        task_queue: multiprocessing.Queue,
+        result_queue: multiprocessing.Queue,
+        gpu_id: Optional[int],
+    ) -> None:
+        """
+        Worker function for model processes.
+
+        Args:
+            model_name: Name of the model
+            model_type: Type of the model
+            task_queue: Queue for receiving tasks
+            result_queue: Queue for sending results
+            gpu_id: GPU device ID or None for CPU
+        """
+        try:
+            # Configure device
+            if gpu_id is not None:
+                device = torch.device(f"cuda:{gpu_id}")
+                torch.cuda.set_device(device)
+            else:
+                device = torch.device("cpu")
+
+            logger.info(
+                f"Model process for '{model_name}' started on {device}"
+            )
+
+            # Process tasks from the queue
+            while True:
+                try:
+                    # Get task from queue with timeout
+                    task_id, task_type, task_data = task_queue.get(
+                        timeout=1.0
+                    )
+
+                    logger.debug(
+                        f"Model '{model_name}' processing task {task_id}: {task_type}"
+                    )
+
+                    # Process task based on task_type
+                    try:
+                        if task_type == "run_model":
+                            # Run the model on the task data
+                            # This would be implemented based on the specific model type
+                            result = {
+                                "status": "success",
+                                "result": "Model output placeholder",
+                            }
+                        else:
+                            result = {
+                                "status": "error",
+                                "error": f"Unknown task type: {task_type}",
+                            }
+                    except Exception as e:
+                        logger.error(
+                            f"Error processing task {task_id} for model '{model_name}': {str(e)}"
+                        )
+                        result = {"status": "error", "error": str(e)}
+
+                    # Send result back
+                    result_queue.put((task_id, result))
+                    logger.debug(
+                        f"Model '{model_name}' completed task {task_id}"
+                    )
+
+                except queue.Empty:
+                    # No tasks in queue, just continue
+                    continue
+
+        except KeyboardInterrupt:
+            logger.info(
+                f"Model process for '{model_name}' interrupted"
+            )
+        except Exception as e:
+            logger.error(
+                f"Error in model process for '{model_name}': {str(e)}"
+            )
+        finally:
+            logger.info(f"Model process for '{model_name}' exiting")
+
+    @contextmanager
+    def _model_lock(self, model_name: str) -> None:
+        """
+        Context manager for acquiring model lock.
+
+        Args:
+            model_name: Name of the model
+        """
+        if (
+            not self.use_multiprocessing
+            or model_name not in self.model_locks
+        ):
+            # No-op if not using multiprocessing
+            yield
+            return
+
+        lock = self.model_locks[model_name]
+        try:
+            lock.acquire()
+            yield
+        finally:
+            lock.release()
+
+    def run(
+        self,
+        task: Union[str, List[str]],
+        model_names: Optional[List[str]] = None,
+        input_data: Any = None,
+        timeout: float = 30.0,
+    ) -> Dict[str, Any]:
+        """
+        Run a task on specific models or all models.
+
+        Args:
+            task: Task name or list of task names to run
+            model_names: List of model names to run the task on (None for all loaded models)
+            input_data: Input data for the task
+            timeout: Timeout in seconds
+
+        Returns:
+            Dict mapping model names to results
+        """
+        # Normalize task to list
+        if isinstance(task, str):
+            tasks = [task]
+        else:
+            tasks = task
+
+        # Determine which models to run on
+        if model_names is None:
+            target_models = [
+                name
+                for name, meta in self.models.items()
+                if meta.loaded
+            ]
+        else:
+            target_models = [
+                name
+                for name in model_names
+                if name in self.models and self.models[name].loaded
+            ]
+
+        if not target_models:
+            logger.warning(
+                "No loaded models available for running tasks."
+            )
+            return {}
+
+        logger.info(
+            f"Running tasks {tasks} on models: {', '.join(target_models)}"
+        )
+
+        results = {}
+
+        # Run tasks on each model
+        for model_name in target_models:
+            model_metadata = self.models[model_name]
+
+            try:
+                if (
+                    self.use_multiprocessing
+                    and model_metadata.process is not None
+                ):
+                    # Run in separate process
+                    results[model_name] = self._run_in_process(
+                        model_name, tasks, input_data, timeout
+                    )
+                else:
+                    # Run in current process
+                    results[model_name] = (
+                        self._run_in_current_process(
+                            model_name, tasks, input_data
+                        )
+                    )
+
+            except Exception as e:
+                logger.error(
+                    f"Error running tasks on model '{model_name}': {str(e)}"
+                )
+                results[model_name] = {
+                    "status": "error",
+                    "error": str(e),
+                }
+
+        return results
+
+    def _run_in_process(
+        self,
+        model_name: str,
+        tasks: List[str],
+        input_data: Any,
+        timeout: float,
+    ) -> Dict[str, Any]:
+        """
+        Run tasks on a model in a separate process.
+
+        Args:
+            model_name: Name of the model
+            tasks: List of tasks to run
+            input_data: Input data for the tasks
+            timeout: Timeout in seconds
+
+        Returns:
+            Task results
+        """
+        task_id = str(uuid.uuid4())
+        task_queue = self.task_queues[model_name]
+        result_queue = self.result_queues[model_name]
+
+        # Send task to model process
+        task_queue.put((task_id, tasks[0], input_data))
+
+        # Wait for result
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            try:
+                # Check if result is available
+                result_task_id, result = result_queue.get(block=False)
+
+                if result_task_id == task_id:
+                    return result
+                else:
+                    # Put back other task results
+                    result_queue.put((result_task_id, result))
+
+            except queue.Empty:
+                # No results yet, wait a bit
+                time.sleep(0.1)
+
+        # Timeout
+        logger.warning(
+            f"Timeout waiting for tasks on model '{model_name}'"
+        )
+        return {"status": "error", "error": "Timeout"}
+
+    def _run_in_current_process(
+        self, model_name: str, tasks: List[str], input_data: Any
+    ) -> Dict[str, Any]:
+        """
+        Run tasks on a model in the current process.
+
+        Args:
+            model_name: Name of the model
+            tasks: List of tasks to run
+            input_data: Input data for the tasks
+
+        Returns:
+            Task results
+        """
+        model_metadata = self.models[model_name]
+
+        with self._model_lock(model_name):
+            try:
+                # This would need to be implemented based on the specific model types
+                # and tasks supported. Here's a simple placeholder:
+                model = model_metadata.model
+
+                if model_metadata.model_type == ModelType.PYTORCH:
+                    # Run PyTorch model
+                    return {
+                        "status": "success",
+                        "result": "PyTorch model output placeholder",
+                    }
+
+                elif (
+                    model_metadata.model_type == ModelType.HUGGINGFACE
+                ):
+                    # Run Hugging Face model
+                    return {
+                        "status": "success",
+                        "result": "Hugging Face model output placeholder",
+                    }
+
+                else:
+                    return {
+                        "status": "error",
+                        "error": f"Unsupported model type: {model_metadata.model_type}",
+                    }
+
+            except Exception as e:
+                logger.error(
+                    f"Error running tasks on model '{model_name}': {str(e)}"
+                )
+                return {"status": "error", "error": str(e)}
+
+    def get_gpu_status(self) -> List[Dict[str, Any]]:
+        """
+        Get status information for all GPUs.
+
+        Returns:
+            List of GPU status dictionaries
+        """
+        # Update GPU memory info
+        self.gpu_manager.update_gpu_memory_info()
+
+        gpu_status = []
+        for gpu in self.gpu_manager.gpus:
+            status = {
+                "id": gpu.id,
+                "total_memory": gpu.total_memory,
+                "available_memory": gpu.available_memory,
+                "used_memory": gpu.total_memory
+                - gpu.available_memory,
+                "utilization": (
+                    gpu.total_memory - gpu.available_memory
+                )
+                / gpu.total_memory,
+                "models": gpu.models,
+            }
+            gpu_status.append(status)
+
+        return gpu_status
+
+    def get_model_status(self) -> Dict[str, Dict[str, Any]]:
+        """
+        Get status information for all models.
+
+        Returns:
+            Dict mapping model names to status dictionaries
+        """
+        model_status = {}
+        for name, metadata in self.models.items():
+            status = {
+                "name": name,
+                "type": metadata.model_type.value,
+                "memory_required": metadata.memory_required,
+                "loaded": metadata.loaded,
+                "device": (
+                    str(metadata.device)
+                    if metadata.device is not None
+                    else "cpu"
+                ),
+                "process_running": metadata.process is not None
+                and metadata.process.is_alive(),
+            }
+            model_status[name] = status
+
+        return model_status
+
+
+class ModelWithCustomRunMethod:
+    """
+    Base class for models with custom run methods.
+
+    Extend this class to implement custom run methods for specific model types.
+    """
+
+    def __init__(
+        self, model: Any, device: Optional[torch.device] = None
+    ):
+        """
+        Initialize the model wrapper.
+
+        Args:
+            model: The model object
+            device: Device to run the model on
+        """
+        self.model = model
+        self.device = device
+
+    def run(self, task: str, input_data: Any) -> Any:
+        """
+        Run a task on the model.
+
+        Args:
+            task: Task name
+            input_data: Input data for the task
+
+        Returns:
+            Task result
+        """
+        raise NotImplementedError(
+            "Subclasses must implement this method"
+        )
+
+
+class PyTorchModelWrapper(ModelWithCustomRunMethod):
+    """
+    Wrapper for PyTorch models with custom run methods.
+    """
+
+    def run(self, task: str, input_data: Any) -> Any:
+        """
+        Run a task on a PyTorch model.
+
+        Args:
+            task: Task name
+            input_data: Input data for the task
+
+        Returns:
+            Task result
+        """
+        # Example implementation for common PyTorch tasks
+        if task == "forward":
+            # Ensure model is in eval mode
+            self.model.eval()
+
+            # Convert input to tensor if needed
+            if not isinstance(input_data, torch.Tensor):
+                if isinstance(input_data, np.ndarray):
+                    input_tensor = torch.from_numpy(input_data).to(
+                        self.device
+                    )
+                else:
+                    input_tensor = torch.tensor(input_data).to(
+                        self.device
+                    )
+            else:
+                input_tensor = input_data.to(self.device)
+
+            # Run forward pass
+            with torch.no_grad():
+                output = self.model(input_tensor)
+
+            # Convert output to numpy if needed
+            if isinstance(output, torch.Tensor):
+                return output.cpu().numpy()
+            else:
+                return output
+
+        elif task == "predict":
+            # Similar to forward but with different post-processing
+            self.model.eval()
+
+            # Convert input to tensor if needed
+            if not isinstance(input_data, torch.Tensor):
+                if isinstance(input_data, np.ndarray):
+                    input_tensor = torch.from_numpy(input_data).to(
+                        self.device
+                    )
+                else:
+                    input_tensor = torch.tensor(input_data).to(
+                        self.device
+                    )
+            else:
+                input_tensor = input_data.to(self.device)
+
+            # Run prediction
+            with torch.no_grad():
+                output = self.model(input_tensor)
+
+            # Apply softmax if output is logits
+            if len(output.shape) > 1 and output.shape[1] > 1:
+                probs = torch.nn.functional.softmax(output, dim=1)
+                predicted_class = torch.argmax(probs, dim=1)
+                return {
+                    "probabilities": probs.cpu().numpy(),
+                    "predicted_class": predicted_class.cpu().numpy(),
+                }
+            else:
+                return output.cpu().numpy()
+        else:
+            raise ValueError(f"Unsupported task: {task}")
+
+
+class HuggingFaceModelWrapper(ModelWithCustomRunMethod):
+    """
+    Wrapper for Hugging Face models with custom run methods.
+    """
+
+    def run(self, task: str, input_data: Any) -> Any:
+        """
+        Run a task on a Hugging Face model.
+
+        Args:
+            task: Task name
+            input_data: Input data for the task
+
+        Returns:
+            Task result
+        """
+        if not TRANSFORMERS_AVAILABLE:
+            raise ImportError("Transformers package not available.")
+
+        # Example implementation for common Hugging Face tasks
+        if task == "generate":
+            # Generate text
+            return self.model.generate(**input_data)
+
+        elif task == "encode":
+            # Encode text
+            return self.model.encode(input_data)
+
+        elif task == "predict":
+            # Make predictions
+            return self.model(**input_data)
+
+        else:
+            raise ValueError(f"Unsupported task: {task}")
+
+
+# # Example usage
+# if __name__ == "__main__":
+#     # Initialize model manager
+#     manager = ModelGrid(
+#         allocation_strategy=GPUAllocationStrategy.MEMORY_OPTIMIZED,
+#         memory_buffer=0.5,
+#         max_cpu_models=1,
+#         use_multiprocessing=True,
+#         log_level="INFO",
+#     )
+
+#     # # Add models
+#     model1 = torch.nn.Sequential(
+#         torch.nn.Linear(10, 10),
+#         torch.nn.ReLU(),
+#         torch.nn.Linear(10, 2),
+#     )
+#     manager.add_model("small_model", model1, ModelType.PYTORCH)
+
+#     # Add more models if available
+#     if TRANSFORMERS_AVAILABLE:
+#         manager.add_model(
+#             "bert_model", "bert-base-uncased", ModelType.HUGGINGFACE
+#         )
+
+#     # Allocate and load models
+#     manager.load_all_models()
+
+#     # Print GPU status
+#     print("GPU Status:")
+#     for gpu in manager.get_gpu_status():
+#         print(
+#             f"GPU {gpu['id']}: {gpu['available_memory']:.2f} GB / {gpu['total_memory']:.2f} GB"
+#         )
+#         print(f"  Models: {', '.join(gpu['models'])}")

+#     # Run a task on all models
+#     results = manager.run("forward", input_data=torch.randn(1, 10))

+#     # Unload all models
+#     manager.unload_all_models()
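
For orientation, the sketch below shows one way the new module could be driven, using only the API visible in the hunk above (ModelGrid, ModelType, GPUAllocationStrategy, PyTorchModelWrapper). It is illustrative, not part of the package diff: the import path is inferred from the file location, the model and variable names are hypothetical, and multiprocessing is disabled so the in-process code path is exercised. Note that ModelGrid.run() in this version returns placeholder results, so real inference goes through the wrapper classes. The memory estimator works out to roughly params x bytes-per-param x 1.2 x 1.1; for a 7B-parameter fp16 model that is about 7e9 x 2 B x 1.32 / 1024^3, or roughly 17 GB.

import torch

# Import path inferred from swarms/structs/multi_model_gpu_manager.py in this release.
from swarms.structs.multi_model_gpu_manager import (
    ModelGrid,
    ModelType,
    GPUAllocationStrategy,
    PyTorchModelWrapper,
)

# A tiny PyTorch model to register with the grid (hypothetical example model).
classifier = torch.nn.Sequential(
    torch.nn.Linear(16, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 4),
)

grid = ModelGrid(
    allocation_strategy=GPUAllocationStrategy.DISTRIBUTE,
    memory_buffer=0.5,          # leave 0.5 GB headroom per GPU
    max_cpu_models=1,           # allow one model to stay on CPU if no GPU fits it
    use_multiprocessing=False,  # keep everything in-process for this sketch
)
grid.add_model("classifier", classifier, ModelType.PYTORCH)

# Allocate and load; on a host without CUDA the model simply stays on CPU.
grid.load_all_models()
print(grid.get_model_status())
print(grid.get_gpu_status())

# ModelGrid.run() returns placeholder results in this version, so wrap the model
# and call the task-specific run method directly for actual inference.
device = grid.models["classifier"].device or torch.device("cpu")
wrapper = PyTorchModelWrapper(classifier, device=device)
prediction = wrapper.run("predict", torch.randn(2, 16))
print(prediction["predicted_class"])

grid.unload_all_models()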