swarms 7.7.3__py3-none-any.whl → 7.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1447 @@
1
+ """
2
+ GPU Model Manager
3
+ =================
4
+
5
+ A production-grade utility for managing multiple PyTorch or Hugging Face models
6
+ across available GPUs. This module automatically calculates model memory requirements,
7
+ allocates models to appropriate GPUs, and provides a unified interface for running
8
+ inference tasks across all loaded models.
9
+
10
+ Features:
11
+ - Dynamic model memory calculation
12
+ - Optimal GPU memory allocation
13
+ - Multi-processing support for parallel model execution
14
+ - Customizable task execution for specific models
15
+ - Comprehensive logging and error handling
16
+ """
17
+
18
+ import os
19
+ import queue
20
+ import sys
21
+ import time
22
+ import json
23
+ import uuid
24
+ import torch
25
+ import multiprocessing
26
+ from typing import Dict, List, Union, Optional, Any
27
+ from dataclasses import dataclass
28
+ from enum import Enum
29
+ from pathlib import Path
30
+ from loguru import logger
31
+ import numpy as np
32
+ from contextlib import contextmanager
33
+
34
+ # Try to import transformers, but don't fail if not available
35
+ try:
36
+ import transformers
37
+ from transformers import AutoModel, AutoTokenizer
38
+
39
+ TRANSFORMERS_AVAILABLE = True
40
+ except ImportError:
41
+ TRANSFORMERS_AVAILABLE = False
42
+ logger.warning(
43
+ "Transformers package not found. HuggingFace models will not be supported."
44
+ )
45
+
46
+
47
+ class ModelType(Enum):
48
+ """Enum defining supported model types."""
49
+
50
+ PYTORCH = "pytorch"
51
+ HUGGINGFACE = "huggingface"
52
+ UNKNOWN = "unknown"
53
+
54
+
55
+ class GPUAllocationStrategy(Enum):
56
+ """Enum defining GPU allocation strategies."""
57
+
58
+ FILL_GPU = "fill_gpu" # Fill each GPU before moving to next
59
+ DISTRIBUTE = "distribute" # Distribute models evenly across GPUs
60
+ MEMORY_OPTIMIZED = "memory_optimized" # Optimize for memory usage
61
+
62
+
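+ # The strategy is chosen once, when constructing the manager. A minimal sketch
+ # (ModelGrid is defined later in this module):
+ #     grid = ModelGrid(allocation_strategy=GPUAllocationStrategy.DISTRIBUTE)
+ 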
63
+ @dataclass
64
+ class ModelMetadata:
65
+ """Data class for storing model metadata."""
66
+
67
+ name: str
68
+ model_type: ModelType
69
+ memory_required: float # in GB
70
+ model: Any
71
+ device: Optional[torch.device] = None
72
+ process: Optional[multiprocessing.Process] = None
73
+ loaded: bool = False
74
+
75
+
76
+ @dataclass
77
+ class GPUMetadata:
78
+ """Data class for storing GPU metadata."""
79
+
80
+ id: int
81
+ device: torch.device
82
+ total_memory: float # in GB
83
+ available_memory: float # in GB
84
+ models: Optional[List[str]] = None
85
+
86
+ def __post_init__(self):
87
+ if self.models is None:
88
+ self.models = []
89
+
90
+
91
+ class ModelMemoryCalculator:
92
+ """Utility class for calculating model memory requirements."""
93
+
94
+ @staticmethod
95
+ def get_pytorch_model_size(model: torch.nn.Module) -> float:
96
+ """
97
+ Calculate the memory size of a PyTorch model in GB.
98
+
99
+ Args:
100
+ model: PyTorch model object
101
+
102
+ Returns:
103
+ Memory size in GB
104
+ """
105
+ try:
106
+ # Get model size in parameters
107
+ model_parameters = sum(
108
+ p.numel() for p in model.parameters()
109
+ )
110
+
111
+ # Calculate size based on dtype (default to float32)
112
+ if any(
113
+ p.dtype == torch.float16 for p in model.parameters()
114
+ ):
115
+ bytes_per_param = 2 # float16
116
+ elif any(
117
+ p.dtype == torch.bfloat16 for p in model.parameters()
118
+ ):
119
+ bytes_per_param = 2 # bfloat16
120
+ elif any(
121
+ p.dtype == torch.float64 for p in model.parameters()
122
+ ):
123
+ bytes_per_param = 8 # float64
124
+ else:
125
+ bytes_per_param = 4 # float32
126
+
127
+ # Calculate raw model size in bytes
128
+ model_size_bytes = model_parameters * bytes_per_param
129
+
130
+ # Add 20% for activation buffers, CUDA context, and other runtime overhead
131
+ model_size_bytes_with_overhead = model_size_bytes * 1.2
132
+
133
+ # Convert to GB
134
+ model_size_gb = model_size_bytes_with_overhead / (1024**3)
135
+
136
+ # Add a safety margin of 10%
137
+ model_size_gb_with_safety = model_size_gb * 1.1
138
+
139
+ return model_size_gb_with_safety
140
+
141
+ except Exception as e:
142
+ logger.error(
143
+ f"Error calculating PyTorch model size: {str(e)}"
144
+ )
145
+ # Fallback estimation
146
+ return 2.0 # Default estimation if calculation fails
147
+
148
+ @staticmethod
149
+ def get_huggingface_model_size(
150
+ model_or_path: Union[str, Any]
151
+ ) -> float:
152
+ """
153
+ Calculate the memory size of a Hugging Face model in GB.
154
+ Works with either model path or loaded model.
155
+
156
+ Args:
157
+ model_or_path: Hugging Face model object or path to model
158
+
159
+ Returns:
160
+ Memory size in GB
161
+ """
162
+ if not TRANSFORMERS_AVAILABLE:
163
+ logger.error(
164
+ "Transformers package not available. Cannot calculate Hugging Face model size."
165
+ )
166
+ return 5.0 # Default fallback
167
+
168
+ try:
169
+ # If it's a path, we'll try to estimate without loading
170
+ if isinstance(model_or_path, str):
171
+ path = Path(model_or_path)
172
+ if path.exists():
173
+ # Check for model info in config
174
+ config_path = path / "config.json"
175
+ if config_path.exists():
176
+ with open(config_path, "r") as f:
177
+ config = json.load(f)
178
+ if "n_params" in config:
179
+ n_params = config["n_params"]
180
+ # Estimate with overhead
181
+ model_size_gb = (
182
+ n_params * 4 * 1.5
183
+ ) / (1024**3)
184
+ return model_size_gb
185
+
186
+ # Alternatively, estimate from the on-disk weight files (.bin or .safetensors)
187
+ pytorch_files = list(path.glob("*.bin")) + list(path.glob("*.safetensors"))
188
+ if pytorch_files:
189
+ total_size = sum(
190
+ f.stat().st_size for f in pytorch_files
191
+ )
192
+ model_size_gb = (total_size * 1.5) / (
193
+ 1024**3
194
+ ) # 50% overhead
195
+ return model_size_gb
196
+
197
+ # If we can't estimate, load the model and calculate
198
+ logger.info(
199
+ f"Loading model from {model_or_path} to calculate memory requirements..."
200
+ )
201
+ model = AutoModel.from_pretrained(model_or_path)
202
+ return ModelMemoryCalculator.get_pytorch_model_size(
203
+ model
204
+ )
205
+ else:
206
+ # If we already have the model loaded, calculate directly
207
+ return ModelMemoryCalculator.get_pytorch_model_size(
208
+ model_or_path
209
+ )
210
+
211
+ except Exception as e:
212
+ logger.error(
213
+ f"Error calculating Hugging Face model size: {str(e)}"
214
+ )
215
+ return 5.0 # Default estimation if calculation fails
216
+
217
+
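+ # Worked example of the estimate above (illustrative only; `my_fp16_model` is a
+ # placeholder for a hypothetical 7B-parameter float16 model):
+ #     7e9 params * 2 bytes per param = 14.0e9 bytes
+ #     * 1.2 overhead                 = 16.8e9 bytes
+ #     / 1024**3                      ~ 15.6 GB
+ #     * 1.1 safety margin            ~ 17.2 GB
+ #
+ #     size_gb = ModelMemoryCalculator.get_pytorch_model_size(my_fp16_model)
+ 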
218
+ class GPUManager:
219
+ """Manages available GPUs and their memory."""
220
+
221
+ def __init__(self):
222
+ """Initialize the GPU manager."""
223
+ self.gpus: List[GPUMetadata] = []
224
+ self._initialize_gpus()
225
+
226
+ def _initialize_gpus(self) -> None:
227
+ """
228
+ Initialize available GPUs and collect their metadata.
229
+ """
230
+ if not torch.cuda.is_available():
231
+ logger.warning("No CUDA-capable devices detected.")
232
+ return
233
+
234
+ num_gpus = torch.cuda.device_count()
235
+ logger.info(f"Found {num_gpus} CUDA-capable devices.")
236
+
237
+ for gpu_id in range(num_gpus):
238
+ device = torch.device(f"cuda:{gpu_id}")
239
+
240
+ # Get total memory
241
+ total_memory = torch.cuda.get_device_properties(
242
+ gpu_id
243
+ ).total_memory
244
+ total_memory_gb = total_memory / (1024**3)
245
+
246
+ # Get available memory
247
+ torch.cuda.set_device(device)
248
+ torch.cuda.empty_cache()
249
+ available_memory = torch.cuda.mem_get_info(device)[0]
250
+ available_memory_gb = available_memory / (1024**3)
251
+
252
+ # Create GPU metadata
253
+ gpu_metadata = GPUMetadata(
254
+ id=gpu_id,
255
+ device=device,
256
+ total_memory=total_memory_gb,
257
+ available_memory=available_memory_gb,
258
+ )
259
+
260
+ self.gpus.append(gpu_metadata)
261
+ logger.info(
262
+ f"GPU {gpu_id}: {total_memory_gb:.2f} GB total, {available_memory_gb:.2f} GB available"
263
+ )
264
+
265
+ def update_gpu_memory_info(self) -> None:
266
+ """
267
+ Update the available memory information for all GPUs.
268
+ """
269
+ if not self.gpus:
270
+ logger.warning(
271
+ "No GPUs available to update memory information."
272
+ )
273
+ return
274
+
275
+ for gpu in self.gpus:
276
+ torch.cuda.set_device(gpu.device)
277
+ torch.cuda.empty_cache()
278
+ available_memory = torch.cuda.mem_get_info(gpu.device)[0]
279
+ gpu.available_memory = available_memory / (1024**3)
280
+ logger.debug(
281
+ f"Updated GPU {gpu.id}: {gpu.available_memory:.2f} GB available"
282
+ )
283
+
284
+
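+ # Usage sketch (assumes at least one CUDA device is visible). Note that
+ # torch.cuda.mem_get_info() returns (free_bytes, total_bytes), which is why the
+ # manager indexes [0] above for available memory before converting to GB:
+ #     gpus = GPUManager()
+ #     for gpu in gpus.gpus:
+ #         print(f"GPU {gpu.id}: {gpu.available_memory:.2f}/{gpu.total_memory:.2f} GB free")
+ 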
285
+ class ModelGrid:
286
+ """
287
+ Main class for managing multiple models across available GPUs.
288
+
289
+ This class handles:
290
+ - Loading and unloading models
291
+ - Allocating models to appropriate GPUs based on memory requirements
292
+ - Running inference tasks on specific models
293
+ - Managing model lifecycle through multiple processes
294
+ """
295
+
296
+ def __init__(
297
+ self,
298
+ allocation_strategy: GPUAllocationStrategy = GPUAllocationStrategy.MEMORY_OPTIMIZED,
299
+ memory_buffer: float = 0.5, # GB buffer to leave on each GPU
300
+ max_cpu_models: int = 0, # Maximum models to keep on CPU if no GPU space
301
+ use_multiprocessing: bool = True,
302
+ log_level: str = "INFO",
303
+ ):
304
+ """
305
+ Initialize the model manager.
306
+
307
+ Args:
308
+ allocation_strategy: Strategy for allocating models to GPUs
309
+ memory_buffer: Memory buffer to leave on each GPU (in GB)
310
+ max_cpu_models: Maximum number of models to keep on CPU if no GPU space
311
+ use_multiprocessing: Whether to use multiprocessing for model execution
312
+ log_level: Logging level
313
+ """
314
+ # Set log level
315
+ logger.remove()
316
+ logger.add(sys.stderr, level=log_level)
317
+ logger.add(
318
+ "gpu_model_manager.log",
319
+ rotation="100 MB",
320
+ retention="1 week",
321
+ level=log_level,
322
+ )
323
+
324
+ self.models: Dict[str, ModelMetadata] = {}
325
+ self.gpu_manager = GPUManager()
326
+ self.allocation_strategy = allocation_strategy
327
+ self.memory_buffer = memory_buffer
328
+ self.max_cpu_models = max_cpu_models
329
+ self.use_multiprocessing = use_multiprocessing
330
+
331
+ # Initialize locks and queues for multiprocessing
332
+ self.manager = (
333
+ multiprocessing.Manager() if use_multiprocessing else None
334
+ )
335
+ self.task_queues: Dict[str, Any] = (
336
+ self.manager.dict() if use_multiprocessing else {}
337
+ )
338
+ self.result_queues: Dict[str, Any] = (
339
+ self.manager.dict() if use_multiprocessing else {}
340
+ )
341
+ self.model_locks: Dict[str, Any] = {}
342
+
343
+ logger.info(
344
+ f"ModelGrid initialized with {len(self.gpu_manager.gpus)} GPUs"
345
+ )
346
+ logger.info(
347
+ f"Using allocation strategy: {allocation_strategy.value}"
348
+ )
349
+
350
+ def add_model(
351
+ self,
352
+ model_name: str,
353
+ model: Any,
354
+ model_type: Optional[ModelType] = None,
355
+ memory_override: Optional[float] = None,
356
+ ) -> bool:
357
+ """
358
+ Add a model to the manager.
359
+
360
+ Args:
361
+ model_name: Unique name for the model
362
+ model: The model object or path
363
+ model_type: Type of the model (will be auto-detected if not provided)
364
+ memory_override: Override the automatic memory calculation (in GB)
365
+
366
+ Returns:
367
+ Success status
368
+ """
369
+ if model_name in self.models:
370
+ logger.warning(
371
+ f"Model '{model_name}' already exists. Use update_model to replace it."
372
+ )
373
+ return False
374
+
375
+ # Auto-detect model type if not provided
376
+ if model_type is None:
377
+ if isinstance(model, str):
378
+ if os.path.exists(model) and TRANSFORMERS_AVAILABLE:
379
+ model_type = ModelType.HUGGINGFACE
380
+ else:
381
+ model_type = ModelType.UNKNOWN
382
+ elif isinstance(model, torch.nn.Module):
383
+ model_type = ModelType.PYTORCH
384
+ elif TRANSFORMERS_AVAILABLE and isinstance(
385
+ model, transformers.PreTrainedModel
386
+ ):
387
+ model_type = ModelType.HUGGINGFACE
388
+ else:
389
+ model_type = ModelType.UNKNOWN
390
+
391
+ # Calculate memory requirements
392
+ if memory_override is not None:
393
+ memory_required = memory_override
394
+ else:
395
+ if model_type == ModelType.PYTORCH:
396
+ memory_required = (
397
+ ModelMemoryCalculator.get_pytorch_model_size(
398
+ model
399
+ )
400
+ )
401
+ elif model_type == ModelType.HUGGINGFACE:
402
+ memory_required = (
403
+ ModelMemoryCalculator.get_huggingface_model_size(
404
+ model
405
+ )
406
+ )
407
+ else:
408
+ logger.warning(
409
+ f"Unknown model type for '{model_name}'. Using default memory estimation."
410
+ )
411
+ memory_required = 2.0 # Default estimation
412
+
413
+ # Create model metadata
414
+ model_metadata = ModelMetadata(
415
+ name=model_name,
416
+ model_type=model_type,
417
+ memory_required=memory_required,
418
+ model=model,
419
+ loaded=False,
420
+ )
421
+
422
+ self.models[model_name] = model_metadata
423
+ logger.info(
424
+ f"Added model '{model_name}' ({model_type.value}) with {memory_required:.2f} GB memory requirement"
425
+ )
426
+
427
+ # Initialize multiprocessing resources for this model
428
+ if self.use_multiprocessing:
429
+ self.task_queues[model_name] = self.manager.Queue()
430
+ self.result_queues[model_name] = self.manager.Queue()
431
+ self.model_locks[model_name] = self.manager.Lock()
432
+
433
+ return True
434
+
435
+ def remove_model(self, model_name: str) -> bool:
436
+ """
437
+ Remove a model from the manager.
438
+
439
+ Args:
440
+ model_name: Name of the model to remove
441
+
442
+ Returns:
443
+ Success status
444
+ """
445
+ if model_name not in self.models:
446
+ logger.warning(f"Model '{model_name}' does not exist.")
447
+ return False
448
+
449
+ model_metadata = self.models[model_name]
450
+
451
+ # Terminate the model process if running
452
+ if (
453
+ model_metadata.process is not None
454
+ and model_metadata.process.is_alive()
455
+ ):
456
+ logger.info(
457
+ f"Terminating process for model '{model_name}'"
458
+ )
459
+ model_metadata.process.terminate()
460
+ model_metadata.process.join(timeout=5)
461
+ if model_metadata.process.is_alive():
462
+ logger.warning(
463
+ f"Process for model '{model_name}' did not terminate gracefully. Killing..."
464
+ )
465
+ model_metadata.process.kill()
466
+
467
+ # Remove from GPU if loaded
468
+ if (
469
+ model_metadata.loaded
470
+ and model_metadata.device is not None
471
+ ):
472
+ gpu_id = model_metadata.device.index
473
+ for gpu in self.gpu_manager.gpus:
474
+ if gpu.id == gpu_id and model_name in gpu.models:
475
+ gpu.models.remove(model_name)
476
+ logger.info(
477
+ f"Removed model '{model_name}' from GPU {gpu_id}"
478
+ )
479
+
480
+ # Update GPU memory info
481
+ self.gpu_manager.update_gpu_memory_info()
482
+
483
+ # Clean up multiprocessing resources
484
+ if self.use_multiprocessing:
485
+ if model_name in self.task_queues:
486
+ del self.task_queues[model_name]
487
+ if model_name in self.result_queues:
488
+ del self.result_queues[model_name]
489
+ if model_name in self.model_locks:
490
+ del self.model_locks[model_name]
491
+
492
+ # Remove model metadata
493
+ del self.models[model_name]
494
+
495
+ logger.info(f"Removed model '{model_name}'")
496
+ return True
497
+
498
+ def _find_best_gpu_for_model(
499
+ self, model_metadata: ModelMetadata
500
+ ) -> Optional[GPUMetadata]:
501
+ """
502
+ Find the best GPU for a given model based on the allocation strategy.
503
+
504
+ Args:
505
+ model_metadata: Metadata for the model
506
+
507
+ Returns:
508
+ Best GPU metadata or None if no suitable GPU found
509
+ """
510
+ model_memory = (
511
+ model_metadata.memory_required + self.memory_buffer
512
+ )
513
+
514
+ # Update GPU memory info before allocation
515
+ self.gpu_manager.update_gpu_memory_info()
516
+
517
+ # Find available GPUs that can fit the model
518
+ available_gpus = [
519
+ gpu
520
+ for gpu in self.gpu_manager.gpus
521
+ if gpu.available_memory >= model_memory
522
+ ]
523
+
524
+ if not available_gpus:
525
+ logger.warning(
526
+ f"No GPU with sufficient memory for model '{model_metadata.name}' "
527
+ f"(requires {model_memory:.2f} GB)"
528
+ )
529
+ return None
530
+
531
+ # Apply allocation strategy
532
+ if self.allocation_strategy == GPUAllocationStrategy.FILL_GPU:
533
+ # Sort by number of models (ascending) and then by available memory (descending)
534
+ return sorted(
535
+ available_gpus,
536
+ key=lambda g: (len(g.models), -g.available_memory),
537
+ )[0]
538
+
539
+ elif (
540
+ self.allocation_strategy
541
+ == GPUAllocationStrategy.DISTRIBUTE
542
+ ):
543
+ # Sort by number of models (ascending)
544
+ return sorted(
545
+ available_gpus, key=lambda g: len(g.models)
546
+ )[0]
547
+
548
+ elif (
549
+ self.allocation_strategy
550
+ == GPUAllocationStrategy.MEMORY_OPTIMIZED
551
+ ):
552
+ # Sort by available memory (ascending) but ensure it fits
553
+ return sorted(
554
+ available_gpus, key=lambda g: g.available_memory
555
+ )[0]
556
+
557
+ # Default fallback
558
+ return available_gpus[0]
559
+
560
+ def allocate_all_models(self) -> Dict[str, Optional[int]]:
561
+ """
562
+ Allocate all models to GPUs based on the allocation strategy.
563
+
564
+ Returns:
565
+ Dict mapping model names to allocated GPU IDs (or None if on CPU)
566
+ """
567
+ # Sort models by memory requirement (descending)
568
+ sorted_models = sorted(
569
+ self.models.values(),
570
+ key=lambda m: m.memory_required,
571
+ reverse=True,
572
+ )
573
+
574
+ allocations = {}
575
+
576
+ for model_metadata in sorted_models:
577
+ best_gpu = self._find_best_gpu_for_model(model_metadata)
578
+
579
+ if best_gpu is not None:
580
+ # Allocate model to GPU
581
+ gpu_id = best_gpu.id
582
+ model_metadata.device = best_gpu.device
583
+ best_gpu.models.append(model_metadata.name)
584
+ best_gpu.available_memory -= (
585
+ model_metadata.memory_required
586
+ + self.memory_buffer
587
+ )
588
+
589
+ allocations[model_metadata.name] = gpu_id
590
+ logger.info(
591
+ f"Allocated model '{model_metadata.name}' to GPU {gpu_id} "
592
+ f"({best_gpu.available_memory:.2f} GB remaining)"
593
+ )
594
+ else:
595
+ # No suitable GPU found, keep model on CPU if allowed
596
+ if (
597
+ len(
598
+ [m for m in allocations.values() if m is None]
599
+ )
600
+ < self.max_cpu_models
601
+ ):
602
+ model_metadata.device = None
603
+ allocations[model_metadata.name] = None
604
+ logger.info(
605
+ f"Keeping model '{model_metadata.name}' on CPU (no suitable GPU)"
606
+ )
607
+ else:
608
+ logger.warning(
609
+ f"Cannot allocate model '{model_metadata.name}'. "
610
+ f"No GPU space available and max_cpu_models limit reached."
611
+ )
612
+
613
+ return allocations
614
+
615
+ def load_model(self, model_name: str) -> bool:
616
+ """
617
+ Load a specific model to its allocated device.
618
+
619
+ Args:
620
+ model_name: Name of the model to load
621
+
622
+ Returns:
623
+ Success status
624
+ """
625
+ if model_name not in self.models:
626
+ logger.warning(f"Model '{model_name}' does not exist.")
627
+ return False
628
+
629
+ model_metadata = self.models[model_name]
630
+
631
+ # Skip if already loaded
632
+ if model_metadata.loaded:
633
+ logger.info(f"Model '{model_name}' is already loaded.")
634
+ return True
635
+
636
+ # Allocate to GPU if not already allocated
637
+ if model_metadata.device is None:
638
+ best_gpu = self._find_best_gpu_for_model(model_metadata)
639
+ if best_gpu is not None:
640
+ model_metadata.device = best_gpu.device
641
+ best_gpu.models.append(model_name)
642
+ best_gpu.available_memory -= (
643
+ model_metadata.memory_required
644
+ + self.memory_buffer
645
+ )
646
+ logger.info(
647
+ f"Allocated model '{model_name}' to GPU {best_gpu.id} "
648
+ f"({best_gpu.available_memory:.2f} GB remaining)"
649
+ )
650
+
651
+ try:
652
+ device_str = (
653
+ "cpu"
654
+ if model_metadata.device is None
655
+ else str(model_metadata.device)
656
+ )
657
+ logger.info(
658
+ f"Loading model '{model_name}' to {device_str}"
659
+ )
660
+
661
+ # Load based on model type
662
+ if model_metadata.model_type == ModelType.PYTORCH:
663
+ if isinstance(model_metadata.model, torch.nn.Module):
664
+ model_metadata.model.to(
665
+ model_metadata.device or "cpu"
666
+ )
667
+ else:
668
+ logger.error(
669
+ f"Model '{model_name}' is not a valid PyTorch module."
670
+ )
671
+ return False
672
+
673
+ elif model_metadata.model_type == ModelType.HUGGINGFACE:
674
+ if TRANSFORMERS_AVAILABLE:
675
+ if isinstance(model_metadata.model, str):
676
+ # Load from path
677
+ logger.info(
678
+ f"Loading HuggingFace model from {model_metadata.model}"
679
+ )
680
+ loaded_model = AutoModel.from_pretrained(
681
+ model_metadata.model
682
+ )
683
+ loaded_model.to(
684
+ model_metadata.device or "cpu"
685
+ )
686
+ model_metadata.model = loaded_model
687
+ elif isinstance(
688
+ model_metadata.model,
689
+ transformers.PreTrainedModel,
690
+ ):
691
+ # Move existing model to device
692
+ model_metadata.model.to(
693
+ model_metadata.device or "cpu"
694
+ )
695
+ else:
696
+ logger.error(
697
+ f"Model '{model_name}' is not a valid HuggingFace model."
698
+ )
699
+ return False
700
+ else:
701
+ logger.error(
702
+ "Transformers package not available. Cannot load HuggingFace model."
703
+ )
704
+ return False
705
+ else:
706
+ logger.error(
707
+ f"Unknown model type for '{model_name}'."
708
+ )
709
+ return False
710
+
711
+ model_metadata.loaded = True
712
+
713
+ # Start model process if using multiprocessing
714
+ if self.use_multiprocessing:
715
+ self._start_model_process(model_name)
716
+
717
+ logger.info(f"Successfully loaded model '{model_name}'")
718
+ return True
719
+
720
+ except Exception as e:
721
+ logger.error(
722
+ f"Error loading model '{model_name}': {str(e)}"
723
+ )
724
+ # Try to clean up GPU allocation if failed
725
+ if model_metadata.device is not None:
726
+ gpu_id = model_metadata.device.index
727
+ for gpu in self.gpu_manager.gpus:
728
+ if gpu.id == gpu_id and model_name in gpu.models:
729
+ gpu.models.remove(model_name)
730
+ gpu.available_memory += (
731
+ model_metadata.memory_required
732
+ + self.memory_buffer
733
+ )
734
+ model_metadata.device = None
735
+
736
+ self.gpu_manager.update_gpu_memory_info()
737
+ return False
738
+
739
+ def unload_model(self, model_name: str) -> bool:
740
+ """
741
+ Unload a specific model from its device.
742
+
743
+ Args:
744
+ model_name: Name of the model to unload
745
+
746
+ Returns:
747
+ Success status
748
+ """
749
+ if model_name not in self.models:
750
+ logger.warning(f"Model '{model_name}' does not exist.")
751
+ return False
752
+
753
+ model_metadata = self.models[model_name]
754
+
755
+ # Skip if not loaded
756
+ if not model_metadata.loaded:
757
+ logger.info(f"Model '{model_name}' is not loaded.")
758
+ return True
759
+
760
+ try:
761
+ # Stop model process if using multiprocessing
762
+ if (
763
+ self.use_multiprocessing
764
+ and model_metadata.process is not None
765
+ ):
766
+ logger.info(
767
+ f"Stopping process for model '{model_name}'"
768
+ )
769
+ model_metadata.process.terminate()
770
+ model_metadata.process.join(timeout=5)
771
+ if model_metadata.process.is_alive():
772
+ logger.warning(
773
+ f"Process for model '{model_name}' did not terminate gracefully. Killing..."
774
+ )
775
+ model_metadata.process.kill()
776
+ model_metadata.process = None
777
+
778
+ # Move model to CPU and clean up
779
+ if (
780
+ model_metadata.device is not None
781
+ and model_metadata.device.type == "cuda"
782
+ ):
783
+ logger.info(
784
+ f"Unloading model '{model_name}' from {model_metadata.device}"
785
+ )
786
+
787
+ # Update GPU allocation
788
+ gpu_id = model_metadata.device.index
789
+ for gpu in self.gpu_manager.gpus:
790
+ if gpu.id == gpu_id and model_name in gpu.models:
791
+ gpu.models.remove(model_name)
792
+ gpu.available_memory += (
793
+ model_metadata.memory_required
794
+ + self.memory_buffer
795
+ )
796
+
797
+ # Move model to CPU if it's a PyTorch module
798
+ if isinstance(model_metadata.model, torch.nn.Module):
799
+ model_metadata.model.to("cpu")
800
+
801
+ # Clear CUDA cache
802
+ if torch.cuda.is_available():
803
+ torch.cuda.empty_cache()
804
+
805
+ model_metadata.device = None
806
+ model_metadata.loaded = False
807
+
808
+ # Update GPU memory info
809
+ self.gpu_manager.update_gpu_memory_info()
810
+
811
+ logger.info(f"Successfully unloaded model '{model_name}'")
812
+ return True
813
+
814
+ except Exception as e:
815
+ logger.error(
816
+ f"Error unloading model '{model_name}': {str(e)}"
817
+ )
818
+ return False
819
+
820
+ def load_all_models(self) -> Dict[str, bool]:
821
+ """
822
+ Load all models to their allocated devices.
823
+
824
+ Returns:
825
+ Dict mapping model names to load success status
826
+ """
827
+ # First allocate all models
828
+ self.allocate_all_models()
829
+
830
+ # Then load each model
831
+ results = {}
832
+ for model_name in self.models:
833
+ results[model_name] = self.load_model(model_name)
834
+
835
+ return results
836
+
837
+ def unload_all_models(self) -> Dict[str, bool]:
838
+ """
839
+ Unload all models from their devices.
840
+
841
+ Returns:
842
+ Dict mapping model names to unload success status
843
+ """
844
+ results = {}
845
+ for model_name in self.models:
846
+ results[model_name] = self.unload_model(model_name)
847
+
848
+ return results
849
+
850
+ def _start_model_process(self, model_name: str) -> bool:
851
+ """
852
+ Start a dedicated process for a model.
853
+
854
+ Args:
855
+ model_name: Name of the model
856
+
857
+ Returns:
858
+ Success status
859
+ """
860
+ if not self.use_multiprocessing:
861
+ logger.warning(
862
+ "Multiprocessing is disabled. Cannot start model process."
863
+ )
864
+ return False
865
+
866
+ if model_name not in self.models:
867
+ logger.warning(f"Model '{model_name}' does not exist.")
868
+ return False
869
+
870
+ model_metadata = self.models[model_name]
871
+
872
+ if (
873
+ model_metadata.process is not None
874
+ and model_metadata.process.is_alive()
875
+ ):
876
+ logger.info(
877
+ f"Process for model '{model_name}' is already running."
878
+ )
879
+ return True
880
+
881
+ try:
882
+ # Create a new process for the model
883
+ process = multiprocessing.Process(
884
+ target=self._model_process_worker,
885
+ args=(
886
+ model_name,
887
+ model_metadata.model_type,
888
+ self.task_queues[model_name],
889
+ self.result_queues[model_name],
890
+ (
891
+ model_metadata.device.index
892
+ if model_metadata.device is not None
893
+ else None
894
+ ),
895
+ ),
896
+ daemon=True,
897
+ )
898
+
899
+ process.start()
900
+ model_metadata.process = process
901
+
902
+ logger.info(
903
+ f"Started process for model '{model_name}' (PID: {process.pid})"
904
+ )
905
+ return True
906
+
907
+ except Exception as e:
908
+ logger.error(
909
+ f"Error starting process for model '{model_name}': {str(e)}"
910
+ )
911
+ return False
912
+
913
+ def _model_process_worker(
914
+ self,
915
+ model_name: str,
916
+ model_type: ModelType,
917
+ task_queue: multiprocessing.Queue,
918
+ result_queue: multiprocessing.Queue,
919
+ gpu_id: Optional[int],
920
+ ) -> None:
921
+ """
922
+ Worker function for model processes.
923
+
924
+ Args:
925
+ model_name: Name of the model
926
+ model_type: Type of the model
927
+ task_queue: Queue for receiving tasks
928
+ result_queue: Queue for sending results
929
+ gpu_id: GPU device ID or None for CPU
930
+ """
931
+ try:
932
+ # Configure device
933
+ if gpu_id is not None:
934
+ device = torch.device(f"cuda:{gpu_id}")
935
+ torch.cuda.set_device(device)
936
+ else:
937
+ device = torch.device("cpu")
938
+
939
+ logger.info(
940
+ f"Model process for '{model_name}' started on {device}"
941
+ )
942
+
943
+ # Process tasks from the queue
944
+ while True:
945
+ try:
946
+ # Get task from queue with timeout
947
+ task_id, task_type, task_data = task_queue.get(
948
+ timeout=1.0
949
+ )
950
+
951
+ logger.debug(
952
+ f"Model '{model_name}' processing task {task_id}: {task_type}"
953
+ )
954
+
955
+ # Process task based on task_type
956
+ try:
957
+ if task_type == "run_model":
958
+ # Run the model on the task data
959
+ # This would be implemented based on the specific model type
960
+ result = {
961
+ "status": "success",
962
+ "result": "Model output placeholder",
963
+ }
964
+ else:
965
+ result = {
966
+ "status": "error",
967
+ "error": f"Unknown task type: {task_type}",
968
+ }
969
+ except Exception as e:
970
+ logger.error(
971
+ f"Error processing task {task_id} for model '{model_name}': {str(e)}"
972
+ )
973
+ result = {"status": "error", "error": str(e)}
974
+
975
+ # Send result back
976
+ result_queue.put((task_id, result))
977
+ logger.debug(
978
+ f"Model '{model_name}' completed task {task_id}"
979
+ )
980
+
981
+ except queue.Empty:
982
+ # No tasks in queue, just continue
983
+ continue
984
+
985
+ except KeyboardInterrupt:
986
+ logger.info(
987
+ f"Model process for '{model_name}' interrupted"
988
+ )
989
+ except Exception as e:
990
+ logger.error(
991
+ f"Error in model process for '{model_name}': {str(e)}"
992
+ )
993
+ finally:
994
+ logger.info(f"Model process for '{model_name}' exiting")
995
+
996
+ @contextmanager
997
+ def _model_lock(self, model_name: str) -> None:
998
+ """
999
+ Context manager for acquiring model lock.
1000
+
1001
+ Args:
1002
+ model_name: Name of the model
1003
+ """
1004
+ if (
1005
+ not self.use_multiprocessing
1006
+ or model_name not in self.model_locks
1007
+ ):
1008
+ # No-op if not using multiprocessing
1009
+ yield
1010
+ return
1011
+
1012
+ lock = self.model_locks[model_name]
1013
+ lock.acquire()
1014
+ try:
1015
+ yield
1016
+ finally:
1017
+ lock.release()
1018
+
1019
+ def run(
1020
+ self,
1021
+ task: Union[str, List[str]],
1022
+ model_names: Optional[List[str]] = None,
1023
+ input_data: Any = None,
1024
+ timeout: float = 30.0,
1025
+ ) -> Dict[str, Any]:
1026
+ """
1027
+ Run a task on specific models or all models.
1028
+
1029
+ Args:
1030
+ task: Task name or list of task names to run
1031
+ model_names: List of model names to run the task on (None for all loaded models)
1032
+ input_data: Input data for the task
1033
+ timeout: Timeout in seconds
1034
+
1035
+ Returns:
1036
+ Dict mapping model names to results
1037
+ """
1038
+ # Normalize task to list
1039
+ if isinstance(task, str):
1040
+ tasks = [task]
1041
+ else:
1042
+ tasks = task
1043
+
1044
+ # Determine which models to run on
1045
+ if model_names is None:
1046
+ target_models = [
1047
+ name
1048
+ for name, meta in self.models.items()
1049
+ if meta.loaded
1050
+ ]
1051
+ else:
1052
+ target_models = [
1053
+ name
1054
+ for name in model_names
1055
+ if name in self.models and self.models[name].loaded
1056
+ ]
1057
+
1058
+ if not target_models:
1059
+ logger.warning(
1060
+ "No loaded models available for running tasks."
1061
+ )
1062
+ return {}
1063
+
1064
+ logger.info(
1065
+ f"Running tasks {tasks} on models: {', '.join(target_models)}"
1066
+ )
1067
+
1068
+ results = {}
1069
+
1070
+ # Run tasks on each model
1071
+ for model_name in target_models:
1072
+ model_metadata = self.models[model_name]
1073
+
1074
+ try:
1075
+ if (
1076
+ self.use_multiprocessing
1077
+ and model_metadata.process is not None
1078
+ ):
1079
+ # Run in separate process
1080
+ results[model_name] = self._run_in_process(
1081
+ model_name, tasks, input_data, timeout
1082
+ )
1083
+ else:
1084
+ # Run in current process
1085
+ results[model_name] = (
1086
+ self._run_in_current_process(
1087
+ model_name, tasks, input_data
1088
+ )
1089
+ )
1090
+
1091
+ except Exception as e:
1092
+ logger.error(
1093
+ f"Error running tasks on model '{model_name}': {str(e)}"
1094
+ )
1095
+ results[model_name] = {
1096
+ "status": "error",
1097
+ "error": str(e),
1098
+ }
1099
+
1100
+ return results
1101
+
1102
+ def _run_in_process(
1103
+ self,
1104
+ model_name: str,
1105
+ tasks: List[str],
1106
+ input_data: Any,
1107
+ timeout: float,
1108
+ ) -> Dict[str, Any]:
1109
+ """
1110
+ Run tasks on a model in a separate process.
1111
+
1112
+ Args:
1113
+ model_name: Name of the model
1114
+ tasks: List of tasks to run
1115
+ input_data: Input data for the tasks
1116
+ timeout: Timeout in seconds
1117
+
1118
+ Returns:
1119
+ Task results
1120
+ """
1121
+ task_id = str(uuid.uuid4())
1122
+ task_queue = self.task_queues[model_name]
1123
+ result_queue = self.result_queues[model_name]
1124
+
1125
+ # Send the first requested task to the model process (only tasks[0] is dispatched here)
1126
+ task_queue.put((task_id, tasks[0], input_data))
1127
+
1128
+ # Wait for result
1129
+ start_time = time.time()
1130
+ while time.time() - start_time < timeout:
1131
+ try:
1132
+ # Check if result is available
1133
+ result_task_id, result = result_queue.get(block=False)
1134
+
1135
+ if result_task_id == task_id:
1136
+ return result
1137
+ else:
1138
+ # Put back other task results
1139
+ result_queue.put((result_task_id, result))
1140
+
1141
+ except queue.Empty:
1142
+ # No results yet, wait a bit
1143
+ time.sleep(0.1)
1144
+
1145
+ # Timeout
1146
+ logger.warning(
1147
+ f"Timeout waiting for tasks on model '{model_name}'"
1148
+ )
1149
+ return {"status": "error", "error": "Timeout"}
1150
+
1151
+ def _run_in_current_process(
1152
+ self, model_name: str, tasks: List[str], input_data: Any
1153
+ ) -> Dict[str, Any]:
1154
+ """
1155
+ Run tasks on a model in the current process.
1156
+
1157
+ Args:
1158
+ model_name: Name of the model
1159
+ tasks: List of tasks to run
1160
+ input_data: Input data for the tasks
1161
+
1162
+ Returns:
1163
+ Task results
1164
+ """
1165
+ model_metadata = self.models[model_name]
1166
+
1167
+ with self._model_lock(model_name):
1168
+ try:
1169
+ # This would need to be implemented based on the specific model types
1170
+ # and tasks supported. Here's a simple placeholder:
1171
+ model = model_metadata.model
1172
+
1173
+ if model_metadata.model_type == ModelType.PYTORCH:
1174
+ # Run PyTorch model
1175
+ return {
1176
+ "status": "success",
1177
+ "result": "PyTorch model output placeholder",
1178
+ }
1179
+
1180
+ elif (
1181
+ model_metadata.model_type == ModelType.HUGGINGFACE
1182
+ ):
1183
+ # Run Hugging Face model
1184
+ return {
1185
+ "status": "success",
1186
+ "result": "Hugging Face model output placeholder",
1187
+ }
1188
+
1189
+ else:
1190
+ return {
1191
+ "status": "error",
1192
+ "error": f"Unsupported model type: {model_metadata.model_type}",
1193
+ }
1194
+
1195
+ except Exception as e:
1196
+ logger.error(
1197
+ f"Error running tasks on model '{model_name}': {str(e)}"
1198
+ )
1199
+ return {"status": "error", "error": str(e)}
1200
+
1201
+ def get_gpu_status(self) -> List[Dict[str, Any]]:
1202
+ """
1203
+ Get status information for all GPUs.
1204
+
1205
+ Returns:
1206
+ List of GPU status dictionaries
1207
+ """
1208
+ # Update GPU memory info
1209
+ self.gpu_manager.update_gpu_memory_info()
1210
+
1211
+ gpu_status = []
1212
+ for gpu in self.gpu_manager.gpus:
1213
+ status = {
1214
+ "id": gpu.id,
1215
+ "total_memory": gpu.total_memory,
1216
+ "available_memory": gpu.available_memory,
1217
+ "used_memory": gpu.total_memory
1218
+ - gpu.available_memory,
1219
+ "utilization": (
1220
+ gpu.total_memory - gpu.available_memory
1221
+ )
1222
+ / gpu.total_memory,
1223
+ "models": gpu.models,
1224
+ }
1225
+ gpu_status.append(status)
1226
+
1227
+ return gpu_status
1228
+
1229
+ def get_model_status(self) -> Dict[str, Dict[str, Any]]:
1230
+ """
1231
+ Get status information for all models.
1232
+
1233
+ Returns:
1234
+ Dict mapping model names to status dictionaries
1235
+ """
1236
+ model_status = {}
1237
+ for name, metadata in self.models.items():
1238
+ status = {
1239
+ "name": name,
1240
+ "type": metadata.model_type.value,
1241
+ "memory_required": metadata.memory_required,
1242
+ "loaded": metadata.loaded,
1243
+ "device": (
1244
+ str(metadata.device)
1245
+ if metadata.device is not None
1246
+ else "cpu"
1247
+ ),
1248
+ "process_running": metadata.process is not None
1249
+ and metadata.process.is_alive(),
1250
+ }
1251
+ model_status[name] = status
1252
+
1253
+ return model_status
1254
+
1255
+
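+ # Illustrative sketch for ModelGrid.run(): results come back keyed by model name,
+ # each entry carrying a "status" plus either "result" or "error" (the exact payload
+ # depends on how the task handlers above are implemented; `grid` and `x` are
+ # placeholders):
+ #     outputs = grid.run("forward", model_names=["small_model"], input_data=x)
+ #     for name, payload in outputs.items():
+ #         if payload.get("status") == "success":
+ #             print(name, payload["result"])
+ #         else:
+ #             print(name, "failed:", payload.get("error"))
+ 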
1256
+ class ModelWithCustomRunMethod:
1257
+ """
1258
+ Base class for models with custom run methods.
1259
+
1260
+ Extend this class to implement custom run methods for specific model types.
1261
+ """
1262
+
1263
+ def __init__(
1264
+ self, model: Any, device: Optional[torch.device] = None
1265
+ ):
1266
+ """
1267
+ Initialize the model wrapper.
1268
+
1269
+ Args:
1270
+ model: The model object
1271
+ device: Device to run the model on
1272
+ """
1273
+ self.model = model
1274
+ self.device = device
1275
+
1276
+ def run(self, task: str, input_data: Any) -> Any:
1277
+ """
1278
+ Run a task on the model.
1279
+
1280
+ Args:
1281
+ task: Task name
1282
+ input_data: Input data for the task
1283
+
1284
+ Returns:
1285
+ Task result
1286
+ """
1287
+ raise NotImplementedError(
1288
+ "Subclasses must implement this method"
1289
+ )
1290
+
1291
+
1292
+ class PyTorchModelWrapper(ModelWithCustomRunMethod):
1293
+ """
1294
+ Wrapper for PyTorch models with custom run methods.
1295
+ """
1296
+
1297
+ def run(self, task: str, input_data: Any) -> Any:
1298
+ """
1299
+ Run a task on a PyTorch model.
1300
+
1301
+ Args:
1302
+ task: Task name
1303
+ input_data: Input data for the task
1304
+
1305
+ Returns:
1306
+ Task result
1307
+ """
1308
+ # Example implementation for common PyTorch tasks
1309
+ if task == "forward":
1310
+ # Ensure model is in eval mode
1311
+ self.model.eval()
1312
+
1313
+ # Convert input to tensor if needed
1314
+ if not isinstance(input_data, torch.Tensor):
1315
+ if isinstance(input_data, np.ndarray):
1316
+ input_tensor = torch.from_numpy(input_data).to(
1317
+ self.device
1318
+ )
1319
+ else:
1320
+ input_tensor = torch.tensor(input_data).to(
1321
+ self.device
1322
+ )
1323
+ else:
1324
+ input_tensor = input_data.to(self.device)
1325
+
1326
+ # Run forward pass
1327
+ with torch.no_grad():
1328
+ output = self.model(input_tensor)
1329
+
1330
+ # Convert output to numpy if needed
1331
+ if isinstance(output, torch.Tensor):
1332
+ return output.cpu().numpy()
1333
+ else:
1334
+ return output
1335
+
1336
+ elif task == "predict":
1337
+ # Similar to forward but with different post-processing
1338
+ self.model.eval()
1339
+
1340
+ # Convert input to tensor if needed
1341
+ if not isinstance(input_data, torch.Tensor):
1342
+ if isinstance(input_data, np.ndarray):
1343
+ input_tensor = torch.from_numpy(input_data).to(
1344
+ self.device
1345
+ )
1346
+ else:
1347
+ input_tensor = torch.tensor(input_data).to(
1348
+ self.device
1349
+ )
1350
+ else:
1351
+ input_tensor = input_data.to(self.device)
1352
+
1353
+ # Run prediction
1354
+ with torch.no_grad():
1355
+ output = self.model(input_tensor)
1356
+
1357
+ # Apply softmax if output is logits
1358
+ if len(output.shape) > 1 and output.shape[1] > 1:
1359
+ probs = torch.nn.functional.softmax(output, dim=1)
1360
+ predicted_class = torch.argmax(probs, dim=1)
1361
+ return {
1362
+ "probabilities": probs.cpu().numpy(),
1363
+ "predicted_class": predicted_class.cpu().numpy(),
1364
+ }
1365
+ else:
1366
+ return output.cpu().numpy()
1367
+ else:
1368
+ raise ValueError(f"Unsupported task: {task}")
1369
+
1370
+
1371
+ class HuggingFaceModelWrapper(ModelWithCustomRunMethod):
1372
+ """
1373
+ Wrapper for Hugging Face models with custom run methods.
1374
+ """
1375
+
1376
+ def run(self, task: str, input_data: Any) -> Any:
1377
+ """
1378
+ Run a task on a Hugging Face model.
1379
+
1380
+ Args:
1381
+ task: Task name
1382
+ input_data: Input data for the task
1383
+
1384
+ Returns:
1385
+ Task result
1386
+ """
1387
+ if not TRANSFORMERS_AVAILABLE:
1388
+ raise ImportError("Transformers package not available.")
1389
+
1390
+ # Example implementation for common Hugging Face tasks
1391
+ if task == "generate":
1392
+ # Generate text (requires a generation-capable model, e.g. one loaded with AutoModelForCausalLM)
1393
+ return self.model.generate(**input_data)
1394
+
1395
+ elif task == "encode":
1396
+ # Encode text (assumes the wrapped model exposes an .encode() method, e.g. a sentence-transformers model)
1397
+ return self.model.encode(input_data)
1398
+
1399
+ elif task == "predict":
1400
+ # Make predictions
1401
+ return self.model(**input_data)
1402
+
1403
+ else:
1404
+ raise ValueError(f"Unsupported task: {task}")
1405
+
1406
+
1407
+ # # Example usage
1408
+ # if __name__ == "__main__":
1409
+ # # Initialize model manager
1410
+ # manager = ModelGrid(
1411
+ # allocation_strategy=GPUAllocationStrategy.MEMORY_OPTIMIZED,
1412
+ # memory_buffer=0.5,
1413
+ # max_cpu_models=1,
1414
+ # use_multiprocessing=True,
1415
+ # log_level="INFO",
1416
+ # )
1417
+
1418
+ # # # Add models
1419
+ # model1 = torch.nn.Sequential(
1420
+ # torch.nn.Linear(10, 10),
1421
+ # torch.nn.ReLU(),
1422
+ # torch.nn.Linear(10, 2),
1423
+ # )
1424
+ # manager.add_model("small_model", model1, ModelType.PYTORCH)
1425
+
1426
+ # # Add more models if available
1427
+ # if TRANSFORMERS_AVAILABLE:
1428
+ # manager.add_model(
1429
+ # "bert_model", "bert-base-uncased", ModelType.HUGGINGFACE
1430
+ # )
1431
+
1432
+ # # Allocate and load models
1433
+ # manager.load_all_models()
1434
+
1435
+ # # Print GPU status
1436
+ # print("GPU Status:")
1437
+ # for gpu in manager.get_gpu_status():
1438
+ # print(
1439
+ # f"GPU {gpu['id']}: {gpu['available_memory']:.2f} GB / {gpu['total_memory']:.2f} GB"
1440
+ # )
1441
+ # print(f" Models: {', '.join(gpu['models'])}")
1442
+
1443
+ # # Run a task on all models
1444
+ # results = manager.run("forward", input_data=torch.randn(1, 10))
1445
+
1446
+ # # Unload all models
1447
+ # manager.unload_all_models()
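+ 
+ # # A sketch of the custom-run wrappers (commented out like the example above;
+ # # `net` is a placeholder model kept on CPU):
+ # net = torch.nn.Linear(10, 3)
+ # wrapper = PyTorchModelWrapper(net, device=torch.device("cpu"))
+ # prediction = wrapper.run("predict", torch.randn(1, 10))
+ # print(prediction["predicted_class"])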