unifiedefficientloader 0.3.2__cp39-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unifiedefficientloader/__init__.py +66 -0
- unifiedefficientloader/gpu_buffer_pool.py +40 -0
- unifiedefficientloader/logging_utils.py +117 -0
- unifiedefficientloader/memory_efficient_loader.py +639 -0
- unifiedefficientloader/pinned_buffer_pool.py +39 -0
- unifiedefficientloader/pinned_transfer.py +100 -0
- unifiedefficientloader/tensor_utils.py +57 -0
- unifiedefficientloader/uel/__init__.py +4 -0
- unifiedefficientloader/uel/control.py +67 -0
- unifiedefficientloader/uel/host_buffer.py +28 -0
- unifiedefficientloader/uel/model_mmap.py +59 -0
- unifiedefficientloader/uel/model_vbar.py +147 -0
- unifiedefficientloader/uel/torch.py +60 -0
- unifiedefficientloader/uel/uel.dll +0 -0
- unifiedefficientloader-0.3.2.dist-info/LICENSE +21 -0
- unifiedefficientloader-0.3.2.dist-info/METADATA +156 -0
- unifiedefficientloader-0.3.2.dist-info/RECORD +19 -0
- unifiedefficientloader-0.3.2.dist-info/WHEEL +5 -0
- unifiedefficientloader-0.3.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
|
|
3
|
+
def check_dependencies(*packages):
    """
    Check that every named package can be located for import.

    Args:
        *packages: Importable module names (for example ``"torch"``).

    Raises:
        ImportError: If one or more packages cannot be found. The message
            lists every missing name and a ``pip install`` command for them.
    """
    missing = []
    for pkg in packages:
        try:
            found = importlib.util.find_spec(pkg) is not None
        except ModuleNotFoundError:
            # For a dotted name, find_spec imports the parent package first
            # and raises if that parent is absent; report it as missing so
            # the caller still gets the descriptive error below.
            found = False
        if not found:
            missing.append(pkg)

    if missing:
        missing_str = ", ".join(missing)
        # pip takes space-separated requirements, and only top-level names
        # are installable, so reduce "pkg.sub" to "pkg" (order-preserving,
        # de-duplicated) for the suggested command.
        install_targets = " ".join(
            dict.fromkeys(name.partition(".")[0] for name in missing)
        )
        raise ImportError(
            f"Missing required packages for unifiedefficientloader: {missing_str}. "
            f"Please install them using: pip install {install_targets}"
        )
|
|
19
|
+
|
|
20
|
+
# Pre-check torch as it is the foundation of most of these tools
|
|
21
|
+
check_dependencies("torch")
|
|
22
|
+
|
|
23
|
+
from .memory_efficient_loader import UnifiedSafetensorsLoader, MemoryEfficientSafeOpen
|
|
24
|
+
from .tensor_utils import dict_to_tensor, tensor_to_dict
|
|
25
|
+
from .pinned_transfer import transfer_to_gpu_pinned, set_verbose, get_pinned_transfer_stats, reset_pinned_transfer_stats
|
|
26
|
+
from .gpu_buffer_pool import GpuBufferPool
|
|
27
|
+
from .pinned_buffer_pool import PinnedBufferPool
|
|
28
|
+
from .logging_utils import (
|
|
29
|
+
setup_logging,
|
|
30
|
+
MINIMAL_LEVEL,
|
|
31
|
+
NORMAL_LEVEL,
|
|
32
|
+
VERBOSE_LEVEL,
|
|
33
|
+
DEBUG_LEVEL,
|
|
34
|
+
debug,
|
|
35
|
+
verbose,
|
|
36
|
+
normal,
|
|
37
|
+
info,
|
|
38
|
+
minimal,
|
|
39
|
+
warning,
|
|
40
|
+
error
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
__all__ = [
|
|
44
|
+
"UnifiedSafetensorsLoader",
|
|
45
|
+
"MemoryEfficientSafeOpen",
|
|
46
|
+
"dict_to_tensor",
|
|
47
|
+
"tensor_to_dict",
|
|
48
|
+
"transfer_to_gpu_pinned",
|
|
49
|
+
"set_verbose",
|
|
50
|
+
"get_pinned_transfer_stats",
|
|
51
|
+
"reset_pinned_transfer_stats",
|
|
52
|
+
"GpuBufferPool",
|
|
53
|
+
"PinnedBufferPool",
|
|
54
|
+
"setup_logging",
|
|
55
|
+
"MINIMAL_LEVEL",
|
|
56
|
+
"NORMAL_LEVEL",
|
|
57
|
+
"VERBOSE_LEVEL",
|
|
58
|
+
"DEBUG_LEVEL",
|
|
59
|
+
"debug",
|
|
60
|
+
"verbose",
|
|
61
|
+
"normal",
|
|
62
|
+
"info",
|
|
63
|
+
"minimal",
|
|
64
|
+
"warning",
|
|
65
|
+
"error",
|
|
66
|
+
]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GPU memory buffer pool for direct-to-GPU streaming.
|
|
3
|
+
|
|
4
|
+
Maintains a pool of pre-allocated GPU tensors to avoid allocation overhead
|
|
5
|
+
and ensure strictly bounded VRAM usage during streaming.
|
|
6
|
+
"""
|
|
7
|
+
from typing import Tuple, Optional
|
|
8
|
+
import torch
|
|
9
|
+
from . import logging_utils
|
|
10
|
+
|
|
11
|
+
logger = logging_utils.get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
class GpuBufferPool:
    """Manages a pool of fixed-size GPU memory buffers.

    All buffers are allocated up front in ``__init__`` as ``torch.uint8``
    tensors of ``size_bytes`` elements on ``device``. Free buffers are tracked
    by index in a ``queue.Queue``; ``acquire`` hands one out and ``release``
    returns it.
    """

    def __init__(self, size_bytes: int, num_buffers: int, device: str = "cuda"):
        """Allocate ``num_buffers`` buffers of ``size_bytes`` bytes on ``device``.

        Args:
            size_bytes: Size of each buffer in bytes.
            num_buffers: Number of buffers to pre-allocate.
            device: Torch device string the buffers are created on.
        """
        # Function-scope imports: ``queue`` is not imported at module level.
        import torch
        import queue

        self.device = device
        self.size_bytes = size_bytes
        self.num_buffers = num_buffers

        logging_utils.verbose(f"Initializing GpuBufferPool: {num_buffers} buffers of {size_bytes / (1024**2):.2f} MB each on {device}.")

        self.buffers = [
            torch.empty(size_bytes, dtype=torch.uint8, device=device)
            for _ in range(num_buffers)
        ]

        # Every buffer starts out free; the queue holds indices into
        # ``self.buffers`` rather than the tensors themselves.
        self.free_queue = queue.Queue()
        for idx in range(num_buffers):
            self.free_queue.put(idx)

    def acquire(self, timeout: Optional[float] = None) -> Tuple[int, 'torch.Tensor']:
        """Acquire a free buffer. Blocks while the pool is empty.

        Args:
            timeout: Maximum number of seconds to wait for a free buffer.
                ``None`` (the default) waits indefinitely.

        Returns:
            ``(index, buffer)``; pass ``index`` to ``release`` when done.

        Raises:
            queue.Empty: If ``timeout`` elapses before a buffer is released.
        """
        idx = self.free_queue.get(timeout=timeout)
        return idx, self.buffers[idx]

    def release(self, idx: int):
        """Release buffer back to pool.

        Args:
            idx: Index previously returned by ``acquire``.

        Raises:
            ValueError: If ``idx`` is not a valid buffer index. Rejecting it
                here keeps a bad index from surfacing later as an
                ``IndexError`` (or a silently aliased buffer for negative
                values) in an unrelated ``acquire`` call.
        """
        if not 0 <= idx < len(self.buffers):
            raise ValueError(
                f"Invalid buffer index {idx!r}; expected 0 <= idx < {len(self.buffers)}"
            )
        self.free_queue.put(idx)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
import functools
|
|
4
|
+
|
|
5
|
+
# Custom Levels
# Numeric thresholds for the package's verbosity names. A logger set to one
# of these emits records at that level and above.
# MINIMAL (30): WARNING+ (Reduced)
# NORMAL (20): INFO+ (Default)
# VERBOSE (15): Custom+ (Increased)
# DEBUG (10): DEBUG+ (Every function call)

MINIMAL_LEVEL = 30 # Use logging.WARNING
NORMAL_LEVEL = 20 # Use logging.INFO
VERBOSE_LEVEL = 15 # Custom level between INFO and DEBUG
DEBUG_LEVEL = 10 # logging.DEBUG

# Register display names so %(levelname)s renders the custom levels.
logging.addLevelName(VERBOSE_LEVEL, "VERBOSE")
# NOTE(review): 30 is also logging.WARNING, so this call replaces the
# process-wide "WARNING" level name with "MINIMAL" for every logger, not just
# this package's - confirm that is intended.
logging.addLevelName(MINIMAL_LEVEL, "MINIMAL")
|
|
18
|
+
|
|
19
|
+
class CustomFormatter(logging.Formatter):
    """Formatter that picks the message layout from the record's level.

    Records at or below DEBUG_LEVEL include the logger name and line number;
    records at or below NORMAL_LEVEL (but above VERBOSE_LEVEL) are printed as
    the bare message; everything else is prefixed with ``[LEVELNAME]``.
    """

    def format(self, record):
        """Format ``record`` using the layout for its level.

        The layout is applied by temporarily swapping the style's format
        string, which is always restored afterwards - including when the
        base formatter raises (for example on mismatched %-args).
        """
        if record.levelno <= DEBUG_LEVEL:
            # Debug: Full trace info
            fmt = "[%(levelname)s] %(name)s:%(lineno)d - %(message)s"
        elif record.levelno <= VERBOSE_LEVEL:
            # Verbose: Detail
            fmt = "[%(levelname)s] %(message)s"
        elif record.levelno <= NORMAL_LEVEL:
            # Normal: Standard output
            fmt = "%(message)s"
        else:
            # Minimal/Warning
            fmt = "[%(levelname)s] %(message)s"

        # Save the configured format so it can be put back afterwards.
        orig_fmt = self._style._fmt
        self._style._fmt = fmt
        try:
            return super().format(record)
        finally:
            self._style._fmt = orig_fmt
|
|
42
|
+
|
|
43
|
+
def setup_logging(verbose_arg: str = "NORMAL"):
    """
    Setup logging based on verbosity name.

    Args:
        verbose_arg: One of "DEBUG", "VERBOSE", "NORMAL" or "MINIMAL"
            (case-insensitive, surrounding whitespace ignored). Any other
            value, including ``None``, falls back to NORMAL.

    Returns:
        The configured "unifiedefficientloader" logger, with exactly one
        stdout handler using ``CustomFormatter``.
    """
    level_map = {
        "DEBUG": DEBUG_LEVEL,
        "VERBOSE": VERBOSE_LEVEL,
        "NORMAL": NORMAL_LEVEL,
        "MINIMAL": MINIMAL_LEVEL,
    }

    # Non-string input (e.g. an unset CLI option yielding None) is treated
    # like any other unrecognised value instead of raising AttributeError.
    key = verbose_arg.strip().upper() if isinstance(verbose_arg, str) else ""
    level = level_map.get(key, NORMAL_LEVEL)

    logger = logging.getLogger("unifiedefficientloader")
    logger.setLevel(level)

    # Clear existing handlers to prevent duplicates. Iterate over a copy
    # because removeHandler mutates logger.handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(CustomFormatter())
    logger.addHandler(handler)

    return logger
|
|
68
|
+
|
|
69
|
+
def get_logger(name=None):
    """Return a logger inside the "unifiedefficientloader" namespace.

    Args:
        name: Optional logger name. Names that are not the package name and
            do not start with "unifiedefficientloader." are nested under the
            package logger so they inherit its level and handlers. A falsy
            name returns the package logger itself.

    Returns:
        The corresponding ``logging.Logger``.
    """
    root = "unifiedefficientloader"
    if not name:
        return logging.getLogger(root)
    # Require the dot separator: a bare prefix match would treat an unrelated
    # name such as "unifiedefficientloaderx" as already namespaced.
    if name != root and not name.startswith(root + "."):
        name = f"{root}.{name}"
    return logging.getLogger(name)
|
|
75
|
+
|
|
76
|
+
# Decorator for DEBUG level tracing
|
|
77
|
+
def log_debug(func):
    """Decorator to log function entry/exit with args (DEBUG level only).

    On entry a "CALL name(args)" record is logged with the ``repr`` of every
    positional and keyword argument; on normal return a "RET name -> type"
    record is logged. Nothing is formatted unless DEBUG is enabled on the
    function's module logger.
    """
    # Resolve the logger once at decoration time: logging.getLogger returns
    # the same object for the same name, so there is no need to split the
    # module path and look it up again on every call. ``__module__`` can be
    # None for some callables; that falls back to the package logger.
    logger = get_logger((func.__module__ or "").split('.')[-1])

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # We only want to construct the string if debug is enabled to save perf
        if logger.isEnabledFor(DEBUG_LEVEL):
            arg_str = ", ".join([repr(a) for a in args])
            kw_str = ", ".join([f"{k}={v!r}" for k, v in kwargs.items()])
            all_args = ", ".join(filter(None, [arg_str, kw_str]))
            logger.log(DEBUG_LEVEL, "CALL %s(%s)", func.__name__, all_args)

        result = func(*args, **kwargs)

        if logger.isEnabledFor(DEBUG_LEVEL):
            logger.log(DEBUG_LEVEL, "RET %s -> %s", func.__name__, type(result))
        return result
    return wrapper
|
|
95
|
+
|
|
96
|
+
# Convenience wrappers
|
|
97
|
+
def debug(msg, *args, **kwargs):
    """Log ``msg`` at DEBUG_LEVEL on the package logger.

    ``args`` and ``kwargs`` are forwarded to ``Logger.log``.
    """
    # Skip this wrapper's frame so %(lineno)d (shown by the DEBUG format)
    # reports the caller's line; a caller-supplied stacklevel still counts
    # relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().log(DEBUG_LEVEL, msg, *args, **kwargs)
|
|
99
|
+
|
|
100
|
+
def verbose(msg, *args, **kwargs):
    """Log ``msg`` at VERBOSE_LEVEL on the package logger.

    ``args`` and ``kwargs`` are forwarded to ``Logger.log``.
    """
    # Skip this wrapper's frame so the record is attributed to the caller;
    # a caller-supplied stacklevel still counts relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().log(VERBOSE_LEVEL, msg, *args, **kwargs)
|
|
102
|
+
|
|
103
|
+
def normal(msg, *args, **kwargs):
    """Log ``msg`` at NORMAL_LEVEL on the package logger.

    ``args`` and ``kwargs`` are forwarded to ``Logger.log``.
    """
    # Skip this wrapper's frame so the record is attributed to the caller;
    # a caller-supplied stacklevel still counts relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().log(NORMAL_LEVEL, msg, *args, **kwargs)
|
|
105
|
+
|
|
106
|
+
def info(msg, *args, **kwargs):
    """Alias for normal/INFO level logging."""
    # Log directly at NORMAL_LEVEL rather than going through another wrapper
    # frame, and skip this frame so the record is attributed to the caller;
    # a caller-supplied stacklevel still counts relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().log(NORMAL_LEVEL, msg, *args, **kwargs)
|
|
109
|
+
|
|
110
|
+
def minimal(msg, *args, **kwargs):
    """Log ``msg`` at MINIMAL_LEVEL on the package logger.

    ``args`` and ``kwargs`` are forwarded to ``Logger.log``.
    """
    # Skip this wrapper's frame so the record is attributed to the caller;
    # a caller-supplied stacklevel still counts relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().log(MINIMAL_LEVEL, msg, *args, **kwargs)
|
|
112
|
+
|
|
113
|
+
def warning(msg, *args, **kwargs):
    """Log ``msg`` as a warning on the package logger.

    ``args`` and ``kwargs`` are forwarded to ``Logger.warning``.
    """
    # Skip this wrapper's frame so the record is attributed to the caller;
    # a caller-supplied stacklevel still counts relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().warning(msg, *args, **kwargs)
|
|
115
|
+
|
|
116
|
+
def error(msg, *args, **kwargs):
    """Log ``msg`` as an error on the package logger.

    ``args`` and ``kwargs`` are forwarded to ``Logger.error``.
    """
    # Skip this wrapper's frame so the record is attributed to the caller;
    # a caller-supplied stacklevel still counts relative to the caller.
    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
    get_logger().error(msg, *args, **kwargs)
|