torch-memory-saver 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {torch_memory_saver-0.0.1/torch_memory_saver.egg-info → torch_memory_saver-0.0.3}/PKG-INFO +1 -1
- torch_memory_saver-0.0.3/README.md +29 -0
- torch_memory_saver-0.0.3/setup.py +19 -0
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/torch_memory_saver/__init__.py +42 -20
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3/torch_memory_saver.egg-info}/PKG-INFO +1 -1
- torch_memory_saver-0.0.1/README.md +0 -11
- torch_memory_saver-0.0.1/setup.py +0 -19
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/LICENSE +0 -0
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/csrc/torch_memory_saver.cpp +0 -0
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/setup.cfg +0 -0
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/torch_memory_saver.egg-info/SOURCES.txt +0 -0
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/torch_memory_saver.egg-info/dependency_links.txt +0 -0
- {torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/torch_memory_saver.egg-info/top_level.txt +0 -0
torch_memory_saver-0.0.3/README.md (added):

@@ -0,0 +1,29 @@
+# torch_memory_saver
+
+Allow torch tensor memory to be released and resumed later.
+
+API:
+
+```python
+memory_saver = TorchMemorySaver()
+
+# 1. For tensors that want to be paused, create them within `region`
+with memory_saver.region():
+    x = torch.full((1_000_000_000,), 100, dtype=torch.uint8, device='cuda')
+
+# 2. After `pause`, CUDA memory is released for those tensors.
+# For example, check `nvidia-smi`'s memory usage to verify.
+memory_saver.pause()
+
+# 3. After `resume`, CUDA memory is re-occupied for those tensors.
+memory_saver.resume()
+```
+
+Please refer to https://github.com/sgl-project/sglang/issues/2542#issuecomment-2563641647 for details.
+
+TODO:
+
+- [x] Implementation
+- [x] Publish to pypi
+- [ ] More tests and infra
+- [ ] Documentation
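One practical note on the README example: in 0.0.3 the saver only activates when the compiled C++ library is found in `LD_PRELOAD` (see the `__init__.py` diff below). Here is a minimal launcher sketch under that assumption, using the package's `get_binary_path()` helper (visible at the end of the `__init__.py` diff, though its candidate list is truncated there) to locate the `torch_memory_saver_cpp` shared object; the script name is a placeholder:

```python
# Hypothetical launcher sketch: re-run a script with LD_PRELOAD pointing at the
# torch_memory_saver_cpp shared object, so that _BinaryInfo.compute() (shown in
# the __init__.py diff below) detects it and enables the saver.
import os
import subprocess
import sys

from torch_memory_saver import get_binary_path  # helper defined in this package

env = dict(os.environ)
# 0.0.3 enables itself only if the string 'torch_memory_saver' appears in LD_PRELOAD;
# get_binary_path() is assumed to return the path of the built shared object.
env['LD_PRELOAD'] = str(get_binary_path())
subprocess.run([sys.executable, 'train.py'], env=env, check=True)  # 'train.py' is a placeholder
```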
torch_memory_saver-0.0.3/setup.py (added):

@@ -0,0 +1,19 @@
+import logging
+
+import setuptools
+from setuptools import setup
+
+logger = logging.getLogger(__name__)
+
+setup(
+    name='torch_memory_saver',
+    version='0.0.3',
+    ext_modules=[setuptools.Extension(
+        'torch_memory_saver_cpp',
+        ['csrc/torch_memory_saver.cpp'],
+        extra_compile_args=['-I/usr/local/cuda/include'],
+        extra_link_args=['-lcuda'],
+    )],
+    python_requires=">=3.9",
+    packages=['torch_memory_saver'],
+)
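Note the build change: 0.0.3 compiles the extension with a plain `setuptools.Extension`, whereas 0.0.1 (removed `setup.py` further below) used `torch.utils.cpp_extension.CppExtension` with `BuildExtension`, so building no longer requires torch at install time. A hedged post-build sanity check, not part of the package; the artifact path and suffix are assumptions that vary by platform and Python version:

```python
# Sanity-check sketch: the extension links against libcuda (extra_link_args=['-lcuda']),
# so loading the built artifact with ctypes should succeed on a machine with the
# CUDA driver installed.
import ctypes
import glob

# Assumed setuptools build output location; adjust for your platform/Python.
matches = glob.glob('build/lib*/torch_memory_saver_cpp*.so')
assert matches, "build the extension first, e.g. `python setup.py build_ext` or `pip install .`"
ctypes.CDLL(matches[0])  # raises OSError if the library or its dependencies fail to load
print('loaded', matches[0])
```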
{torch_memory_saver-0.0.1 → torch_memory_saver-0.0.3}/torch_memory_saver/__init__.py:

@@ -2,6 +2,7 @@ import ctypes
 import logging
 import os
 from contextlib import contextmanager
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional
 
@@ -18,31 +19,58 @@ class TorchMemorySaver:
 
     @contextmanager
     def region(self):
-        … (six removed lines; their content is elided in the source diff)
+        if _global_info.binary_info.enabled:
+            with torch.cuda.use_mem_pool(self._mem_pool):
+                _global_info.binary_info.cdll.tms_region_enter()
+                try:
+                    yield
+                finally:
+                    _global_info.binary_info.cdll.tms_region_leave()
+        else:
+            yield
 
     def pause(self):
-        _global_info.…
+        if _global_info.binary_info.enabled:
+            _global_info.binary_info.cdll.tms_pause()
 
     def resume(self):
-        _global_info.…
+        if _global_info.binary_info.enabled:
+            _global_info.binary_info.cdll.tms_resume()
+
+    @property
+    def enabled(self):
+        return _global_info.binary_info.enabled
+
+
+@dataclass
+class _BinaryInfo:
+    cdll: Optional[ctypes.CDLL]
+
+    @property
+    def enabled(self):
+        return self.cdll is not None
+
+    @staticmethod
+    def compute():
+        env_ld_preload = os.environ.get('LD_PRELOAD', '')
+        if 'torch_memory_saver' in env_ld_preload:
+            return _BinaryInfo(cdll=ctypes.CDLL(env_ld_preload))
+        else:
+            logger.warning(
+                f'TorchMemorySaver is disabled for the current process because invalid LD_PRELOAD="{env_ld_preload}" (process_id={os.getpid()})')
+            return _BinaryInfo(cdll=None)
 
 
 class _GlobalInfo:
     def __init__(self):
-        self.…
+        self._binary_info: Optional[_BinaryInfo] = None
         self._last_id = 0
 
     @property
-    def …
-        if self.…
-            self.…
-
-        return self._cdll
+    def binary_info(self):
+        if self._binary_info is None:
+            self._binary_info = _BinaryInfo.compute()
+        return self._binary_info
 
     def next_id(self):
         self._last_id += 1
@@ -52,12 +80,6 @@ class _GlobalInfo:
 _global_info = _GlobalInfo()
 
 
-def _compute_cdll():
-    env_ld_preload = os.environ.get('LD_PRELOAD', '')
-    assert 'torch_memory_saver' in env_ld_preload, f'Please specify correct LD_PRELOAD (currently: {env_ld_preload})'
-    return ctypes.CDLL(env_ld_preload)
-
-
 def get_binary_path():
     dir_package = Path(__file__).parent
     candidates = [
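The net effect of these changes: 0.0.1's `_compute_cdll` asserted that `LD_PRELOAD` was set correctly, while 0.0.3's `_BinaryInfo.compute` merely logs a warning and degrades the saver to a no-op, exposing the new `enabled` property for callers to check. A minimal sketch of calling code under 0.0.3 (assuming `TorchMemorySaver` is importable from the package top level, as the README example suggests, and a CUDA device is available):

```python
# Sketch of the 0.0.3 graceful-degradation behavior: without a valid LD_PRELOAD,
# 0.0.1 tripped an assert, while 0.0.3 runs normally with pause/resume as no-ops.
import torch
from torch_memory_saver import TorchMemorySaver  # assumed top-level export

saver = TorchMemorySaver()
if not saver.enabled:
    print('torch_memory_saver not in LD_PRELOAD; pause/resume will be no-ops')

with saver.region():  # allocates from a dedicated CUDA mem pool when enabled
    x = torch.full((1_000_000,), 7, dtype=torch.uint8, device='cuda')

saver.pause()   # releases the region's CUDA memory when enabled, else does nothing
saver.resume()  # re-occupies the CUDA memory when enabled, else does nothing
```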
torch_memory_saver-0.0.1/README.md (removed):

@@ -1,11 +0,0 @@
-# torch_memory_saver
-
-Allow torch tensor memory to be released and resumed later
-
-Please refer to https://github.com/sgl-project/sglang/issues/2542#issuecomment-2563641647 for details.
-
-TODO:
-
-- [x] Implementation
-- [ ] More tests and infra
-- [ ] Publish to pypi
torch_memory_saver-0.0.1/setup.py (removed):

@@ -1,19 +0,0 @@
-from setuptools import setup
-from torch.utils import cpp_extension
-
-ext_module = cpp_extension.CppExtension(
-    'torch_memory_saver_cpp',
-    ['csrc/torch_memory_saver.cpp'],
-    extra_compile_args=['-I/usr/local/cuda/include'],
-    extra_link_args=['-lcuda'],
-)
-
-setup(
-    name='torch_memory_saver',
-    version='0.0.1',
-    # https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-a-c-extension
-    ext_modules=[ext_module],
-    cmdclass={'build_ext': cpp_extension.BuildExtension},
-    python_requires=">=3.9",
-    packages=['torch_memory_saver'],
-)
The remaining files (LICENSE, csrc/torch_memory_saver.cpp, setup.cfg, and the torch_memory_saver.egg-info metadata files) were renamed from the 0.0.1 to the 0.0.3 paths with no content changes, as already listed in the summary above.