torch-memory-saver 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: torch_memory_saver
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Requires-Python: >=3.9
5
5
  License-File: LICENSE
@@ -0,0 +1,29 @@
1
+ # torch_memory_saver
2
+
3
+ Allow torch tensor memory to be released and resumed later.
4
+
5
+ API:
6
+
7
+ ```python
8
+ memory_saver = TorchMemorySaver()
9
+
10
+ # 1. For tensors that should be pausable, create them within `region`
11
+ with memory_saver.region():
12
+ x = torch.full((1_000_000_000,), 100, dtype=torch.uint8, device='cuda')
13
+
14
+ # 2. After `pause`, CUDA memory is released for those tensors.
15
+ # For example, check `nvidia-smi`'s memory usage to verify.
16
+ memory_saver.pause()
17
+
18
+ # 3. After `resume`, CUDA memory is re-occupied for those tensors.
19
+ memory_saver.resume()
20
+ ```
21
+
22
+ Please refer to https://github.com/sgl-project/sglang/issues/2542#issuecomment-2563641647 for details.
23
+
24
+ TODO:
25
+
26
+ - [x] Implementation
27
+ - [x] Publish to pypi
28
+ - [ ] More tests and infra
29
+ - [ ] Documentation
@@ -0,0 +1,19 @@
1
+ import logging
2
+
3
+ import setuptools
4
+ from setuptools import setup
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ setup(
9
+ name='torch_memory_saver',
10
+ version='0.0.2',
11
+ ext_modules=[setuptools.Extension(
12
+ 'torch_memory_saver_cpp',
13
+ ['csrc/torch_memory_saver.cpp'],
14
+ extra_compile_args=['-I/usr/local/cuda/include'],
15
+ extra_link_args=['-lcuda'],
16
+ )],
17
+ python_requires=">=3.9",
18
+ packages=['torch_memory_saver'],
19
+ )
@@ -2,6 +2,7 @@ import ctypes
2
2
  import logging
3
3
  import os
4
4
  from contextlib import contextmanager
5
+ from dataclasses import dataclass
5
6
  from pathlib import Path
6
7
  from typing import Optional
7
8
 
@@ -18,31 +19,54 @@ class TorchMemorySaver:
18
19
 
19
20
  @contextmanager
20
21
  def region(self):
21
- with torch.cuda.use_mem_pool(self._mem_pool):
22
- _global_info.cdll.tms_region_enter()
23
- try:
24
- yield
25
- finally:
26
- _global_info.cdll.tms_region_leave()
22
+ if _global_info.binary_info.enabled:
23
+ with torch.cuda.use_mem_pool(self._mem_pool):
24
+ _global_info.binary_info.cdll.tms_region_enter()
25
+ try:
26
+ yield
27
+ finally:
28
+ _global_info.binary_info.cdll.tms_region_leave()
29
+ else:
30
+ yield
27
31
 
28
32
  def pause(self):
29
- _global_info.cdll.tms_pause()
33
+ if _global_info.binary_info.enabled:
34
+ _global_info.binary_info.cdll.tms_pause()
30
35
 
31
36
  def resume(self):
32
- _global_info.cdll.tms_resume()
37
+ if _global_info.binary_info.enabled:
38
+ _global_info.binary_info.cdll.tms_resume()
39
+
40
+
41
+ @dataclass
42
+ class _BinaryInfo:
43
+ cdll: Optional[ctypes.CDLL]
44
+
45
+ @property
46
+ def enabled(self):
47
+ return self.cdll is not None
48
+
49
+ @staticmethod
50
+ def compute():
51
+ env_ld_preload = os.environ.get('LD_PRELOAD', '')
52
+ if 'torch_memory_saver' in env_ld_preload:
53
+ return _BinaryInfo(cdll=ctypes.CDLL(env_ld_preload))
54
+ else:
55
+ logger.warning(
56
+ f'TorchMemorySaver is disabled for the current process because invalid LD_PRELOAD="{env_ld_preload}" (process_id={os.getpid()})')
57
+ return _BinaryInfo(cdll=None)
33
58
 
34
59
 
35
60
  class _GlobalInfo:
36
61
  def __init__(self):
37
- self._cdll: Optional[ctypes.CDLL] = None
62
+ self._binary_info: Optional[_BinaryInfo] = None
38
63
  self._last_id = 0
39
64
 
40
65
  @property
41
- def cdll(self):
42
- if self._cdll is None:
43
- self._cdll = _compute_cdll()
44
- logger.debug(f'Use cdll={self._cdll}')
45
- return self._cdll
66
+ def binary_info(self):
67
+ if self._binary_info is None:
68
+ self._binary_info = _BinaryInfo.compute()
69
+ return self._binary_info
46
70
 
47
71
  def next_id(self):
48
72
  self._last_id += 1
@@ -52,12 +76,6 @@ class _GlobalInfo:
52
76
  _global_info = _GlobalInfo()
53
77
 
54
78
 
55
- def _compute_cdll():
56
- env_ld_preload = os.environ.get('LD_PRELOAD', '')
57
- assert 'torch_memory_saver' in env_ld_preload, f'Please specify correct LD_PRELOAD (currently: {env_ld_preload})'
58
- return ctypes.CDLL(env_ld_preload)
59
-
60
-
61
79
  def get_binary_path():
62
80
  dir_package = Path(__file__).parent
63
81
  candidates = [
@@ -1,5 +1,5 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: torch_memory_saver
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Requires-Python: >=3.9
5
5
  License-File: LICENSE
@@ -1,11 +0,0 @@
1
- # torch_memory_saver
2
-
3
- Allow torch tensor memory to be released and resumed later
4
-
5
- Please refer to https://github.com/sgl-project/sglang/issues/2542#issuecomment-2563641647 for details.
6
-
7
- TODO:
8
-
9
- - [x] Implementation
10
- - [ ] More tests and infra
11
- - [ ] Publish to pypi
@@ -1,19 +0,0 @@
1
- from setuptools import setup
2
- from torch.utils import cpp_extension
3
-
4
- ext_module = cpp_extension.CppExtension(
5
- 'torch_memory_saver_cpp',
6
- ['csrc/torch_memory_saver.cpp'],
7
- extra_compile_args=['-I/usr/local/cuda/include'],
8
- extra_link_args=['-lcuda'],
9
- )
10
-
11
- setup(
12
- name='torch_memory_saver',
13
- version='0.0.1',
14
- # https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-a-c-extension
15
- ext_modules=[ext_module],
16
- cmdclass={'build_ext': cpp_extension.BuildExtension},
17
- python_requires=">=3.9",
18
- packages=['torch_memory_saver'],
19
- )