stormlog 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpumemprof/__init__.py +150 -0
- gpumemprof/_version.py +34 -0
- gpumemprof/analyzer.py +895 -0
- gpumemprof/cli.py +967 -0
- gpumemprof/collective_attribution.py +603 -0
- gpumemprof/context_profiler.py +349 -0
- gpumemprof/cpu_profiler.py +472 -0
- gpumemprof/device_collectors.py +244 -0
- gpumemprof/diagnose.py +310 -0
- gpumemprof/distributed_analysis.py +481 -0
- gpumemprof/gap_analysis.py +234 -0
- gpumemprof/oom_flight_recorder.py +226 -0
- gpumemprof/profiler.py +493 -0
- gpumemprof/telemetry.py +814 -0
- gpumemprof/tracker.py +882 -0
- gpumemprof/tui/__init__.py +21 -0
- gpumemprof/tui/app.py +1617 -0
- gpumemprof/tui/builders.py +256 -0
- gpumemprof/tui/commands.py +73 -0
- gpumemprof/tui/distributed_diagnostics.py +896 -0
- gpumemprof/tui/monitor.py +358 -0
- gpumemprof/tui/profiles.py +126 -0
- gpumemprof/tui/styles.py +275 -0
- gpumemprof/tui/widgets/__init__.py +26 -0
- gpumemprof/tui/widgets/panels.py +21 -0
- gpumemprof/tui/widgets/tables.py +218 -0
- gpumemprof/tui/widgets/timeline.py +168 -0
- gpumemprof/tui/widgets/welcome.py +73 -0
- gpumemprof/tui/workloads.py +84 -0
- gpumemprof/utils.py +545 -0
- gpumemprof/visualizer.py +824 -0
- stormlog-0.2.3.dist-info/METADATA +368 -0
- stormlog-0.2.3.dist-info/RECORD +47 -0
- stormlog-0.2.3.dist-info/WHEEL +5 -0
- stormlog-0.2.3.dist-info/entry_points.txt +4 -0
- stormlog-0.2.3.dist-info/licenses/LICENSE +21 -0
- stormlog-0.2.3.dist-info/top_level.txt +2 -0
- tfmemprof/__init__.py +28 -0
- tfmemprof/analyzer.py +452 -0
- tfmemprof/cli.py +644 -0
- tfmemprof/context_profiler.py +369 -0
- tfmemprof/diagnose.py +282 -0
- tfmemprof/profiler.py +466 -0
- tfmemprof/tf_env.py +8 -0
- tfmemprof/tracker.py +505 -0
- tfmemprof/utils.py +600 -0
- tfmemprof/visualizer.py +329 -0
gpumemprof/__init__.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Stormlog - A comprehensive memory profiling tool for PyTorch."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
# Prefer the version stamped into ``_version.py`` at build time so that
# ``gpumemprof.__version__`` matches the installed distribution. The previous
# hard-coded literal had drifted from the packaged version.
try:
    from ._version import __version__
except ImportError:
    # ``_version.py`` is a generated file and may be absent (for example in an
    # unbuilt source tree); fall back to the historical literal in that case.
    __version__ = "0.2.0"
__author__ = "Stormlog Team"
|
|
10
|
+
|
|
11
|
+
# Messages used as the text of the ImportError raised when an optional
# dependency is unavailable. The extras specifier is single-quoted in both
# commands because shells such as zsh treat unquoted ``[...]`` as a glob
# pattern and reject the command before pip ever runs.
_TORCH_INSTALL_GUIDANCE = (
    "PyTorch is required for this feature. Install with "
    "`pip install 'stormlog[torch]'` "
    "or follow https://pytorch.org/get-started/locally/."
)
_VIZ_INSTALL_GUIDANCE = (
    "MemoryVisualizer requires optional visualization dependencies. "
    "Install with `pip install 'stormlog[viz]'`."
)
|
|
20
|
+
|
|
21
|
+
# Lazy-export table: public attribute name -> (relative module, attribute name
# inside that module). Every entry exposes the symbol under its own name, so
# the table is generated from per-module groups. Groups are listed in the
# order the names should appear in the mapping, which is why ``.telemetry``
# occurs twice.
_SYMBOL_TO_MODULE = {
    symbol: (module_path, symbol)
    for module_path, symbols in (
        (".profiler", ("GPUMemoryProfiler", "MemorySnapshot", "ProfileResult")),
        (".context_profiler", ("profile_context", "profile_function")),
        (".analyzer", ("MemoryAnalyzer", "GapFinding")),
        (".tracker", ("MemoryTracker",)),
        (
            ".oom_flight_recorder",
            (
                "OOMFlightRecorder",
                "OOMFlightRecorderConfig",
                "OOMExceptionClassification",
                "classify_oom_exception",
            ),
        ),
        (".telemetry", ("TelemetryEventV2",)),
        (
            ".device_collectors",
            (
                "DeviceMemoryCollector",
                "DeviceMemorySample",
                "build_device_memory_collector",
                "detect_torch_runtime_backend",
            ),
        ),
        (".cpu_profiler", ("CPUMemoryProfiler", "CPUMemoryTracker")),
        (
            ".telemetry",
            (
                "telemetry_event_from_record",
                "telemetry_event_to_dict",
                "validate_telemetry_record",
                "load_telemetry_events",
                "resolve_distributed_identity",
            ),
        ),
        (".utils", ("get_gpu_info", "format_bytes", "convert_bytes")),
    )
    for symbol in symbols
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _is_torch_missing(exc: BaseException) -> bool:
    """Report whether *exc* or an exception chained to it is a missing ``torch``.

    The chain is followed one link at a time: the explicit ``__cause__`` is
    preferred, and the implicit ``__context__`` is used only when there is no
    cause and the context has not been suppressed.

    Args:
        exc: The exception at the head of the chain to inspect.

    Returns:
        ``True`` if any visited exception is a ``ModuleNotFoundError`` whose
        ``name`` is exactly ``"torch"``; ``False`` otherwise.
    """
    seen: set[int] = set()
    node: BaseException | None = exc
    while node is not None:
        marker = id(node)
        if marker in seen:
            # The chain loops back on itself; stop instead of spinning forever.
            break
        seen.add(marker)
        if isinstance(node, ModuleNotFoundError) and node.name == "torch":
            return True
        if node.__cause__ is not None:
            node = node.__cause__
        elif node.__suppress_context__:
            node = None
        else:
            node = node.__context__
    return False
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _resolve_symbol(name: str) -> Any:
    """Import the module that backs *name*, cache the attribute and return it.

    Args:
        name: A key of ``_SYMBOL_TO_MODULE``.

    Returns:
        The attribute fetched from the imported module.

    Raises:
        KeyError: If *name* is not a key of ``_SYMBOL_TO_MODULE``.
        ImportError: With the PyTorch install guidance as its message when
            ``_is_torch_missing`` reports that the import failure traces back
            to a missing ``torch``; the original exception is chained.
        Exception: Any other failure raised while importing is re-raised
            unchanged.
    """
    target_module, attribute = _SYMBOL_TO_MODULE[name]
    try:
        loaded = importlib.import_module(target_module, __name__)
    except Exception as exc:
        if not _is_torch_missing(exc):
            raise
        raise ImportError(_TORCH_INSTALL_GUIDANCE) from exc

    resolved = getattr(loaded, attribute)
    # Store the result in the module namespace so later attribute lookups find
    # it directly and the module-level __getattr__ is not consulted again.
    globals()[name] = resolved
    return resolved
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _resolve_memory_visualizer() -> Any:
    """Return ``MemoryVisualizer``, or a placeholder class if it cannot be imported.

    Returns:
        The ``MemoryVisualizer`` attribute of the ``.visualizer`` submodule.
        If importing raises an ``ImportError`` that is not attributed to a
        missing ``torch``, a stand-in class of the same name is returned
        instead; constructing it raises ``ImportError`` with the visualization
        install guidance, chained to the original import failure.

    Raises:
        ImportError: With the PyTorch install guidance when
            ``_is_torch_missing`` reports that ``torch`` is missing.
    """
    try:
        viz_module = importlib.import_module(".visualizer", __name__)
        resolved = viz_module.MemoryVisualizer
    except ImportError as exc:
        if _is_torch_missing(exc):
            raise ImportError(_TORCH_INSTALL_GUIDANCE) from exc
        # ``exc`` is unbound when the except block ends, so keep a separate
        # reference for the placeholder's constructor to chain from.
        viz_import_error = exc

        class MemoryVisualizer:
            """Fallback placeholder when optional visualization dependencies are missing."""

            def __init__(self, *args: Any, **kwargs: Any) -> None:
                raise ImportError(_VIZ_INSTALL_GUIDANCE) from viz_import_error

        resolved = MemoryVisualizer

    # Cache whichever object was chosen (real class or placeholder) in the
    # module namespace.
    globals()["MemoryVisualizer"] = resolved
    return resolved
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def __getattr__(name: str) -> Any:
    """Resolve lazily exported attributes of this module on first access.

    Args:
        name: The attribute that normal module lookup did not find.

    Returns:
        The result of ``_resolve_memory_visualizer()`` for
        ``"MemoryVisualizer"``, or of ``_resolve_symbol(name)`` for any key of
        ``_SYMBOL_TO_MODULE``.

    Raises:
        AttributeError: If *name* is neither of the above.
    """
    if name == "MemoryVisualizer":
        return _resolve_memory_visualizer()
    if name not in _SYMBOL_TO_MODULE:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _resolve_symbol(name)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def __dir__() -> list[str]:
    """Return the sorted, de-duplicated attribute names of this module.

    The result combines the names currently bound in the module namespace
    with the names listed in ``__all__``. A set union is used because a name
    from ``__all__`` that has already been stored in ``globals()`` would
    otherwise be reported twice.

    Returns:
        A sorted list in which each name appears exactly once.
    """
    return sorted(set(globals()) | set(__all__))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# Public API of the package. Each name listed here is provided on first access
# by the module-level ``__getattr__``: "MemoryVisualizer" has its own resolver
# and every other name is a key of ``_SYMBOL_TO_MODULE``.
# ``__dir__`` concatenates this object with a list, so it must remain a list.
__all__ = [
    "GPUMemoryProfiler",
    "MemorySnapshot",
    "ProfileResult",
    "profile_context",
    "profile_function",
    "MemoryVisualizer",
    "MemoryAnalyzer",
    "GapFinding",
    "MemoryTracker",
    "OOMFlightRecorder",
    "OOMFlightRecorderConfig",
    "OOMExceptionClassification",
    "classify_oom_exception",
    "TelemetryEventV2",
    "DeviceMemoryCollector",
    "DeviceMemorySample",
    "build_device_memory_collector",
    "detect_torch_runtime_backend",
    "CPUMemoryProfiler",
    "CPUMemoryTracker",
    "telemetry_event_from_record",
    "telemetry_event_to_dict",
    "validate_telemetry_record",
    "load_telemetry_events",
    "resolve_distributed_identity",
    "get_gpu_info",
    "format_bytes",
    "convert_bytes",
]
|
gpumemprof/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
# Names exported by ``from ._version import *``.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Defined locally as False, so the ``typing`` imports in the first branch are
# never executed at runtime and the ``else`` branch always runs.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    # Runtime stand-ins for the two aliases above.
    VERSION_TUPLE = object
    COMMIT_ID = object

# Annotation-only declarations; the values are assigned further down.
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Each value is bound under both its dunder name and its plain name.
__version__ = version = '0.2.3'
__version_tuple__ = version_tuple = (0, 2, 3)

# The commit id is None in this build.
__commit_id__ = commit_id = None
|