stormlog 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. gpumemprof/__init__.py +150 -0
  2. gpumemprof/_version.py +34 -0
  3. gpumemprof/analyzer.py +895 -0
  4. gpumemprof/cli.py +967 -0
  5. gpumemprof/collective_attribution.py +603 -0
  6. gpumemprof/context_profiler.py +349 -0
  7. gpumemprof/cpu_profiler.py +472 -0
  8. gpumemprof/device_collectors.py +244 -0
  9. gpumemprof/diagnose.py +310 -0
  10. gpumemprof/distributed_analysis.py +481 -0
  11. gpumemprof/gap_analysis.py +234 -0
  12. gpumemprof/oom_flight_recorder.py +226 -0
  13. gpumemprof/profiler.py +493 -0
  14. gpumemprof/telemetry.py +814 -0
  15. gpumemprof/tracker.py +882 -0
  16. gpumemprof/tui/__init__.py +21 -0
  17. gpumemprof/tui/app.py +1617 -0
  18. gpumemprof/tui/builders.py +256 -0
  19. gpumemprof/tui/commands.py +73 -0
  20. gpumemprof/tui/distributed_diagnostics.py +896 -0
  21. gpumemprof/tui/monitor.py +358 -0
  22. gpumemprof/tui/profiles.py +126 -0
  23. gpumemprof/tui/styles.py +275 -0
  24. gpumemprof/tui/widgets/__init__.py +26 -0
  25. gpumemprof/tui/widgets/panels.py +21 -0
  26. gpumemprof/tui/widgets/tables.py +218 -0
  27. gpumemprof/tui/widgets/timeline.py +168 -0
  28. gpumemprof/tui/widgets/welcome.py +73 -0
  29. gpumemprof/tui/workloads.py +84 -0
  30. gpumemprof/utils.py +545 -0
  31. gpumemprof/visualizer.py +824 -0
  32. stormlog-0.2.3.dist-info/METADATA +368 -0
  33. stormlog-0.2.3.dist-info/RECORD +47 -0
  34. stormlog-0.2.3.dist-info/WHEEL +5 -0
  35. stormlog-0.2.3.dist-info/entry_points.txt +4 -0
  36. stormlog-0.2.3.dist-info/licenses/LICENSE +21 -0
  37. stormlog-0.2.3.dist-info/top_level.txt +2 -0
  38. tfmemprof/__init__.py +28 -0
  39. tfmemprof/analyzer.py +452 -0
  40. tfmemprof/cli.py +644 -0
  41. tfmemprof/context_profiler.py +369 -0
  42. tfmemprof/diagnose.py +282 -0
  43. tfmemprof/profiler.py +466 -0
  44. tfmemprof/tf_env.py +8 -0
  45. tfmemprof/tracker.py +505 -0
  46. tfmemprof/utils.py +600 -0
  47. tfmemprof/visualizer.py +329 -0
gpumemprof/__init__.py ADDED
@@ -0,0 +1,150 @@
1
+ """Stormlog - A comprehensive memory profiling tool for PyTorch."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib
6
+ from typing import Any
7
+
8
# Prefer the version string written by setuptools-scm at build time so the
# runtime value cannot drift from the version the package was published under.
try:
    from ._version import __version__
except ImportError:
    # Generated file is absent (for example an unbuilt source checkout):
    # fall back to the last hand-maintained value.
    __version__ = "0.2.0"
__author__ = "Stormlog Team"
10
+
11
# User-facing hints attached to the ImportError raised when an optional
# dependency is missing.  The requirement specifier is quoted in both commands
# because shells such as zsh treat the unquoted square brackets as a glob.
_TORCH_INSTALL_GUIDANCE = (
    "PyTorch is required for this feature. Install with "
    "`pip install 'stormlog[torch]'` "
    "or follow https://pytorch.org/get-started/locally/."
)
_VIZ_INSTALL_GUIDANCE = (
    "MemoryVisualizer requires optional visualization dependencies. "
    "Install with `pip install 'stormlog[viz]'`."
)
20
+
21
# Lazy-export table: public attribute name -> (relative submodule, attribute
# name inside that submodule).  Every symbol is exported under the name it has
# in its defining submodule, so the table is derived from per-module groups.
# Groups are listed so that dict iteration order is the same as a hand-written
# literal would give, which is why ".telemetry" appears twice.
_SYMBOL_TO_MODULE = {
    symbol: (submodule, symbol)
    for submodule, symbols in (
        (".profiler", ("GPUMemoryProfiler", "MemorySnapshot", "ProfileResult")),
        (".context_profiler", ("profile_context", "profile_function")),
        (".analyzer", ("MemoryAnalyzer", "GapFinding")),
        (".tracker", ("MemoryTracker",)),
        (
            ".oom_flight_recorder",
            (
                "OOMFlightRecorder",
                "OOMFlightRecorderConfig",
                "OOMExceptionClassification",
                "classify_oom_exception",
            ),
        ),
        (".telemetry", ("TelemetryEventV2",)),
        (
            ".device_collectors",
            (
                "DeviceMemoryCollector",
                "DeviceMemorySample",
                "build_device_memory_collector",
                "detect_torch_runtime_backend",
            ),
        ),
        (".cpu_profiler", ("CPUMemoryProfiler", "CPUMemoryTracker")),
        (
            ".telemetry",
            (
                "telemetry_event_from_record",
                "telemetry_event_to_dict",
                "validate_telemetry_record",
                "load_telemetry_events",
                "resolve_distributed_identity",
            ),
        ),
        (".utils", ("get_gpu_info", "format_bytes", "convert_bytes")),
    )
    for symbol in symbols
}
59
+
60
+
61
def _is_torch_missing(exc: BaseException) -> bool:
    """Report whether *exc* or any exception chained behind it is a missing ``torch``.

    The chain is followed the way a traceback would display it: the explicit
    ``__cause__`` when one is set, otherwise the implicit ``__context__``
    unless that context has been suppressed.

    Args:
        exc: Exception at the head of the chain to inspect.

    Returns:
        True if some exception in the chain is a ``ModuleNotFoundError`` whose
        ``name`` is exactly ``"torch"``; False otherwise.
    """
    seen_ids: set[int] = set()
    link: BaseException | None = exc
    while link is not None:
        marker = id(link)
        if marker in seen_ids:
            # The chain loops back on itself; stop instead of spinning forever.
            break
        seen_ids.add(marker)

        if isinstance(link, ModuleNotFoundError) and link.name == "torch":
            return True

        if link.__cause__ is not None:
            link = link.__cause__
        elif link.__suppress_context__:
            link = None
        else:
            link = link.__context__
    return False
73
+
74
+
75
def _resolve_symbol(name: str) -> Any:
    """Import the submodule that provides *name* and return the exported object.

    The object is also stored in this module's globals under *name*, so later
    attribute lookups find it directly.

    Args:
        name: A key of ``_SYMBOL_TO_MODULE``.

    Returns:
        The attribute fetched from the imported submodule.

    Raises:
        KeyError: If *name* is not a key of ``_SYMBOL_TO_MODULE``.
        ImportError: With the PyTorch install guidance when the import failed
            because ``torch`` is missing; the original error is chained.
        Exception: Any other error raised while importing is re-raised as is.
    """
    target_module, attribute = _SYMBOL_TO_MODULE[name]
    try:
        loaded = importlib.import_module(target_module, __name__)
    except Exception as err:
        if not _is_torch_missing(err):
            raise
        raise ImportError(_TORCH_INSTALL_GUIDANCE) from err

    resolved = getattr(loaded, attribute)
    globals()[name] = resolved
    return resolved
87
+
88
+
89
def _resolve_memory_visualizer() -> Any:
    """Return ``MemoryVisualizer``, or a placeholder class if it cannot be imported.

    The ``.visualizer`` submodule is imported relative to this package.  If
    that raises ``ImportError`` for a reason other than a missing ``torch``,
    a stand-in class is built whose constructor raises ``ImportError`` with
    the visualization install guidance, chained from the original failure.
    Whichever object is produced is stored in this module's globals under
    ``"MemoryVisualizer"`` before being returned.

    Returns:
        The real ``MemoryVisualizer`` class or the placeholder class.

    Raises:
        ImportError: With the PyTorch install guidance when the import failed
            because ``torch`` is missing.
    """
    try:
        viz_module = importlib.import_module(".visualizer", __name__)
        resolved = getattr(viz_module, "MemoryVisualizer")
    except ImportError as err:
        if _is_torch_missing(err):
            raise ImportError(_TORCH_INSTALL_GUIDANCE) from err
        # The name bound by ``except ... as`` is cleared when the handler ends,
        # so keep a separate reference for the placeholder to chain from.
        original_failure = err

        class MemoryVisualizer:
            """Fallback placeholder when optional visualization dependencies are missing."""

            def __init__(self, *args: Any, **kwargs: Any) -> None:
                raise ImportError(_VIZ_INSTALL_GUIDANCE) from original_failure

        resolved = MemoryVisualizer

    globals()["MemoryVisualizer"] = resolved
    return resolved
107
+
108
+
109
def __getattr__(name: str) -> Any:
    """Resolve public names lazily on first attribute access (module ``__getattr__``).

    Args:
        name: Attribute requested on this module that normal lookup did not find.

    Returns:
        The object produced by ``_resolve_memory_visualizer`` for
        ``"MemoryVisualizer"``, or by ``_resolve_symbol`` for any key of
        ``_SYMBOL_TO_MODULE``.

    Raises:
        AttributeError: If *name* is neither of the above.
    """
    wants_visualizer = name == "MemoryVisualizer"
    if not wants_visualizer and name not in _SYMBOL_TO_MODULE:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    if wants_visualizer:
        return _resolve_memory_visualizer()
    return _resolve_symbol(name)
115
+
116
+
117
def __dir__() -> list[str]:
    """List this module's attribute names, including not-yet-loaded lazy exports.

    Names are merged through a set because a lazy export appears both in
    ``__all__`` and, once resolved and cached, in ``globals()``; plain list
    concatenation would report such names twice.

    Returns:
        Sorted list of unique names drawn from ``globals()`` and ``__all__``.
    """
    return sorted(set(globals()) | set(__all__))
119
+
120
+
121
# Public API of the package.  Apart from "MemoryVisualizer", which has its own
# resolver, each name is a key of the lazy-export table and is loaded on first
# attribute access.
__all__ = [
    # Profiling
    "GPUMemoryProfiler",
    "MemorySnapshot",
    "ProfileResult",
    "profile_context",
    "profile_function",
    # Visualization and analysis
    "MemoryVisualizer",
    "MemoryAnalyzer",
    "GapFinding",
    # Tracking and OOM flight recorder
    "MemoryTracker",
    "OOMFlightRecorder",
    "OOMFlightRecorderConfig",
    "OOMExceptionClassification",
    "classify_oom_exception",
    # Telemetry event type and device collectors
    "TelemetryEventV2",
    "DeviceMemoryCollector",
    "DeviceMemorySample",
    "build_device_memory_collector",
    "detect_torch_runtime_backend",
    # CPU profiling
    "CPUMemoryProfiler",
    "CPUMemoryTracker",
    # Telemetry helpers
    "telemetry_event_from_record",
    "telemetry_event_to_dict",
    "validate_telemetry_record",
    "load_telemetry_events",
    "resolve_distributed_identity",
    # Utilities
    "get_gpu_info",
    "format_bytes",
    "convert_bytes",
]
gpumemprof/_version.py ADDED
@@ -0,0 +1,34 @@
1
# file generated by setuptools-scm
# don't change, don't track in version control

# Names exported by a star import: each value is published under both a
# dunder spelling and a plain alias.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Local flag instead of typing.TYPE_CHECKING: it is False at runtime, so the
# typing imports in the first branch are not executed when this module loads.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    # Runtime placeholders so the annotations below have names to refer to.
    VERSION_TUPLE = object
    COMMIT_ID = object

# Annotation-only declarations; the values are assigned further down.
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Version as a string and as a tuple of its components.
__version__ = version = '0.2.3'
__version_tuple__ = version_tuple = (0, 2, 3)

# No commit id is recorded in this build.
__commit_id__ = commit_id = None