xputop 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. xputop-0.1.0/LICENSE +92 -0
  2. xputop-0.1.0/MANIFEST.in +4 -0
  3. xputop-0.1.0/PKG-INFO +235 -0
  4. xputop-0.1.0/README.md +199 -0
  5. xputop-0.1.0/pyproject.toml +54 -0
  6. xputop-0.1.0/setup.cfg +4 -0
  7. xputop-0.1.0/tests/test_amd.py +76 -0
  8. xputop-0.1.0/tests/test_config.py +89 -0
  9. xputop-0.1.0/tests/test_intel.py +32 -0
  10. xputop-0.1.0/tests/test_monitor.py +55 -0
  11. xputop-0.1.0/tests/test_npu.py +113 -0
  12. xputop-0.1.0/tests/test_tpu.py +50 -0
  13. xputop-0.1.0/xputop/__init__.py +4 -0
  14. xputop-0.1.0/xputop/__main__.py +6 -0
  15. xputop-0.1.0/xputop/alert/__init__.py +0 -0
  16. xputop-0.1.0/xputop/alert/config.py +278 -0
  17. xputop-0.1.0/xputop/alert/email_alert.py +141 -0
  18. xputop-0.1.0/xputop/cli.py +587 -0
  19. xputop-0.1.0/xputop/core/__init__.py +0 -0
  20. xputop-0.1.0/xputop/core/backend.py +77 -0
  21. xputop-0.1.0/xputop/core/backends/__init__.py +131 -0
  22. xputop-0.1.0/xputop/core/backends/amd.py +243 -0
  23. xputop-0.1.0/xputop/core/backends/custom.py +157 -0
  24. xputop-0.1.0/xputop/core/backends/demo.py +51 -0
  25. xputop-0.1.0/xputop/core/backends/intel.py +110 -0
  26. xputop-0.1.0/xputop/core/backends/npu.py +296 -0
  27. xputop-0.1.0/xputop/core/backends/nvidia.py +156 -0
  28. xputop-0.1.0/xputop/core/backends/tpu.py +140 -0
  29. xputop-0.1.0/xputop/core/monitor.py +281 -0
  30. xputop-0.1.0/xputop/core/recorder.py +161 -0
  31. xputop-0.1.0/xputop/core/sysinfo.py +345 -0
  32. xputop-0.1.0/xputop/ui/__init__.py +0 -0
  33. xputop-0.1.0/xputop/ui/tui.py +643 -0
  34. xputop-0.1.0/xputop/ui/viewer.py +210 -0
  35. xputop-0.1.0/xputop.egg-info/PKG-INFO +235 -0
  36. xputop-0.1.0/xputop.egg-info/SOURCES.txt +38 -0
  37. xputop-0.1.0/xputop.egg-info/dependency_links.txt +1 -0
  38. xputop-0.1.0/xputop.egg-info/entry_points.txt +2 -0
  39. xputop-0.1.0/xputop.egg-info/requires.txt +7 -0
  40. xputop-0.1.0/xputop.egg-info/top_level.txt +1 -0
xputop-0.1.0/LICENSE ADDED
@@ -0,0 +1,92 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity.
18
+
19
+ "You" (or "Your") shall mean an individual or Legal Entity
20
+ exercising permissions granted by this License.
21
+
22
+ "Source" form shall mean the preferred form for making modifications.
23
+
24
+ "Object" form shall mean any form resulting from mechanical
25
+ transformation or translation of a Source form.
26
+
27
+ "Work" shall mean the work of authorship made available under the License.
28
+
29
+ "Contribution" shall mean any work of authorship submitted to the Licensor
30
+ for inclusion in the Work.
31
+
32
+ "Contributor" shall mean Licensor and any Legal Entity on behalf of whom
33
+ a Contribution has been received by the Licensor.
34
+
35
+ 2. Grant of Copyright License. Subject to the terms and conditions of
36
+ this License, each Contributor hereby grants to You a perpetual,
37
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
38
+ copyright license to reproduce, prepare Derivative Works of,
39
+ publicly display, publicly perform, sublicense, and distribute the
40
+ Work and such Derivative Works in Source or Object form.
41
+
42
+ 3. Grant of Patent License. Subject to the terms and conditions of
43
+ this License, each Contributor hereby grants to You a perpetual,
44
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
45
+ patent license to make, have made, use, offer to sell, sell,
46
+ import, and otherwise transfer the Work.
47
+
48
+ 4. Redistribution. You may reproduce and distribute copies of the
49
+ Work or Derivative Works thereof in any medium, with or without
50
+ modifications, and in Source or Object form, provided that You
51
+ meet the following conditions:
52
+
53
+ (a) You must give any other recipients of the Work or
54
+ Derivative Works a copy of this License; and
55
+
56
+ (b) You must cause any modified files to carry prominent notices
57
+ stating that You changed the files; and
58
+
59
+ (c) You must retain, in the Source form of any Derivative Works
60
+ that You distribute, all copyright, patent, trademark, and
61
+ attribution notices from the Source form of the Work; and
62
+
63
+ (d) If the Work includes a "NOTICE" text file, You must include
64
+ a readable copy of the attribution notices contained within
65
+ such NOTICE file.
66
+
67
+ 5. Submission of Contributions.
68
+
69
+ 6. Trademarks. This License does not grant permission to use the trade
70
+ names, trademarks, service marks, or product names of the Licensor.
71
+
72
+ 7. Disclaimer of Warranty. The Work is provided on an "AS IS" BASIS,
73
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
74
+
75
+ 8. Limitation of Liability. In no event shall any Contributor be
76
+ liable to You for damages.
77
+
78
+ 9. Accepting Warranty or Additional Liability.
79
+
80
+ Copyright 2025 nputop contributors
81
+
82
+ Licensed under the Apache License, Version 2.0 (the "License");
83
+ you may not use this file except in compliance with the License.
84
+ You may obtain a copy of the License at
85
+
86
+ http://www.apache.org/licenses/LICENSE-2.0
87
+
88
+ Unless required by applicable law or agreed to in writing, software
89
+ distributed under the License is distributed on an "AS IS" BASIS,
90
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
91
+ See the License for the specific language governing permissions and
92
+ limitations under the License.
@@ -0,0 +1,4 @@
1
+ include LICENSE
2
+ include README.md
3
+ include pyproject.toml
4
+ recursive-include xputop *.py
xputop-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.4
2
+ Name: xputop
3
+ Version: 0.1.0
4
+ Summary: An interactive Huawei Ascend NPU process viewer and monitor, inspired by nvitop.
5
+ Author-email: Zander Zhao <zhaozhaongrui@mails.ucas.ac.cn>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/ZanderZhao/xputop
8
+ Project-URL: Repository, https://github.com/ZanderZhao/xputop
9
+ Project-URL: Issues, https://github.com/ZanderZhao/xputop/issues
10
+ Keywords: huawei,ascend,npu,monitor,top,npu-smi,gpu
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: Console :: Curses
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: System :: Monitoring
25
+ Classifier: Topic :: Utilities
26
+ Requires-Python: >=3.8
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: rich>=13.0.0
30
+ Requires-Dist: psutil>=5.9.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7.0; extra == "dev"
33
+ Requires-Dist: build; extra == "dev"
34
+ Requires-Dist: twine; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ # xputop
38
+
39
+ An interactive Huawei Ascend NPU process viewer and monitor, inspired by [nvitop](https://github.com/XuehaiPan/nvitop).
40
+
41
+ ## Features
42
+
43
+ - Real-time monitoring of Huawei Ascend NPU devices (HBM memory, AI Core utilization, temperature, power)
44
+ - Rich terminal UI with per-device cards, usage bars, and process lists
45
+ - **Fixed process row count** per NPU card to prevent UI jitter (`-p N`, default 3)
46
+ - **Sparkline history curves** (like nvtop) for NPU, CPU, memory, disk, and network metrics
47
+ - **Detail mode** (`-d`) for combined chart + CPU + memory + disk view
48
+ - **Full detail mode** (`-a`) adds network I/O monitoring and system process tree
49
+ - **CPU per-core utilization** panel with load averages
50
+ - **System memory** (RAM + Swap) panel
51
+ - **Disk usage** panel with configurable mount points
52
+ - **Network I/O** panel with rate history (auto-skips if root privileges unavailable)
53
+ - Multi-card support with summary table
54
+ - Configurable heartbeat / refresh interval
55
+ - **Threshold-based email alerting** for both NPU and system metrics
56
+ - Configuration stored in `~/.config/xputop/config.toml` (XDG-compliant)
57
+ - **Chinese help** via `--zh`
58
+ - JSON output mode for scripting and automation
59
+ - Demo mode for development without hardware
60
+ - **Lightweight design** — non-blocking CPU sampling, no extra subprocesses
61
+
62
+ ## Installation
63
+
64
+ ```bash
65
+ pip install xputop
66
+ ```
67
+
68
+ ## Quick Start
69
+
70
+ ```bash
71
+ # Launch the interactive TUI
72
+ xputop
73
+
74
+ # Detail mode: chart + CPU + memory + disk
75
+ xputop -d
76
+
77
+ # Full detail: + network I/O + process tree
78
+ xputop -a
79
+
80
+ # Show with sparkline curves + CPU + memory + disk
81
+ xputop -C --cpu -m -D
82
+
83
+ # Control process display rows (default 3, 0=hide, -1=show all)
84
+ xputop -p 5 -C
85
+ xputop -p 0 # hide all processes
86
+ xputop -p -1 # show all (may jitter)
87
+
88
+ # Monitor specific disk paths
89
+ xputop -C -D /data /home
90
+
91
+ # Run in demo mode (no Ascend hardware needed)
92
+ xputop --demo -d
93
+ xputop --demo -a
94
+
95
+ # Set custom refresh interval (seconds)
96
+ xputop -i 5
97
+
98
+ # Print a single snapshot and exit
99
+ xputop once --cpu --mem --disk
100
+
101
+ # Print snapshot as JSON
102
+ xputop once --json --cpu --mem --disk /data
103
+
104
+ # Show Chinese help
105
+ xputop --zh
106
+
107
+ # Generate a default configuration file
108
+ xputop config --generate
109
+
110
+ # Send a test alert email
111
+ xputop alert-test
112
+ ```
113
+
114
+ ## Command-Line Options
115
+
116
+ | Short | Long | Default | Description |
117
+ |-------|------------------|---------|--------------------------------------------------|
118
+ | `-V` | `--version` | — | Show version and exit |
119
+ | `-i` | `--interval` | 2.0 | Refresh interval in seconds |
120
+ | `-c` | `--config` | — | Path to configuration file |
121
+ | `-C` | `--chart` | off | Enable nvtop-style sparkline history curves |
122
+ | `-l` | `--chart-length` | 120 | Number of history points for sparklines |
123
+ | `-p` | `--processes` | 3 | Process rows per NPU card (0=hide, -1=all) |
124
+ | | `--cpu` | off | Show CPU per-core utilization panel |
125
+ | `-m` | `--mem` | off | Show system memory panel |
126
+ | `-D` | `--disk` | off | Show disk usage panel (optionally specify paths) |
127
+ | `-d` | `--detail` | off | Detail mode = `--chart --cpu --mem --disk` |
128
+ | `-a` | `--detail-all` | off | Full detail = `-d` + network + process tree |
129
+ | | `--demo` | off | Demo mode with fake NPU data |
130
+ | | `--demo-devices` | 4 | Number of simulated NPU devices |
131
+ | | `--zh` | — | Show Chinese help |
132
+
133
+ ## Configuration
134
+
135
+ Configuration is stored at `~/.config/xputop/config.toml`. Generate a default:
136
+
137
+ ```bash
138
+ xputop config --generate
139
+ ```
140
+
141
+ You can also set `XPUTOP_CONFIG_DIR` to use a custom directory.
142
+
143
+ ### Example config.toml
144
+
145
+ ```ini
146
+ [general]
147
+ interval = 2.0
148
+ demo = false
149
+ demo_devices = 4
150
+
151
+ [display]
152
+ chart = true
153
+ chart_length = 120
154
+ cpu = true
155
+ mem = true
156
+ disk = true
157
+ disk_paths = /, /data
158
+ process_rows = 3
159
+
160
+ [email]
161
+ enabled = true
162
+ smtp_host = smtp.gmail.com
163
+ smtp_port = 587
164
+ use_tls = true
165
+ username = you@gmail.com
166
+ password = your_app_password
167
+ sender = xputop@yourdomain.com
168
+ recipients = admin@yourdomain.com, ops@yourdomain.com
169
+ subject_prefix = [xputop]
170
+
171
+ # NPU alert rules
172
+ [rule:0]
173
+ metric = temperature
174
+ limit = 80.0
175
+ cooldown = 300
176
+
177
+ [rule:1]
178
+ metric = hbm_usage_percent
179
+ limit = 95.0
180
+ cooldown = 300
181
+
182
+ # System alert rules
183
+ [rule:2]
184
+ metric = cpu_percent
185
+ limit = 95.0
186
+ cooldown = 120
187
+
188
+ [rule:3]
189
+ metric = mem_percent
190
+ limit = 90.0
191
+ cooldown = 300
192
+
193
+ [rule:4]
194
+ metric = disk_percent
195
+ limit = 95.0
196
+ cooldown = 600
197
+ ```
198
+
199
+ ### Alert Rule Metrics
200
+
201
+ | Metric | Type | Description | Unit |
202
+ |---------------------|--------|----------------------------------|------|
203
+ | `temperature` | NPU | Device temperature | °C |
204
+ | `power` | NPU | Power consumption | W |
205
+ | `aicore_rate` | NPU | AI Core utilization | % |
206
+ | `hbm_usage_percent` | NPU | HBM memory usage percentage | % |
207
+ | `hbm_used` | NPU | HBM memory used | MiB |
208
+ | `cpu_percent` | System | Overall CPU utilization | % |
209
+ | `mem_percent` | System | RAM usage percentage | % |
210
+ | `disk_percent` | System | Disk usage percentage | % |
211
+ | `swap_percent` | System | Swap usage percentage | % |
212
+
213
+ ## Lightweight Design
214
+
215
+ Designed to run alongside model training with minimal overhead:
216
+
217
+ - CPU sampling uses psutil non-blocking mode (`cpu_interval=None`), adding zero extra latency
218
+ - All system metrics are collected in a single call — no extra subprocesses
219
+ - Network I/O detection auto-skips if root privileges are unavailable (no retry)
220
+ - Default 2-second heartbeat; recommended 5–10 seconds for heavy training workloads
221
+ - `npu-smi` calls use a 10-second timeout to prevent hangs
222
+
223
+ ## Build & Release
224
+
225
+ See [BUILD.md](BUILD.md) for development setup, building, and publishing instructions.
226
+
227
+ ## Requirements
228
+
229
+ - Python >= 3.8
230
+ - Huawei Ascend driver with `npu-smi` in PATH (or use `--demo` mode)
231
+ - `rich` and `psutil` (installed automatically)
232
+
233
+ ## License
234
+
235
+ Apache License 2.0
xputop-0.1.0/README.md ADDED
@@ -0,0 +1,199 @@
1
+ # xputop
2
+
3
+ An interactive Huawei Ascend NPU process viewer and monitor, inspired by [nvitop](https://github.com/XuehaiPan/nvitop).
4
+
5
+ ## Features
6
+
7
+ - Real-time monitoring of Huawei Ascend NPU devices (HBM memory, AI Core utilization, temperature, power)
8
+ - Rich terminal UI with per-device cards, usage bars, and process lists
9
+ - **Fixed process row count** per NPU card to prevent UI jitter (`-p N`, default 3)
10
+ - **Sparkline history curves** (like nvtop) for NPU, CPU, memory, disk, and network metrics
11
+ - **Detail mode** (`-d`) for combined chart + CPU + memory + disk view
12
+ - **Full detail mode** (`-a`) adds network I/O monitoring and system process tree
13
+ - **CPU per-core utilization** panel with load averages
14
+ - **System memory** (RAM + Swap) panel
15
+ - **Disk usage** panel with configurable mount points
16
+ - **Network I/O** panel with rate history (auto-skips if root privileges unavailable)
17
+ - Multi-card support with summary table
18
+ - Configurable heartbeat / refresh interval
19
+ - **Threshold-based email alerting** for both NPU and system metrics
20
+ - Configuration stored in `~/.config/xputop/config.toml` (XDG-compliant)
21
+ - **Chinese help** via `--zh`
22
+ - JSON output mode for scripting and automation
23
+ - Demo mode for development without hardware
24
+ - **Lightweight design** — non-blocking CPU sampling, no extra subprocesses
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install xputop
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ```bash
35
+ # Launch the interactive TUI
36
+ xputop
37
+
38
+ # Detail mode: chart + CPU + memory + disk
39
+ xputop -d
40
+
41
+ # Full detail: + network I/O + process tree
42
+ xputop -a
43
+
44
+ # Show with sparkline curves + CPU + memory + disk
45
+ xputop -C --cpu -m -D
46
+
47
+ # Control process display rows (default 3, 0=hide, -1=show all)
48
+ xputop -p 5 -C
49
+ xputop -p 0 # hide all processes
50
+ xputop -p -1 # show all (may jitter)
51
+
52
+ # Monitor specific disk paths
53
+ xputop -C -D /data /home
54
+
55
+ # Run in demo mode (no Ascend hardware needed)
56
+ xputop --demo -d
57
+ xputop --demo -a
58
+
59
+ # Set custom refresh interval (seconds)
60
+ xputop -i 5
61
+
62
+ # Print a single snapshot and exit
63
+ xputop once --cpu --mem --disk
64
+
65
+ # Print snapshot as JSON
66
+ xputop once --json --cpu --mem --disk /data
67
+
68
+ # Show Chinese help
69
+ xputop --zh
70
+
71
+ # Generate a default configuration file
72
+ xputop config --generate
73
+
74
+ # Send a test alert email
75
+ xputop alert-test
76
+ ```
77
+
78
+ ## Command-Line Options
79
+
80
+ | Short | Long | Default | Description |
81
+ |-------|------------------|---------|--------------------------------------------------|
82
+ | `-V` | `--version` | — | Show version and exit |
83
+ | `-i` | `--interval` | 2.0 | Refresh interval in seconds |
84
+ | `-c` | `--config` | — | Path to configuration file |
85
+ | `-C` | `--chart` | off | Enable nvtop-style sparkline history curves |
86
+ | `-l` | `--chart-length` | 120 | Number of history points for sparklines |
87
+ | `-p` | `--processes` | 3 | Process rows per NPU card (0=hide, -1=all) |
88
+ | | `--cpu` | off | Show CPU per-core utilization panel |
89
+ | `-m` | `--mem` | off | Show system memory panel |
90
+ | `-D` | `--disk` | off | Show disk usage panel (optionally specify paths) |
91
+ | `-d` | `--detail` | off | Detail mode = `--chart --cpu --mem --disk` |
92
+ | `-a` | `--detail-all` | off | Full detail = `-d` + network + process tree |
93
+ | | `--demo` | off | Demo mode with fake NPU data |
94
+ | | `--demo-devices` | 4 | Number of simulated NPU devices |
95
+ | | `--zh` | — | Show Chinese help |
96
+
97
+ ## Configuration
98
+
99
+ Configuration is stored at `~/.config/xputop/config.toml`. Generate a default:
100
+
101
+ ```bash
102
+ xputop config --generate
103
+ ```
104
+
105
+ You can also set `XPUTOP_CONFIG_DIR` to use a custom directory.
106
+
107
+ ### Example config.toml
108
+
109
+ ```ini
110
+ [general]
111
+ interval = 2.0
112
+ demo = false
113
+ demo_devices = 4
114
+
115
+ [display]
116
+ chart = true
117
+ chart_length = 120
118
+ cpu = true
119
+ mem = true
120
+ disk = true
121
+ disk_paths = /, /data
122
+ process_rows = 3
123
+
124
+ [email]
125
+ enabled = true
126
+ smtp_host = smtp.gmail.com
127
+ smtp_port = 587
128
+ use_tls = true
129
+ username = you@gmail.com
130
+ password = your_app_password
131
+ sender = xputop@yourdomain.com
132
+ recipients = admin@yourdomain.com, ops@yourdomain.com
133
+ subject_prefix = [xputop]
134
+
135
+ # NPU alert rules
136
+ [rule:0]
137
+ metric = temperature
138
+ limit = 80.0
139
+ cooldown = 300
140
+
141
+ [rule:1]
142
+ metric = hbm_usage_percent
143
+ limit = 95.0
144
+ cooldown = 300
145
+
146
+ # System alert rules
147
+ [rule:2]
148
+ metric = cpu_percent
149
+ limit = 95.0
150
+ cooldown = 120
151
+
152
+ [rule:3]
153
+ metric = mem_percent
154
+ limit = 90.0
155
+ cooldown = 300
156
+
157
+ [rule:4]
158
+ metric = disk_percent
159
+ limit = 95.0
160
+ cooldown = 600
161
+ ```
162
+
163
+ ### Alert Rule Metrics
164
+
165
+ | Metric | Type | Description | Unit |
166
+ |---------------------|--------|----------------------------------|------|
167
+ | `temperature` | NPU | Device temperature | °C |
168
+ | `power` | NPU | Power consumption | W |
169
+ | `aicore_rate` | NPU | AI Core utilization | % |
170
+ | `hbm_usage_percent` | NPU | HBM memory usage percentage | % |
171
+ | `hbm_used` | NPU | HBM memory used | MiB |
172
+ | `cpu_percent` | System | Overall CPU utilization | % |
173
+ | `mem_percent` | System | RAM usage percentage | % |
174
+ | `disk_percent` | System | Disk usage percentage | % |
175
+ | `swap_percent` | System | Swap usage percentage | % |
176
+
177
+ ## Lightweight Design
178
+
179
+ Designed to run alongside model training with minimal overhead:
180
+
181
+ - CPU sampling uses psutil non-blocking mode (`cpu_interval=None`), adding zero extra latency
182
+ - All system metrics are collected in a single call — no extra subprocesses
183
+ - Network I/O detection auto-skips if root privileges are unavailable (no retry)
184
+ - Default 2-second heartbeat; recommended 5–10 seconds for heavy training workloads
185
+ - `npu-smi` calls use a 10-second timeout to prevent hangs
186
+
187
+ ## Build & Release
188
+
189
+ See [BUILD.md](BUILD.md) for development setup, building, and publishing instructions.
190
+
191
+ ## Requirements
192
+
193
+ - Python >= 3.8
194
+ - Huawei Ascend driver with `npu-smi` in PATH (or use `--demo` mode)
195
+ - `rich` and `psutil` (installed automatically)
196
+
197
+ ## License
198
+
199
+ Apache License 2.0
@@ -0,0 +1,54 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "xputop"
7
+ version = "0.1.0"
8
+ description = "An interactive Huawei Ascend NPU process viewer and monitor, inspired by nvitop."
9
+ readme = "README.md"
10
+ license = {text = "Apache-2.0"}
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ {name = "Zander Zhao", email = "zhaozhaongrui@mails.ucas.ac.cn"},
14
+ ]
15
+ keywords = ["huawei", "ascend", "npu", "monitor", "top", "npu-smi", "gpu"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Environment :: Console",
19
+ "Environment :: Console :: Curses",
20
+ "Intended Audience :: Developers",
21
+ "Intended Audience :: System Administrators",
22
+ "License :: OSI Approved :: Apache Software License",
23
+ "Operating System :: POSIX :: Linux",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.8",
26
+ "Programming Language :: Python :: 3.9",
27
+ "Programming Language :: Python :: 3.10",
28
+ "Programming Language :: Python :: 3.11",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Topic :: System :: Monitoring",
31
+ "Topic :: Utilities",
32
+ ]
33
+ dependencies = [
34
+ "rich>=13.0.0",
35
+ "psutil>=5.9.0",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ dev = [
40
+ "pytest>=7.0",
41
+ "build",
42
+ "twine",
43
+ ]
44
+
45
+ [project.urls]
46
+ Homepage = "https://github.com/ZanderZhao/xputop"
47
+ Repository = "https://github.com/ZanderZhao/xputop"
48
+ Issues = "https://github.com/ZanderZhao/xputop/issues"
49
+
50
+ [project.scripts]
51
+ xputop = "xputop.cli:main"
52
+
53
+ [tool.setuptools.packages.find]
54
+ include = ["xputop*"]
xputop-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,76 @@
1
+ """Tests for the AMD rocm-smi / amd-smi backend parsing."""
2
+
3
+ from xputop.core.backends.amd import AmdBackend
4
+
5
+
6
def test_rocm_smi_regex(monkeypatch):
    """Feed canned ``rocm-smi`` text to the AMD backend and check GPU 0's metrics.

    ``subprocess.run`` is replaced with a stub that returns the canned report,
    so no real ``rocm-smi`` binary is needed.
    """
    amd = AmdBackend()
    amd._cmd = "rocm-smi"
    amd._is_amd_smi = False

    # Two-GPU report using the "GPU[n]: <label>: <value>" line layout.
    canned_report = """
============================ ROCm System Management Interface ============================
====================================== System Info =======================================
GPU[0]: Temperature (Sensor edge) (C): 65.0
GPU[0]: GPU use (%): 99
GPU[0]: Average Graphics Package Power (W): 300.5
GPU[0]: VRAM Total Memory (B): 17163091968
GPU[0]: VRAM Total Used Memory (B): 8581545984
GPU[1]: Temperature (Sensor edge) (C): 62.0
GPU[1]: GPU use (%): 45
GPU[1]: Average Graphics Package Power (W): 150.2
GPU[1]: VRAM Total Memory (B): 17163091968
GPU[1]: VRAM Total Used Memory (B): 2000000000
=========================================================================================
"""

    def fake_subprocess_run(*_args, **_kwargs):
        # Stand-in result exposing only the attributes set here.
        class _Result:
            returncode = 0
            stdout = canned_report

        return _Result()

    monkeypatch.setattr("subprocess.run", fake_subprocess_run)

    devices, _driver, err = amd._collect_rocm_smi()
    assert err == ""
    assert len(devices) == 2

    first = devices[0]
    assert first.device_id == 0
    assert first.temperature == 65.0
    assert first.power == 300.5
    assert first.utilization_rate == 99.0
    # The test expects byte counts scaled by 2**20:
    # 17163091968 B -> 16368, 8581545984 B -> 8184.
    assert abs(first.mem_total - 16368.0) < 1.0
    assert abs(first.mem_used - 8184.0) < 1.0
46
+
47
+
48
def test_amd_smi_csv(monkeypatch):
    """Feed canned ``amd-smi`` CSV output to the AMD backend and check GPU 1's row.

    ``subprocess.run`` is replaced with a stub that returns the canned CSV,
    so no real ``amd-smi`` binary is needed.
    """
    amd = AmdBackend()
    amd._cmd = "amd-smi"
    amd._is_amd_smi = True

    # Header row plus one data row per GPU.
    canned_csv = """gpu,temperature (c),power (w),usage (%),vram total (mb),vram used (mb)
0,75.0,250.0,100,16384,16384
1,45.0,80.0,0,16384,1024
"""

    def fake_subprocess_run(*_args, **_kwargs):
        # Stand-in result exposing only the attributes set here.
        class _Result:
            returncode = 0
            stdout = canned_csv

        return _Result()

    monkeypatch.setattr("subprocess.run", fake_subprocess_run)

    devices, _driver, err = amd._collect_amd_smi()
    assert err == ""
    assert len(devices) == 2

    second = devices[1]
    assert second.device_id == 1
    assert second.temperature == 45.0
    assert second.power == 80.0
    assert second.utilization_rate == 0.0
    assert second.mem_total == 16384.0
    assert second.mem_used == 1024.0