vserve 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ cd "$(git rev-parse --show-toplevel)"
5
+
6
+ echo "=== ruff ==="
7
+ uv run ruff check src/ tests/
8
+
9
+ echo "=== mypy ==="
10
+ uv run mypy src/vserve/ --ignore-missing-imports
11
+
12
+ echo "=== pytest ==="
13
+ uv run pytest tests/ -q --tb=short
14
+
15
+ echo "=== all checks passed ==="
@@ -0,0 +1,19 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v6
14
+ - uses: astral-sh/setup-uv@v7
15
+ - run: uv python install 3.13
16
+ - run: uv sync --dev
17
+ - run: uv run ruff check src/ tests/
18
+ - run: uv run mypy src/vserve/ --ignore-missing-imports
19
+ - run: uv run pytest tests/ -q --tb=short
@@ -0,0 +1,46 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v6
13
+ - uses: astral-sh/setup-uv@v7
14
+ - run: uv python install 3.13
15
+ - run: uv sync --dev
16
+ - run: uv run ruff check src/ tests/
17
+ - run: uv run mypy src/vserve/ --ignore-missing-imports
18
+ - run: uv run pytest tests/ -q --tb=short
19
+
20
+ build:
21
+ runs-on: ubuntu-latest
22
+ needs: test
23
+ steps:
24
+ - uses: actions/checkout@v6
25
+ - uses: astral-sh/setup-uv@v7
26
+ - run: uv python install 3.13
27
+ - run: uv build
28
+ - uses: actions/upload-artifact@v4
29
+ with:
30
+ name: dist
31
+ path: dist/
32
+
33
+ publish:
34
+ runs-on: ubuntu-latest
35
+ needs: build
36
+ environment:
37
+ name: pypi
38
+ url: https://pypi.org/p/vserve
39
+ permissions:
40
+ id-token: write
41
+ steps:
42
+ - uses: actions/download-artifact@v4
43
+ with:
44
+ name: dist
45
+ path: dist/
46
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ .pytest_cache/
5
+ *.egg-info/
6
+ dist/
7
+ .serena/
8
+ docs/superpowers/
9
+ docs/plans/
10
+ docs/specs/
vserve-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mohan Qiao
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
vserve-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: vserve
3
+ Version: 0.1.0
4
+ Summary: CLI for managing vLLM inference on GPU workstations
5
+ Project-URL: Homepage, https://github.com/Gavin-Qiao/vserve
6
+ Project-URL: Repository, https://github.com/Gavin-Qiao/vserve
7
+ Project-URL: Issues, https://github.com/Gavin-Qiao/vserve/issues
8
+ Author: Mohan Qiao
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: GPU :: NVIDIA CUDA
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: huggingface-hub>=0.30
22
+ Requires-Dist: nvidia-ml-py>=13.595.45
23
+ Requires-Dist: pyyaml>=6
24
+ Requires-Dist: rich>=14
25
+ Requires-Dist: typer>=0.15
26
+ Description-Content-Type: text/markdown
27
+
28
+ <div align="center">
29
+
30
+ # vserve
31
+
32
+ **A CLI for managing vLLM inference on GPU workstations.**
33
+
34
+ Download models. Calculate limits. Serve with one command. Control fans.
35
+
36
+ ![Python 3.12+](https://img.shields.io/badge/python-3.12+-3776ab?style=flat-square&logo=python&logoColor=white)
37
+ ![vLLM 0.18+](https://img.shields.io/badge/vLLM-0.18+-ff6f00?style=flat-square)
38
+ ![Tests](https://img.shields.io/badge/tests-175%20passed-brightgreen?style=flat-square)
39
+ ![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)
40
+
41
+ </div>
42
+
43
+ ---
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ uv tool install vserve
49
+ ```
50
+
51
+ Or with pip:
52
+
53
+ ```bash
54
+ pip install vserve
55
+ ```
56
+
57
+ ---
58
+
59
+ ## Quick Start
60
+
61
+ ```bash
62
+ vserve init # scan GPU, vLLM, CUDA, systemd — write config
63
+ vserve download # search HuggingFace, pick variant, download
64
+ vserve tune <model> # calculate context/concurrency limits for your GPU
65
+ vserve start <model> # interactive config → serve via systemd
66
+ ```
67
+
68
+ ---
69
+
70
+ ## What It Does
71
+
72
+ **vserve** manages the full lifecycle of serving LLMs with vLLM on a GPU workstation:
73
+
74
+ - **Download** — search HuggingFace, see available weight variants (MXFP4, BF16, GGUF) with sizes, download only what you need
75
+ - **Tune** — calculate exactly what context lengths and concurrency your GPU can handle, based on model architecture and available VRAM
76
+ - **Start/Stop** — interactive config wizard, systemd service management, health check with timeout
77
+ - **Fan control** — temperature-based curve daemon with quiet hours, or hold a fixed speed
78
+ - **Multi-user** — file-based locking, terminal notifications when another user holds the GPU
79
+ - **Doctor** — diagnose GPU, CUDA, vLLM, systemd issues with actionable fix suggestions
80
+
81
+ ---
82
+
83
+ ## Commands
84
+
85
+ | Command | Description |
86
+ |:--------|:------------|
87
+ | `vserve` | Dashboard — GPU, models, status |
88
+ | `vserve init` | Auto-discover vLLM and write config |
89
+ | `vserve download [model]` | Search and download from HuggingFace with variant picker |
90
+ | `vserve models [name]` | List models or show detail (fuzzy match) |
91
+ | `vserve tune <model>` | Calculate context/concurrency limits for your GPU |
92
+ | `vserve start [model]` | Configure and start serving |
93
+ | `vserve stop` | Stop the vLLM service |
94
+ | `vserve status` | Show current serving config |
95
+ | `vserve fan [auto\|off\|30-100]` | GPU fan control with temp-based curve |
96
+ | `vserve doctor` | Check system readiness |
97
+
98
+ All commands support **fuzzy matching** — `vserve start qwen fp8` finds the right model.
99
+
100
+ ---
101
+
102
+ ## Prerequisites
103
+
104
+ | Requirement | Check | Install |
105
+ |------------|-------|---------|
106
+ | NVIDIA GPU + drivers | `nvidia-smi` | [nvidia.com/drivers](https://www.nvidia.com/drivers) |
107
+ | CUDA toolkit | `nvcc --version` | `sudo apt install nvidia-cuda-toolkit` |
108
+ | vLLM 0.18+ | `vllm --version` | [docs.vllm.ai](https://docs.vllm.ai/en/latest/getting_started/installation.html) |
109
+ | systemd | (most Linux servers) | See [troubleshooting](docs/troubleshooting.md) |
110
+ | sudo access | for systemctl, fan control | |
111
+
112
+ ---
113
+
114
+ ## Configuration
115
+
116
+ Auto-discovered on first run. Override at `~/.config/vserve/config.yaml`:
117
+
118
+ ```yaml
119
+ vllm_root: /opt/vllm
120
+ cuda_home: /usr/local/cuda
121
+ service_name: vllm
122
+ service_user: vllm
123
+ port: 8888
124
+ ```
125
+
126
+ ---
127
+
128
+ ## Fan Control
129
+
130
+ ```bash
131
+ vserve fan # show status, interactive menu
132
+ vserve fan auto # temp-based curve with quiet hours
133
+ vserve fan 80 # hold at 80% (persistent daemon)
134
+ vserve fan off # stop daemon, restore NVIDIA auto
135
+ ```
136
+
137
+ The auto curve ramps with temperature and caps fan speed during quiet hours (configurable). Emergency override at 88°C ignores quiet hours.
138
+
139
+ ---
140
+
141
+ ## Development
142
+
143
+ ```bash
144
+ git clone https://github.com/Gavin-Qiao/vserve.git
145
+ cd vserve
146
+ uv sync --dev
147
+ uv run pytest tests/ # 175 tests
148
+ uv run ruff check src/ tests/ # lint
149
+ uv run mypy src/vserve/ # type check
150
+ ```
151
+
152
+ ---
153
+
154
+ ## License
155
+
156
+ [MIT](LICENSE)
vserve-0.1.0/README.md ADDED
@@ -0,0 +1,129 @@
1
+ <div align="center">
2
+
3
+ # vserve
4
+
5
+ **A CLI for managing vLLM inference on GPU workstations.**
6
+
7
+ Download models. Calculate limits. Serve with one command. Control fans.
8
+
9
+ ![Python 3.12+](https://img.shields.io/badge/python-3.12+-3776ab?style=flat-square&logo=python&logoColor=white)
10
+ ![vLLM 0.18+](https://img.shields.io/badge/vLLM-0.18+-ff6f00?style=flat-square)
11
+ ![Tests](https://img.shields.io/badge/tests-175%20passed-brightgreen?style=flat-square)
12
+ ![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)
13
+
14
+ </div>
15
+
16
+ ---
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ uv tool install vserve
22
+ ```
23
+
24
+ Or with pip:
25
+
26
+ ```bash
27
+ pip install vserve
28
+ ```
29
+
30
+ ---
31
+
32
+ ## Quick Start
33
+
34
+ ```bash
35
+ vserve init # scan GPU, vLLM, CUDA, systemd — write config
36
+ vserve download # search HuggingFace, pick variant, download
37
+ vserve tune <model> # calculate context/concurrency limits for your GPU
38
+ vserve start <model> # interactive config → serve via systemd
39
+ ```
40
+
41
+ ---
42
+
43
+ ## What It Does
44
+
45
+ **vserve** manages the full lifecycle of serving LLMs with vLLM on a GPU workstation:
46
+
47
+ - **Download** — search HuggingFace, see available weight variants (MXFP4, BF16, GGUF) with sizes, download only what you need
48
+ - **Tune** — calculate exactly what context lengths and concurrency your GPU can handle, based on model architecture and available VRAM
49
+ - **Start/Stop** — interactive config wizard, systemd service management, health check with timeout
50
+ - **Fan control** — temperature-based curve daemon with quiet hours, or hold a fixed speed
51
+ - **Multi-user** — file-based locking, terminal notifications when another user holds the GPU
52
+ - **Doctor** — diagnose GPU, CUDA, vLLM, systemd issues with actionable fix suggestions
53
+
54
+ ---
55
+
56
+ ## Commands
57
+
58
+ | Command | Description |
59
+ |:--------|:------------|
60
+ | `vserve` | Dashboard — GPU, models, status |
61
+ | `vserve init` | Auto-discover vLLM and write config |
62
+ | `vserve download [model]` | Search and download from HuggingFace with variant picker |
63
+ | `vserve models [name]` | List models or show detail (fuzzy match) |
64
+ | `vserve tune <model>` | Calculate context/concurrency limits for your GPU |
65
+ | `vserve start [model]` | Configure and start serving |
66
+ | `vserve stop` | Stop the vLLM service |
67
+ | `vserve status` | Show current serving config |
68
+ | `vserve fan [auto\|off\|30-100]` | GPU fan control with temp-based curve |
69
+ | `vserve doctor` | Check system readiness |
70
+
71
+ All commands support **fuzzy matching** — `vserve start qwen fp8` finds the right model.
72
+
73
+ ---
74
+
75
+ ## Prerequisites
76
+
77
+ | Requirement | Check | Install |
78
+ |------------|-------|---------|
79
+ | NVIDIA GPU + drivers | `nvidia-smi` | [nvidia.com/drivers](https://www.nvidia.com/drivers) |
80
+ | CUDA toolkit | `nvcc --version` | `sudo apt install nvidia-cuda-toolkit` |
81
+ | vLLM 0.18+ | `vllm --version` | [docs.vllm.ai](https://docs.vllm.ai/en/latest/getting_started/installation.html) |
82
+ | systemd | (most Linux servers) | See [troubleshooting](docs/troubleshooting.md) |
83
+ | sudo access | for systemctl, fan control | |
84
+
85
+ ---
86
+
87
+ ## Configuration
88
+
89
+ Auto-discovered on first run. Override at `~/.config/vserve/config.yaml`:
90
+
91
+ ```yaml
92
+ vllm_root: /opt/vllm
93
+ cuda_home: /usr/local/cuda
94
+ service_name: vllm
95
+ service_user: vllm
96
+ port: 8888
97
+ ```
98
+
99
+ ---
100
+
101
+ ## Fan Control
102
+
103
+ ```bash
104
+ vserve fan # show status, interactive menu
105
+ vserve fan auto # temp-based curve with quiet hours
106
+ vserve fan 80 # hold at 80% (persistent daemon)
107
+ vserve fan off # stop daemon, restore NVIDIA auto
108
+ ```
109
+
110
+ The auto curve ramps with temperature and caps fan speed during quiet hours (configurable). Emergency override at 88°C ignores quiet hours.
111
+
112
+ ---
113
+
114
+ ## Development
115
+
116
+ ```bash
117
+ git clone https://github.com/Gavin-Qiao/vserve.git
118
+ cd vserve
119
+ uv sync --dev
120
+ uv run pytest tests/ # 175 tests
121
+ uv run ruff check src/ tests/ # lint
122
+ uv run mypy src/vserve/ # type check
123
+ ```
124
+
125
+ ---
126
+
127
+ ## License
128
+
129
+ [MIT](LICENSE)
@@ -0,0 +1,142 @@
1
+ # Troubleshooting
2
+
3
+ Hard-won lessons from running vLLM on NVIDIA workstation GPUs.
4
+
5
+ ## GPU Crashes (Xid Errors)
6
+
7
+ ### Xid 8 — GPU Stopped Processing
8
+
9
+ **Symptom:** vLLM dies after hours of stable inference. Kernel log shows:
10
+ ```
11
+ NVRM: krcWatchdog_IMPL: RC watchdog: GPU is probably locked!
12
+ NVRM: Xid (PCI:0000:02:00): 8, pid=..., name=VLLM::EngineCor
13
+ ```
14
+
15
+ **Root cause:** The GPU's recovery counter watchdog detected a hang. On Blackwell (SM120), this correlates with CUDA graphs under sustained FP8 load (see [vllm-project/vllm#35659](https://github.com/vllm-project/vllm/issues/35659)).
16
+
17
+ **Recovery:**
18
+ 1. The GPU usually recovers automatically after the faulting process exits — no reset needed.
19
+ 2. `nvidia-smi --gpu-reset` is deprecated as of driver 570+.
20
+ 3. If the GPU is unresponsive, reload kernel modules: `sudo rmmod nvidia_uvm nvidia_drm nvidia_modeset nvidia && sudo modprobe nvidia`
21
+ 4. Last resort: reboot.
22
+
23
+ **Prevention:**
24
+ - Use the production driver branch (595.x), not the new-feature branch (590.x).
25
+ - `vserve fan auto` keeps temps below throttle threshold.
26
+ - systemd `Restart=always` with exponential backoff recovers automatically.
27
+ - If crashes persist, try `--enforce-eager` (disables CUDA graphs, ~2.3x throughput cost).
28
+
29
+ ### vLLM exits cleanly but systemd doesn't restart
30
+
31
+ **Cause:** vLLM's APIServer shuts down cleanly (exit 0) after an EngineCore crash. `Restart=on-failure` ignores exit 0.
32
+
33
+ **Fix:** Use `Restart=always` in the systemd unit.
34
+
35
+ ## Fan Control
36
+
37
+ ### Why override NVIDIA's auto curve?
38
+
39
+ The default auto curve on workstation GPUs (RTX PRO series) caps fan speed conservatively for noise. Under sustained 300W inference, this can allow temps to reach 85-90°C, causing:
40
+ - Thermal throttling (reduced inference throughput)
41
+ - Accelerated component aging (electromigration, capacitor degradation)
42
+ - Increased risk of Xid errors
43
+
44
+ ### Coolbits setup
45
+
46
+ Fan control via `nvidia-settings` requires Coolbits enabled in X11 config:
47
+
48
+ ```bash
49
+ # /etc/X11/xorg.conf
50
+ Section "Device"
51
+ Identifier "GPU0"
52
+ Driver "nvidia"
53
+ BusID "PCI:2:0:0" # check with: nvidia-smi --query-gpu=gpu_bus_id --format=csv,noheader
54
+ Option "Coolbits" "4" # bit 2 = manual fan control
55
+ EndSection
56
+ ```
57
+
58
+ On headless systems, `vserve fan` uses a temporary Xvfb virtual display — no persistent X server needed.
59
+
60
+ ### Hardware thermal failsafe
61
+
62
+ Even if the fan daemon crashes and the fan is stuck at 30%, the GPU has hardware protection:
63
+ - GPU Boost reduces clocks starting at ~83°C
64
+ - Aggressive power limiting at ~90°C
65
+ - Hardware shutdown at ~100-105°C (cannot be overridden by software)
66
+
67
+ The `vserve fan` emergency override (100% fan at 88°C regardless of quiet hours) is software-level defense. The hardware failsafe is always present underneath.
68
+
69
+ ## JIT Compilation
70
+
71
+ ### FlashInfer JIT cache
72
+
73
+ vLLM 0.18+ uses FlashInfer with just-in-time compiled CUDA kernels. First run for each model/KV-dtype combination triggers JIT compilation that takes 2-5 minutes.
74
+
75
+ **Where the cache lives:** `$VLLM_ROOT/.cache/flashinfer/`
76
+
77
+ **Problem:** If vLLM runs as a different user (e.g., systemd `User=vllm`), the JIT cache must be writable by that user. First-time startup will be slow.
78
+
79
+ **The preheat solution:** `vserve tune` runs a preheat step that starts vLLM briefly as the service user to build the JIT cache before benchmarking. This uses `sudo -u <service_user>` with the correct `CUDA_HOME`, `PATH`, and `HF_HOME` environment.
80
+
81
+ ### torch.compile cache
82
+
83
+ vLLM also uses `torch.compile` which has its own cache at `$VLLM_ROOT/.cache/vllm/torch_compile_cache/`. Same ownership considerations apply.
84
+
85
+ ### CUDA_HOME and PATH
86
+
87
+ The JIT compiler needs:
88
+ - `nvcc` accessible (CUDA toolkit bin directory in PATH)
89
+ - `CUDA_HOME` set to the CUDA toolkit root
90
+ - Matching CUDA version between the toolkit and the PyTorch build
91
+
92
+ If JIT compilation fails with `nvcc not found` or architecture mismatches, check:
93
+ ```bash
94
+ which nvcc
95
+ nvcc --version
96
+ python -c "import torch; print(torch.version.cuda)"
97
+ ```
98
+
99
+ ### ProtectSystem=strict breaks JIT
100
+
101
+ Do NOT use `ProtectSystem=strict` in the vLLM systemd unit — it makes `/usr` read-only, preventing nvcc from writing temporary files during JIT compilation. The `gpu-fan.service` can use it (fan control doesn't need JIT), but the main `vllm.service` cannot.
102
+
103
+ ## Driver Management
104
+
105
+ ### Which driver branch to use
106
+
107
+ | Branch | Status | Use |
108
+ |--------|--------|-----|
109
+ | 570.x | Old production | Pre-Blackwell only |
110
+ | 575.x | Early Blackwell | Known issues, avoid |
111
+ | 590.x | New-feature branch | Experimental, not for production |
112
+ | 595.x | **Current production** | Recommended for Blackwell |
113
+
114
+ Check your branch: `nvidia-smi` shows the driver version in the header.
115
+
116
+ ### Open vs. proprietary kernel modules
117
+
118
+ Blackwell GPUs (RTX 50-series, RTX PRO 5000/6000) **require open kernel modules**. The proprietary modules do not support Blackwell at all. Always use packages with `-open` suffix (e.g., `nvidia-driver-595-server-open` or `nvidia-open` from the CUDA repo).
119
+
120
+ ### nvidia-settings version mismatch
121
+
122
+ If `nvidia-settings --version` shows a different version than `nvidia-smi`, you have a partial driver upgrade. This can cause subtle issues. Fix by upgrading the full driver stack to match.
123
+
124
+ ## Headless Operation
125
+
126
+ ### Disabling the display manager
127
+
128
+ For 24/7 inference, disable the display manager to free GPU memory and simplify driver reloads:
129
+
130
+ ```bash
131
+ sudo systemctl disable gdm # or lightdm/sddm
132
+ sudo systemctl set-default multi-user.target
133
+ ```
134
+
135
+ GUI is still available on demand: `sudo systemctl start gdm`
136
+
137
+ ### GPU memory with no display
138
+
139
+ With no display manager: ~2 MiB GPU memory used (driver overhead only).
140
+ With Xorg/GDM running: ~15 MiB (Xorg frame buffer).
141
+
142
+ This is negligible for 48 GB, but matters for driver reload — fewer processes holding the GPU open means cleaner `rmmod`.
@@ -0,0 +1,47 @@
1
+ [project]
2
+ name = "vserve"
3
+ version = "0.1.0"
4
+ description = "CLI for managing vLLM inference on GPU workstations"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.12"
8
+ authors = [{ name = "Mohan Qiao" }]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Environment :: Console",
12
+ "Environment :: GPU :: NVIDIA CUDA",
13
+ "Intended Audience :: Developers",
14
+ "Intended Audience :: Science/Research",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3.12",
17
+ "Programming Language :: Python :: 3.13",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+ dependencies = [
21
+ "typer>=0.15",
22
+ "rich>=14",
23
+ "huggingface-hub>=0.30",
24
+ "pyyaml>=6",
25
+ "nvidia-ml-py>=13.595.45",
26
+ ]
27
+
28
+ [dependency-groups]
29
+ dev = ["pytest>=8", "pytest-mock>=3", "ruff>=0.15", "mypy>=1.15", "types-PyYAML>=6", "types-requests>=2.33"]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/Gavin-Qiao/vserve"
33
+ Repository = "https://github.com/Gavin-Qiao/vserve"
34
+ Issues = "https://github.com/Gavin-Qiao/vserve/issues"
35
+
36
+ [project.scripts]
37
+ vserve = "vserve.cli:app"
38
+
39
+ [build-system]
40
+ requires = ["hatchling"]
41
+ build-backend = "hatchling.build"
42
+
43
+ [tool.hatch.build.targets.wheel]
44
+ packages = ["src/vserve"]
45
+
46
+ [tool.pytest.ini_options]
47
+ testpaths = ["tests"]
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"