vec-inf 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +2 -1
- vec_inf/cli/_cli.py +39 -10
- vec_inf/cli/_helper.py +100 -19
- vec_inf/client/_helper.py +80 -31
- vec_inf/client/_slurm_script_generator.py +58 -30
- vec_inf/client/_slurm_templates.py +27 -12
- vec_inf/client/_utils.py +58 -6
- vec_inf/client/api.py +55 -2
- vec_inf/client/models.py +6 -0
- vec_inf/config/models.yaml +47 -99
- vec_inf/find_port.sh +10 -1
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.3.dist-info}/METADATA +7 -6
- vec_inf-0.7.3.dist-info/RECORD +27 -0
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.3.dist-info}/WHEEL +1 -1
- vec_inf-0.7.1.dist-info/RECORD +0 -27
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.3.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
5
|
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -13,9 +13,10 @@ Requires-Dist: requests>=2.31.0
|
|
|
13
13
|
Requires-Dist: rich>=13.7.0
|
|
14
14
|
Provides-Extra: dev
|
|
15
15
|
Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: flashinfer-python>=0.4.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: ray[default]>=2.50.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: sglang>=0.5.0; extra == 'dev'
|
|
17
19
|
Requires-Dist: torch>=2.7.0; extra == 'dev'
|
|
18
|
-
Requires-Dist: vllm-nccl-cu12<2.19,>=2.18; extra == 'dev'
|
|
19
20
|
Requires-Dist: vllm>=0.10.0; extra == 'dev'
|
|
20
21
|
Requires-Dist: xgrammar>=0.1.11; extra == 'dev'
|
|
21
22
|
Description-Content-Type: text/markdown
|
|
@@ -29,7 +30,7 @@ Description-Content-Type: text/markdown
|
|
|
29
30
|
[code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
|
|
30
31
|
[docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
|
|
31
32
|
[codecov](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
|
|
32
|
-
[](https://docs.vllm.ai/en/v0.11.0/)
|
|
33
34
|

|
|
34
35
|
|
|
35
36
|
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
|
|
@@ -42,7 +43,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
|
|
|
42
43
|
```bash
|
|
43
44
|
pip install vec-inf
|
|
44
45
|
```
|
|
45
|
-
Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
|
|
46
|
+
Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
|
|
46
47
|
|
|
47
48
|
If you'd like to use `vec-inf` on your own Slurm cluster, you will need to update the configuration files. There are 3 ways to do it:
|
|
48
49
|
* Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
|
|
@@ -75,7 +76,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
|
|
|
75
76
|
#### Other commands
|
|
76
77
|
|
|
77
78
|
* `batch-launch`: Launch multiple model inference servers at once; currently ONLY single-node models are supported.
|
|
78
|
-
* `status`: Check the
|
|
79
|
+
* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
|
|
79
80
|
* `metrics`: Streams performance metrics to the console.
|
|
80
81
|
* `shutdown`: Shutdown a model by providing its Slurm job ID.
|
|
81
82
|
* `list`: List all available model names, or view the default/cached configuration of a specific model.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
vec_inf/README.md,sha256=GpKnty9u1b06cPT2Ce_5v0LBucmXOQt6Nl4OJKvjf68,1410
|
|
2
|
+
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
|
+
vec_inf/find_port.sh,sha256=HHx1kg-TIoPZu0u55S4T5jl8MDV4_mnqh4Y7r_quyWw,1358
|
|
4
|
+
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
+
vec_inf/cli/_cli.py,sha256=9EzRpOFJVd1_g0G-em7DlNJFZoKb-FvBboqSOUanoxU,16787
|
|
6
|
+
vec_inf/cli/_helper.py,sha256=q8ysD0g_hgKg_6emZZNNAhYR90SqvJJMYkPAB0Kj6gc,19177
|
|
7
|
+
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
|
+
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
|
+
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
|
+
vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
|
|
11
|
+
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
+
vec_inf/client/_helper.py,sha256=R5fOfmRK1I9H6ta6-5hhWwr12dhjZD8O0FvJOgfTkaA,37565
|
|
13
|
+
vec_inf/client/_slurm_script_generator.py,sha256=P60W36xvVNi33iK-GqK_StZ6zpJnCr8gLnY6AEn1HKE,14182
|
|
14
|
+
vec_inf/client/_slurm_templates.py,sha256=Zjl47mNxhOTxFDAa61n9o0NAZ_TwO2KF_LvYN3JG7Mk,9349
|
|
15
|
+
vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
|
|
16
|
+
vec_inf/client/_utils.py,sha256=bxn5acjDEtojEuLf8vaBv85kc5TwtYw_gIMhNKcD0M4,14405
|
|
17
|
+
vec_inf/client/api.py,sha256=-vazAWvZp0vsn4jB6R-WdUo5eZ5bR-XJqU6r6qOL16A,13596
|
|
18
|
+
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
+
vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
|
|
20
|
+
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
|
+
vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
|
|
22
|
+
vec_inf/config/models.yaml,sha256=1zPnW_1I_ltLk8wAoVNLvywQ1htvn0yzdqfHEBFDthg,24730
|
|
23
|
+
vec_inf-0.7.3.dist-info/METADATA,sha256=b-qth5Y_KY6FOj5ghuRwImLz6RBu2x3mXUrfmyqXpJ8,10122
|
|
24
|
+
vec_inf-0.7.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
25
|
+
vec_inf-0.7.3.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
+
vec_inf-0.7.3.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
+
vec_inf-0.7.3.dist-info/RECORD,,
|
vec_inf-0.7.1.dist-info/RECORD
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
vec_inf/README.md,sha256=WyvjbSs5Eh5fp8u66bgOaO3FQKP2U7m_HbLgqTHs_ng,1322
|
|
2
|
-
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
|
-
vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
|
|
4
|
-
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
-
vec_inf/cli/_cli.py,sha256=xrYce8iP2Wo5dNflvUO2gIfkyjA4V_V8mpiaxnMDwkk,15813
|
|
6
|
-
vec_inf/cli/_helper.py,sha256=Jr9NnMhGflkx3YEfYCN1rMHQgUzMAAwlSx_BLH92tVM,16511
|
|
7
|
-
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
|
-
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
|
-
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
|
-
vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
|
|
11
|
-
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
-
vec_inf/client/_helper.py,sha256=P8A9JHRMzxJRl0dgTuv9xfOluEV3BthUM1KzQlWkR7E,35752
|
|
13
|
-
vec_inf/client/_slurm_script_generator.py,sha256=d2NowdKMQR1lsVI_hw9ObKC3uSk8YJr75ZYRMkvp0RA,13354
|
|
14
|
-
vec_inf/client/_slurm_templates.py,sha256=TAH-wQV4gP2CCwxP3BmShebohtSmlMstlJT9QK6n4Dc,8277
|
|
15
|
-
vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
|
|
16
|
-
vec_inf/client/_utils.py,sha256=XamAz8-AJELgkXHrR082ptTsbHSiWI47SY6MlXA44rU,12593
|
|
17
|
-
vec_inf/client/api.py,sha256=pkgNE37r7LzYBDjRGAKAh7rhOUMKHGwghJh6Hfb45TI,11681
|
|
18
|
-
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
-
vec_inf/client/models.py,sha256=qxLxsVoEhxNkuCmtABqs8In5erkwTZDK0wih7U2_U38,7296
|
|
20
|
-
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
|
-
vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
|
|
22
|
-
vec_inf/config/models.yaml,sha256=vzAOqEu6M_lXput83MAhNzj-aNGSBzjbC6LydOmNqxk,26248
|
|
23
|
-
vec_inf-0.7.1.dist-info/METADATA,sha256=CJEnzc3VLXxJ_00I1ubtwNNZQjvafddxlJyoi_bSwpo,10047
|
|
24
|
-
vec_inf-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
-
vec_inf-0.7.1.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
-
vec_inf-0.7.1.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
-
vec_inf-0.7.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|