vec-inf 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/cli/_cli.py +15 -1
- vec_inf/cli/_helper.py +44 -19
- vec_inf/client/_client_vars.py +0 -7
- vec_inf/client/_helper.py +66 -26
- vec_inf/client/_slurm_script_generator.py +36 -19
- vec_inf/client/_slurm_templates.py +20 -3
- vec_inf/client/_slurm_vars.py +4 -0
- vec_inf/client/_utils.py +56 -7
- vec_inf/client/api.py +8 -2
- vec_inf/client/models.py +6 -0
- vec_inf/config/environment.yaml +4 -0
- vec_inf/config/models.yaml +48 -99
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/METADATA +25 -6
- vec_inf-0.7.2.dist-info/RECORD +27 -0
- vec_inf-0.7.0.dist-info/RECORD +0 -27
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/WHEEL +0 -0
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
5
|
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -13,9 +13,10 @@ Requires-Dist: requests>=2.31.0
|
|
|
13
13
|
Requires-Dist: rich>=13.7.0
|
|
14
14
|
Provides-Extra: dev
|
|
15
15
|
Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: flashinfer-python>=0.4.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: ray[default]>=2.50.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: sglang>=0.5.0; extra == 'dev'
|
|
17
19
|
Requires-Dist: torch>=2.7.0; extra == 'dev'
|
|
18
|
-
Requires-Dist: vllm-nccl-cu12<2.19,>=2.18; extra == 'dev'
|
|
19
20
|
Requires-Dist: vllm>=0.10.0; extra == 'dev'
|
|
20
21
|
Requires-Dist: xgrammar>=0.1.11; extra == 'dev'
|
|
21
22
|
Description-Content-Type: text/markdown
|
|
@@ -66,7 +67,7 @@ You should see an output like the following:
|
|
|
66
67
|
|
|
67
68
|
<img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />
|
|
68
69
|
|
|
69
|
-
**NOTE**: On Vector Killarney Cluster environment, the
|
|
70
|
+
**NOTE**: You can set the required fields in the environment configuration (`environment.yaml`), which is a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
|
|
70
71
|
* `--account`, `-A`: The Slurm account. A default for this argument can be provided by setting the environment variable `VEC_INF_ACCOUNT`.
|
|
71
72
|
* `--work-dir`, `-D`: A working directory other than your home directory. A default for this argument can be provided by setting the environment variable `VEC_INF_WORK_DIR`.
|
|
72
73
|
|
|
@@ -96,6 +97,11 @@ Example:
|
|
|
96
97
|
>>> status = client.get_status(job_id)
|
|
97
98
|
>>> if status.status == ModelStatus.READY:
|
|
98
99
|
... print(f"Model is ready at {status.base_url}")
|
|
100
|
+
>>> # Alternatively, use wait_until_ready which will either return a StatusResponse or throw a ServerError
|
|
101
|
+
>>> try:
|
|
102
|
+
>>> status = wait_until_ready(job_id)
|
|
103
|
+
>>> except ServerError as e:
|
|
104
|
+
>>> print(f"Model launch failed: {e}")
|
|
99
105
|
>>> client.shutdown_model(job_id)
|
|
100
106
|
```
|
|
101
107
|
|
|
@@ -146,6 +152,19 @@ Once the inference server is ready, you can start sending in inference requests.
|
|
|
146
152
|
## SSH tunnel from your local device
|
|
147
153
|
If you want to run inference from your local device, you can open a SSH tunnel to your cluster environment like the following:
|
|
148
154
|
```bash
|
|
149
|
-
ssh -L 8081:
|
|
155
|
+
ssh -L 8081:10.1.1.29:8081 username@v.vectorinstitute.ai -N
|
|
156
|
+
```
|
|
157
|
+
The example provided above is for the Vector Killarney cluster; change the variables accordingly for your environment. The IP addresses of the compute nodes on Killarney follow the `10.1.1.XX` pattern, where `XX` is the GPU number (`kn029` -> `29` in this example).
|
|
158
|
+
|
|
159
|
+
## Reference
|
|
160
|
+
If you found Vector Inference useful in your research or applications, please cite using the following BibTeX template:
|
|
161
|
+
```
|
|
162
|
+
@software{vector_inference,
|
|
163
|
+
title = {Vector Inference: Efficient LLM inference on Slurm clusters using vLLM},
|
|
164
|
+
author = {Wang, Marshall},
|
|
165
|
+
organization = {Vector Institute},
|
|
166
|
+
year = {<YEAR_OF_RELEASE>},
|
|
167
|
+
version = {<VERSION_TAG>},
|
|
168
|
+
url = {https://github.com/VectorInstitute/vector-inference}
|
|
169
|
+
}
|
|
150
170
|
```
|
|
151
|
-
Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
vec_inf/README.md,sha256=WyvjbSs5Eh5fp8u66bgOaO3FQKP2U7m_HbLgqTHs_ng,1322
|
|
2
|
+
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
|
+
vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
|
|
4
|
+
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
+
vec_inf/cli/_cli.py,sha256=0YfxtPT_Nq5gvIol9eWmw5yW9AT1ghf_E49R9pD7UG4,16213
|
|
6
|
+
vec_inf/cli/_helper.py,sha256=0_onclvxxpDTp33ODYc19RbZ2aIhXuMTC9v19q8ZhIo,17473
|
|
7
|
+
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
|
+
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
|
+
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
|
+
vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
|
|
11
|
+
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
+
vec_inf/client/_helper.py,sha256=hb6m5TLwcGE0grCu5-UCUkWbByV-G5h8gA87Yzct6rk,37170
|
|
13
|
+
vec_inf/client/_slurm_script_generator.py,sha256=L6tqn71kNJ2I0xYipFh_ZxIAG8znpXhTpUxTU8LJIa4,13988
|
|
14
|
+
vec_inf/client/_slurm_templates.py,sha256=GxVNClkgggoJN2pT1AjK7CQCAErfKRMIs97Vlhxs9u8,9349
|
|
15
|
+
vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
|
|
16
|
+
vec_inf/client/_utils.py,sha256=_ZBmic0XvJ4vpdIuXDi6KO5iL2rbhIpFQT01EWGItN4,14296
|
|
17
|
+
vec_inf/client/api.py,sha256=lkVWCme-HmMJMqp8JbtjkBVL_MSPsCC_IBL9FBw3Um8,12011
|
|
18
|
+
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
+
vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
|
|
20
|
+
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
|
+
vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
|
|
22
|
+
vec_inf/config/models.yaml,sha256=PSDR29zI8xld32Vm6dhgCIRHPEkBhwQx7-d_uFlEAM8,24764
|
|
23
|
+
vec_inf-0.7.2.dist-info/METADATA,sha256=ljs9hao8q4igLERrjGL5u1vZ_n7DMrr8XnBHzybPE2Y,10099
|
|
24
|
+
vec_inf-0.7.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
+
vec_inf-0.7.2.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
+
vec_inf-0.7.2.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
+
vec_inf-0.7.2.dist-info/RECORD,,
|
vec_inf-0.7.0.dist-info/RECORD
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
vec_inf/README.md,sha256=WyvjbSs5Eh5fp8u66bgOaO3FQKP2U7m_HbLgqTHs_ng,1322
|
|
2
|
-
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
|
-
vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
|
|
4
|
-
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
-
vec_inf/cli/_cli.py,sha256=xrYce8iP2Wo5dNflvUO2gIfkyjA4V_V8mpiaxnMDwkk,15813
|
|
6
|
-
vec_inf/cli/_helper.py,sha256=Jr9NnMhGflkx3YEfYCN1rMHQgUzMAAwlSx_BLH92tVM,16511
|
|
7
|
-
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
|
-
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
|
-
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
|
-
vec_inf/client/_client_vars.py,sha256=qt47xQyZX2YcBtxk5qqmsE6qM5c3m8E2RhRBa2AY068,2619
|
|
11
|
-
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
-
vec_inf/client/_helper.py,sha256=P8A9JHRMzxJRl0dgTuv9xfOluEV3BthUM1KzQlWkR7E,35752
|
|
13
|
-
vec_inf/client/_slurm_script_generator.py,sha256=d2NowdKMQR1lsVI_hw9ObKC3uSk8YJr75ZYRMkvp0RA,13354
|
|
14
|
-
vec_inf/client/_slurm_templates.py,sha256=TAH-wQV4gP2CCwxP3BmShebohtSmlMstlJT9QK6n4Dc,8277
|
|
15
|
-
vec_inf/client/_slurm_vars.py,sha256=9BGA4Y4dGzXez6FG4V53GsMlHb9xOj7W1d7ANjkTvSQ,2723
|
|
16
|
-
vec_inf/client/_utils.py,sha256=aQoPFYUNjp0OGHDdvPu1oec_Eslv0PjtKAiW54WSgAo,12593
|
|
17
|
-
vec_inf/client/api.py,sha256=pkgNE37r7LzYBDjRGAKAh7rhOUMKHGwghJh6Hfb45TI,11681
|
|
18
|
-
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
-
vec_inf/client/models.py,sha256=qxLxsVoEhxNkuCmtABqs8In5erkwTZDK0wih7U2_U38,7296
|
|
20
|
-
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
|
-
vec_inf/config/environment.yaml,sha256=VBBlHx6zbYnzjwhWcsUI6m5Xqc-2KLPOr1oZ6GUlIWk,602
|
|
22
|
-
vec_inf/config/models.yaml,sha256=vzAOqEu6M_lXput83MAhNzj-aNGSBzjbC6LydOmNqxk,26248
|
|
23
|
-
vec_inf-0.7.0.dist-info/METADATA,sha256=4JtnZxIZA1QXN6m5YsMEUWxb_HjKGgnNBFGf8Pe-IuI,9088
|
|
24
|
-
vec_inf-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
-
vec_inf-0.7.0.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
-
vec_inf-0.7.0.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
-
vec_inf-0.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|