vec-inf 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,10 +71,3 @@ VLLM_SHORT_TO_LONG_MAP = {
71
71
 
72
72
  # Required matching arguments for batch mode
73
73
  BATCH_MODE_REQUIRED_MATCHING_ARGS = ["venv", "log_dir"]
74
-
75
- # Required arguments for launching jobs that don't have a default value and their
76
- # corresponding environment variables
77
- REQUIRED_ARGS = {
78
- "account": "VEC_INF_ACCOUNT",
79
- "work_dir": "VEC_INF_WORK_DIR",
80
- }
@@ -78,5 +78,9 @@ RESOURCE_TYPE: TypeAlias = create_literal_type( # type: ignore[valid-type]
78
78
  _config["allowed_values"]["resource_type"]
79
79
  )
80
80
 
81
+ # Extract required arguments for launching jobs that don't have a default value and
82
+ # their corresponding environment variables
83
+ REQUIRED_ARGS: dict[str, str] = _config["required_args"]
84
+
81
85
  # Extract default arguments
82
86
  DEFAULT_ARGS: dict[str, str] = _config["default_args"]
vec_inf/client/_utils.py CHANGED
@@ -14,9 +14,9 @@ from typing import Any, Optional, Union, cast
14
14
  import requests
15
15
  import yaml
16
16
 
17
- from vec_inf.client._client_vars import MODEL_READY_SIGNATURE, REQUIRED_ARGS
17
+ from vec_inf.client._client_vars import MODEL_READY_SIGNATURE
18
18
  from vec_inf.client._exceptions import MissingRequiredFieldsError
19
- from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR
19
+ from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR, REQUIRED_ARGS
20
20
  from vec_inf.client.config import ModelConfig
21
21
  from vec_inf.client.models import ModelStatus
22
22
 
@@ -15,6 +15,10 @@ allowed_values:
15
15
  partition: []
16
16
  resource_type: ["l40s", "h100"]
17
17
 
18
+ required_args:
19
+ account: "VEC_INF_ACCOUNT"
20
+ work_dir: "VEC_INF_WORK_DIR"
21
+
18
22
  default_args:
19
23
  cpus_per_task: "16"
20
24
  mem_per_node: "64G"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vec-inf
3
- Version: 0.7.0
3
+ Version: 0.7.1
4
4
  Summary: Efficient LLM inference on Slurm clusters using vLLM.
5
5
  Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
6
6
  License-Expression: MIT
@@ -66,7 +66,7 @@ You should see an output like the following:
66
66
 
67
67
  <img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />
68
68
 
69
- **NOTE**: On Vector Killarney Cluster environment, the following fields are required:
69
+ **NOTE**: You can set the required fields in the environment configuration (`environment.yaml`); it is a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
70
70
  * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
71
71
  * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by setting environment variable `VEC_INF_WORK_DIR`.
72
72
 
@@ -96,6 +96,11 @@ Example:
96
96
  >>> status = client.get_status(job_id)
97
97
  >>> if status.status == ModelStatus.READY:
98
98
  ... print(f"Model is ready at {status.base_url}")
99
+ >>> # Alternatively, use wait_until_ready which will either return a StatusResponse or throw a ServerError
100
+ >>> try:
101
+ >>> status = wait_until_ready(job_id)
102
+ >>> except ServerError as e:
103
+ >>> print(f"Model launch failed: {e}")
99
104
  >>> client.shutdown_model(job_id)
100
105
  ```
101
106
 
@@ -146,6 +151,19 @@ Once the inference server is ready, you can start sending in inference requests.
146
151
  ## SSH tunnel from your local device
147
152
  If you want to run inference from your local device, you can open a SSH tunnel to your cluster environment like the following:
148
153
  ```bash
149
- ssh -L 8081:172.17.8.29:8081 username@v.vectorinstitute.ai -N
154
+ ssh -L 8081:10.1.1.29:8081 username@v.vectorinstitute.ai -N
155
+ ```
156
+ The example provided above is for the Vector Killarney cluster; change the variables accordingly for your environment. The IP addresses for the compute nodes on Killarney follow the `10.1.1.XX` pattern, where `XX` is the GPU number (`kn029` -> `29` in this example).
157
+
158
+ ## Reference
159
+ If you found Vector Inference useful in your research or applications, please cite using the following BibTeX template:
160
+ ```
161
+ @software{vector_inference,
162
+ title = {Vector Inference: Efficient LLM inference on Slurm clusters using vLLM},
163
+ author = {Wang, Marshall},
164
+ organization = {Vector Institute},
165
+ year = {<YEAR_OF_RELEASE>},
166
+ version = {<VERSION_TAG>},
167
+ url = {https://github.com/VectorInstitute/vector-inference}
168
+ }
150
169
  ```
151
- Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment
@@ -7,21 +7,21 @@ vec_inf/cli/_helper.py,sha256=Jr9NnMhGflkx3YEfYCN1rMHQgUzMAAwlSx_BLH92tVM,16511
7
7
  vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
8
8
  vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
9
9
  vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
10
- vec_inf/client/_client_vars.py,sha256=qt47xQyZX2YcBtxk5qqmsE6qM5c3m8E2RhRBa2AY068,2619
10
+ vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
11
11
  vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
12
12
  vec_inf/client/_helper.py,sha256=P8A9JHRMzxJRl0dgTuv9xfOluEV3BthUM1KzQlWkR7E,35752
13
13
  vec_inf/client/_slurm_script_generator.py,sha256=d2NowdKMQR1lsVI_hw9ObKC3uSk8YJr75ZYRMkvp0RA,13354
14
14
  vec_inf/client/_slurm_templates.py,sha256=TAH-wQV4gP2CCwxP3BmShebohtSmlMstlJT9QK6n4Dc,8277
15
- vec_inf/client/_slurm_vars.py,sha256=9BGA4Y4dGzXez6FG4V53GsMlHb9xOj7W1d7ANjkTvSQ,2723
16
- vec_inf/client/_utils.py,sha256=aQoPFYUNjp0OGHDdvPu1oec_Eslv0PjtKAiW54WSgAo,12593
15
+ vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
16
+ vec_inf/client/_utils.py,sha256=XamAz8-AJELgkXHrR082ptTsbHSiWI47SY6MlXA44rU,12593
17
17
  vec_inf/client/api.py,sha256=pkgNE37r7LzYBDjRGAKAh7rhOUMKHGwghJh6Hfb45TI,11681
18
18
  vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
19
19
  vec_inf/client/models.py,sha256=qxLxsVoEhxNkuCmtABqs8In5erkwTZDK0wih7U2_U38,7296
20
20
  vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
21
- vec_inf/config/environment.yaml,sha256=VBBlHx6zbYnzjwhWcsUI6m5Xqc-2KLPOr1oZ6GUlIWk,602
21
+ vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
22
22
  vec_inf/config/models.yaml,sha256=vzAOqEu6M_lXput83MAhNzj-aNGSBzjbC6LydOmNqxk,26248
23
- vec_inf-0.7.0.dist-info/METADATA,sha256=4JtnZxIZA1QXN6m5YsMEUWxb_HjKGgnNBFGf8Pe-IuI,9088
24
- vec_inf-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- vec_inf-0.7.0.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
26
- vec_inf-0.7.0.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
27
- vec_inf-0.7.0.dist-info/RECORD,,
23
+ vec_inf-0.7.1.dist-info/METADATA,sha256=CJEnzc3VLXxJ_00I1ubtwNNZQjvafddxlJyoi_bSwpo,10047
24
+ vec_inf-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
+ vec_inf-0.7.1.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
26
+ vec_inf-0.7.1.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
27
+ vec_inf-0.7.1.dist-info/RECORD,,