vec-inf 0.7.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/workflows/code_checks.yml +1 -1
  2. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/workflows/docker.yml +1 -1
  3. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/workflows/docs.yml +2 -2
  4. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/workflows/publish.yml +1 -1
  5. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/workflows/unit_tests.yml +2 -2
  6. {vec_inf-0.7.0 → vec_inf-0.7.1}/.pre-commit-config.yaml +2 -2
  7. {vec_inf-0.7.0 → vec_inf-0.7.1}/PKG-INFO +22 -4
  8. {vec_inf-0.7.0 → vec_inf-0.7.1}/README.md +21 -3
  9. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/user_guide.md +3 -3
  10. {vec_inf-0.7.0 → vec_inf-0.7.1}/pyproject.toml +1 -1
  11. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_client_vars.py +0 -7
  12. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_slurm_vars.py +4 -0
  13. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_utils.py +2 -2
  14. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/config/environment.yaml +4 -0
  15. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  16. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  17. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  18. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
  19. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/dependabot.yml +0 -0
  20. {vec_inf-0.7.0 → vec_inf-0.7.1}/.github/pull_request_template.md +0 -0
  21. {vec_inf-0.7.0 → vec_inf-0.7.1}/.gitignore +0 -0
  22. {vec_inf-0.7.0 → vec_inf-0.7.1}/.python-version +0 -0
  23. {vec_inf-0.7.0 → vec_inf-0.7.1}/Dockerfile +0 -0
  24. {vec_inf-0.7.0 → vec_inf-0.7.1}/LICENSE +0 -0
  25. {vec_inf-0.7.0 → vec_inf-0.7.1}/MODEL_TRACKING.md +0 -0
  26. {vec_inf-0.7.0 → vec_inf-0.7.1}/codecov.yml +0 -0
  27. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/Makefile +0 -0
  28. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/api.md +0 -0
  29. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/assets/favicon-48x48.svg +0 -0
  30. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/assets/favicon.ico +0 -0
  31. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/assets/vector-logo.svg +0 -0
  32. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/contributing.md +0 -0
  33. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/index.md +0 -0
  34. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/make.bat +0 -0
  35. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/overrides/partials/copyright.html +0 -0
  36. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/overrides/partials/logo.html +0 -0
  37. {vec_inf-0.7.0 → vec_inf-0.7.1}/docs/stylesheets/extra.css +0 -0
  38. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/README.md +0 -0
  39. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/api/basic_usage.py +0 -0
  40. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/inference/llm/chat_completions.py +0 -0
  41. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/inference/llm/completions.py +0 -0
  42. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/inference/llm/completions.sh +0 -0
  43. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/inference/text_embedding/embeddings.py +0 -0
  44. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/inference/vlm/vision_completions.py +0 -0
  45. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/logits/logits.py +0 -0
  46. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/slurm_dependency/README.md +0 -0
  47. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/slurm_dependency/downstream_job.sbatch +0 -0
  48. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/slurm_dependency/run_downstream.py +0 -0
  49. {vec_inf-0.7.0 → vec_inf-0.7.1}/examples/slurm_dependency/run_workflow.sh +0 -0
  50. {vec_inf-0.7.0 → vec_inf-0.7.1}/mkdocs.yml +0 -0
  51. {vec_inf-0.7.0 → vec_inf-0.7.1}/profile/avg_throughput.py +0 -0
  52. {vec_inf-0.7.0 → vec_inf-0.7.1}/profile/gen.py +0 -0
  53. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/__init__.py +0 -0
  54. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/test_imports.py +0 -0
  55. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/__init__.py +0 -0
  56. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/cli/__init__.py +0 -0
  57. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/cli/test_cli.py +0 -0
  58. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/cli/test_helper.py +0 -0
  59. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/cli/test_utils.py +0 -0
  60. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/__init__.py +0 -0
  61. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_api.py +0 -0
  62. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_examples.py +0 -0
  63. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_helper.py +0 -0
  64. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_models.py +0 -0
  65. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_slurm_script_generator.py +0 -0
  66. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_utils.py +0 -0
  67. {vec_inf-0.7.0 → vec_inf-0.7.1}/tests/vec_inf/client/test_vars.env +0 -0
  68. {vec_inf-0.7.0 → vec_inf-0.7.1}/uv.lock +0 -0
  69. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/README.md +0 -0
  70. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/__init__.py +0 -0
  71. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/cli/__init__.py +0 -0
  72. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/cli/_cli.py +0 -0
  73. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/cli/_helper.py +0 -0
  74. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/cli/_utils.py +0 -0
  75. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/cli/_vars.py +0 -0
  76. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/__init__.py +0 -0
  77. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_exceptions.py +0 -0
  78. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_helper.py +0 -0
  79. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_slurm_script_generator.py +0 -0
  80. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/_slurm_templates.py +0 -0
  81. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/api.py +0 -0
  82. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/config.py +0 -0
  83. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/client/models.py +0 -0
  84. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/config/README.md +0 -0
  85. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/config/models.yaml +0 -0
  86. {vec_inf-0.7.0 → vec_inf-0.7.1}/vec_inf/find_port.sh +0 -0
  87. {vec_inf-0.7.0 → vec_inf-0.7.1}/venv.sh +0 -0
--- vec_inf-0.7.0/.github/workflows/code_checks.yml
+++ vec_inf-0.7.1/.github/workflows/code_checks.yml
@@ -36,7 +36,7 @@ jobs:
           version: "0.5.21"
           enable-cache: true
       - name: "Set up Python"
-        uses: actions/setup-python@v5.5.0
+        uses: actions/setup-python@v6
        with:
          python-version-file: ".python-version"
      - name: Install the project

--- vec_inf-0.7.0/.github/workflows/docker.yml
+++ vec_inf-0.7.1/.github/workflows/docker.yml
@@ -33,7 +33,7 @@ jobs:
           echo "version=$VERSION" >> $GITHUB_OUTPUT

       - name: Log in to Docker Hub
-        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

--- vec_inf-0.7.0/.github/workflows/docs.yml
+++ vec_inf-0.7.1/.github/workflows/docs.yml
@@ -62,7 +62,7 @@ jobs:
           enable-cache: true

       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version-file: ".python-version"

@@ -99,7 +99,7 @@ jobs:
           enable-cache: true

       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version-file: ".python-version"


--- vec_inf-0.7.0/.github/workflows/publish.yml
+++ vec_inf-0.7.1/.github/workflows/publish.yml
@@ -21,7 +21,7 @@ jobs:
           version: "0.6.6"
           enable-cache: true

-      - uses: actions/setup-python@v5.5.0
+      - uses: actions/setup-python@v6
        with:
          python-version: '3.10'


--- vec_inf-0.7.0/.github/workflows/unit_tests.yml
+++ vec_inf-0.7.1/.github/workflows/unit_tests.yml
@@ -53,7 +53,7 @@ jobs:
           enable-cache: true

       - name: "Set up Python ${{ matrix.python-version }}"
-        uses: actions/setup-python@v5.5.0
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

@@ -76,7 +76,7 @@ jobs:
           gpg --keyserver keyserver.ubuntu.com --recv-keys 806BB28AED779869

       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.5.0
+        uses: codecov/codecov-action@v5.5.1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          file: ./coverage.xml

--- vec_inf-0.7.0/.pre-commit-config.yaml
+++ vec_inf-0.7.1/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.12.10'
+    rev: 'v0.13.2'
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
@@ -26,7 +26,7 @@ repos:
         types_or: [python, jupyter]

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.17.1
+    rev: v1.18.2
    hooks:
      - id: mypy
        entry: python3 -m mypy --config-file pyproject.toml

--- vec_inf-0.7.0/PKG-INFO
+++ vec_inf-0.7.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.7.0
+Version: 0.7.1
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -66,7 +66,7 @@ You should see an output like the following:

 <img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />

-**NOTE**: On Vector Killarney Cluster environment, the following fields are required:
+**NOTE**: You can set the required fields in the environment configuration (`environment.yaml`), it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.

@@ -96,6 +96,11 @@ Example:
 >>> status = client.get_status(job_id)
 >>> if status.status == ModelStatus.READY:
 ... print(f"Model is ready at {status.base_url}")
+>>> # Alternatively, use wait_until_ready which will either return a StatusResponse or throw a ServerError
+>>> try:
+>>>     status = wait_until_ready(job_id)
+>>> except ServerError as e:
+>>>     print(f"Model launch failed: {e}")
 >>> client.shutdown_model(job_id)
 ```

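The doctest added above only sketches the error path. A fuller, hedged sketch of how the blocking call might fit into a client session — assuming `wait_until_ready` is a method on `VecInfClient`, and that `launch_model`, the `slurm_job_id` attribute, and the `ServerError` import path are as shown (none of which this diff confirms):

```python
# Hedged sketch, not the package's documented API: wait_until_ready is assumed
# to be a VecInfClient method; launch_model, slurm_job_id, and the ServerError
# import location are likewise assumptions.
from vec_inf.client import VecInfClient
from vec_inf.client._exceptions import ServerError  # assumed location

client = VecInfClient()
response = client.launch_model("Meta-Llama-3.1-8B-Instruct")  # hypothetical model name
job_id = response.slurm_job_id  # attribute name assumed

try:
    # Blocks until the server reports ready, instead of polling get_status() in a loop.
    status = client.wait_until_ready(job_id)
    print(f"Model is ready at {status.base_url}")
except ServerError as e:
    print(f"Model launch failed: {e}")
finally:
    client.shutdown_model(job_id)
```

The blocking variant removes the manual `get_status` polling loop, at the cost of tying up the caller until the Slurm job reaches a ready or failed state.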
@@ -146,6 +151,19 @@ Once the inference server is ready, you can start sending in inference requests.
 ## SSH tunnel from your local device
 If you want to run inference from your local device, you can open a SSH tunnel to your cluster environment like the following:
 ```bash
-ssh -L 8081:172.17.8.29:8081 username@v.vectorinstitute.ai -N
+ssh -L 8081:10.1.1.29:8081 username@v.vectorinstitute.ai -N
+```
+The example provided above is for the Vector Killarney cluster, change the variables accordingly for your environment. The IP address for the compute nodes on Killarney follow `10.1.1.XX` pattern, where `XX` is the GPU number (`kn029` -> `29` in this example).
+
+## Reference
+If you found Vector Inference useful in your research or applications, please cite using the following BibTeX template:
+```
+@software{vector_inference,
+  title = {Vector Inference: Efficient LLM inference on Slurm clusters using vLLM},
+  author = {Wang, Marshall},
+  organization = {Vector Institute},
+  year = {<YEAR_OF_RELEASE>},
+  version = {<VERSION_TAG>},
+  url = {https://github.com/VectorInstitute/vector-inference}
+}
 ```
-Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment

--- vec_inf-0.7.0/README.md
+++ vec_inf-0.7.1/README.md
@@ -44,7 +44,7 @@ You should see an output like the following:

 <img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />

-**NOTE**: On Vector Killarney Cluster environment, the following fields are required:
+**NOTE**: You can set the required fields in the environment configuration (`environment.yaml`), it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.

@@ -74,6 +74,11 @@ Example:
 >>> status = client.get_status(job_id)
 >>> if status.status == ModelStatus.READY:
 ... print(f"Model is ready at {status.base_url}")
+>>> # Alternatively, use wait_until_ready which will either return a StatusResponse or throw a ServerError
+>>> try:
+>>>     status = wait_until_ready(job_id)
+>>> except ServerError as e:
+>>>     print(f"Model launch failed: {e}")
 >>> client.shutdown_model(job_id)
 ```

@@ -124,6 +129,19 @@ Once the inference server is ready, you can start sending in inference requests.
 ## SSH tunnel from your local device
 If you want to run inference from your local device, you can open a SSH tunnel to your cluster environment like the following:
 ```bash
-ssh -L 8081:172.17.8.29:8081 username@v.vectorinstitute.ai -N
+ssh -L 8081:10.1.1.29:8081 username@v.vectorinstitute.ai -N
+```
+The example provided above is for the Vector Killarney cluster, change the variables accordingly for your environment. The IP address for the compute nodes on Killarney follow `10.1.1.XX` pattern, where `XX` is the GPU number (`kn029` -> `29` in this example).
+
+## Reference
+If you found Vector Inference useful in your research or applications, please cite using the following BibTeX template:
+```
+@software{vector_inference,
+  title = {Vector Inference: Efficient LLM inference on Slurm clusters using vLLM},
+  author = {Wang, Marshall},
+  organization = {Vector Institute},
+  year = {<YEAR_OF_RELEASE>},
+  version = {<VERSION_TAG>},
+  url = {https://github.com/VectorInstitute/vector-inference}
+}
 ```
-Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment

--- vec_inf-0.7.0/docs/user_guide.md
+++ vec_inf-0.7.1/docs/user_guide.md
@@ -37,7 +37,7 @@ You should see an output like the following:
 └─────────────────────────┴───────────────────────────────────────────┘
 ```

-**NOTE**: On Vector Killarney Cluster environment, the following fields are required:
+**NOTE**: You can set the required fields in the environment configuration (`environment.yaml`), it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.

@@ -334,9 +334,9 @@ Once the inference server is ready, you can start sending in inference requests.

 If you want to run inference from your local device, you can open a SSH tunnel to your cluster environment like the following:
 ```bash
-ssh -L 8081:172.17.8.29:8081 username@v.vectorinstitute.ai -N
+ssh -L 8081:10.1.1.29:8081 username@v.vectorinstitute.ai -N
 ```
-Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment
+The example provided above is for the Vector Killarney cluster, change the variables accordingly for your environment. The IP address for the compute nodes on Killarney follow `10.1.1.XX` pattern, where `XX` is the GPU number (`kn029` -> `29` in this example). Similarly, for Bon Echo it's `172.17.8.XX`, where `XX` is from `gpuXX`.

 ## Python API Usage

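The documentation hunks above encode a node-name-to-IP convention: `kn029` → `10.1.1.29` on Killarney, `gpuXX` → `172.17.8.XX` on Bon Echo. A small hypothetical helper (not part of vec-inf) that captures the mapping described in the docs:

```python
# Hypothetical helper mirroring the node-name -> IP conventions in the user
# guide; the patterns come from the docs, the function itself is not in the package.
import re


def compute_node_ip(node_name: str) -> str:
    """Map a Slurm node name to its compute-node IP for SSH tunnelling."""
    match = re.fullmatch(r"(kn|gpu)0*(\d+)", node_name)
    if match is None:
        raise ValueError(f"Unrecognized node name: {node_name}")
    prefix, number = match.groups()
    # Killarney nodes are knXXX -> 10.1.1.XX; Bon Echo nodes are gpuXXX -> 172.17.8.XX.
    subnet = "10.1.1" if prefix == "kn" else "172.17.8"
    return f"{subnet}.{number}"


print(compute_node_ip("kn029"))   # 10.1.1.29
print(compute_node_ip("gpu029"))  # 172.17.8.29
```

The returned address drops into the tunnel command, e.g. `ssh -L 8081:10.1.1.29:8081 username@v.vectorinstitute.ai -N`.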

--- vec_inf-0.7.0/pyproject.toml
+++ vec_inf-0.7.1/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "vec-inf"
-version = "0.7.0"
+version = "0.7.1"
 description = "Efficient LLM inference on Slurm clusters using vLLM."
 readme = "README.md"
 authors = [{name = "Marshall Wang", email = "marshall.wang@vectorinstitute.ai"}]

--- vec_inf-0.7.0/vec_inf/client/_client_vars.py
+++ vec_inf-0.7.1/vec_inf/client/_client_vars.py
@@ -71,10 +71,3 @@ VLLM_SHORT_TO_LONG_MAP = {

 # Required matching arguments for batch mode
 BATCH_MODE_REQUIRED_MATCHING_ARGS = ["venv", "log_dir"]
-
-# Required arguments for launching jobs that don't have a default value and their
-# corresponding environment variables
-REQUIRED_ARGS = {
-    "account": "VEC_INF_ACCOUNT",
-    "work_dir": "VEC_INF_WORK_DIR",
-}

--- vec_inf-0.7.0/vec_inf/client/_slurm_vars.py
+++ vec_inf-0.7.1/vec_inf/client/_slurm_vars.py
@@ -78,5 +78,9 @@ RESOURCE_TYPE: TypeAlias = create_literal_type(  # type: ignore[valid-type]
     _config["allowed_values"]["resource_type"]
 )

+# Extract required arguments, for launching jobs that don't have a default value and
+# their corresponding environment variables
+REQUIRED_ARGS: dict[str, str] = _config["required_args"]
+
 # Extract default arguments
 DEFAULT_ARGS: dict[str, str] = _config["default_args"]

--- vec_inf-0.7.0/vec_inf/client/_utils.py
+++ vec_inf-0.7.1/vec_inf/client/_utils.py
@@ -14,9 +14,9 @@ from typing import Any, Optional, Union, cast
 import requests
 import yaml

-from vec_inf.client._client_vars import MODEL_READY_SIGNATURE, REQUIRED_ARGS
+from vec_inf.client._client_vars import MODEL_READY_SIGNATURE
 from vec_inf.client._exceptions import MissingRequiredFieldsError
-from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR
+from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR, REQUIRED_ARGS
 from vec_inf.client.config import ModelConfig
 from vec_inf.client.models import ModelStatus

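The `_utils.py` hunk only moves the `REQUIRED_ARGS` import from the hardcoded constants module to the cluster-specific one; the code that consumes it sits outside this diff. A plausible sketch of what that resolution step typically looks like — the helper name and its logic are assumed, not taken from the package; only `REQUIRED_ARGS` and `MissingRequiredFieldsError` appear in the diff:

```python
# Plausible sketch of the consuming side (the actual _utils.py logic is not
# shown here): fall back to each required argument's environment variable,
# then raise if any remain unset.
import os
from typing import Any

# Mirrors the mapping now loaded from environment.yaml.
REQUIRED_ARGS = {"account": "VEC_INF_ACCOUNT", "work_dir": "VEC_INF_WORK_DIR"}


class MissingRequiredFieldsError(Exception):
    """Stand-in for vec_inf.client._exceptions.MissingRequiredFieldsError."""


def resolve_required_args(params: dict[str, Any]) -> dict[str, Any]:
    missing = []
    for arg, env_var in REQUIRED_ARGS.items():
        if params.get(arg) is None:
            params[arg] = os.environ.get(env_var)
        if params[arg] is None:
            missing.append(f"{arg} (or set {env_var})")
    if missing:
        raise MissingRequiredFieldsError(f"Missing required fields: {missing}")
    return params


resolve_required_args({"account": "my_account", "work_dir": "/scratch/me"})
```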

--- vec_inf-0.7.0/vec_inf/config/environment.yaml
+++ vec_inf-0.7.1/vec_inf/config/environment.yaml
@@ -15,6 +15,10 @@ allowed_values:
   partition: []
   resource_type: ["l40s", "h100"]

+required_args:
+  account: "VEC_INF_ACCOUNT"
+  work_dir: "VEC_INF_WORK_DIR"
+
 default_args:
   cpus_per_task: "16"
   mem_per_node: "64G"
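
Taken together, the last three hunks move the required-argument mapping out of a hardcoded Python dict and into `environment.yaml`, read at import time by `_slurm_vars.py`. A minimal sketch of that loading pattern — the config path and surrounding plumbing are assumptions; only the `required_args` key and the `_config["required_args"]` access are shown in the diff:

```python
# Minimal sketch of the YAML-driven configuration pattern introduced in 0.7.1.
# The path below is hypothetical; only the required_args key and the
# _config["required_args"] access are confirmed by the diff.
from pathlib import Path

import yaml

CONFIG_PATH = Path("vec_inf/config/environment.yaml")  # assumed location

with CONFIG_PATH.open() as f:
    _config = yaml.safe_load(f)

# Mapping of launch arguments without defaults to their env-var overrides,
# e.g. {"account": "VEC_INF_ACCOUNT", "work_dir": "VEC_INF_WORK_DIR"}.
REQUIRED_ARGS: dict[str, str] = _config["required_args"]
DEFAULT_ARGS: dict[str, str] = _config["default_args"]
```

Keeping required arguments alongside `allowed_values` and `default_args` lets per-cluster deployments adjust them without touching package code, which appears to be the point of the change.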