vantage-agent 3.4.0a6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vantage_agent-3.4.0a6/.gitignore +130 -0
- vantage_agent-3.4.0a6/CHANGELOG.rst +77 -0
- vantage_agent-3.4.0a6/Dockerfile +21 -0
- vantage_agent-3.4.0a6/PKG-INFO +70 -0
- vantage_agent-3.4.0a6/README.md +41 -0
- vantage_agent-3.4.0a6/justfile +87 -0
- vantage_agent-3.4.0a6/pyproject.toml +98 -0
- vantage_agent-3.4.0a6/slurm-222e7205-4044-46e4-9731-5fe480aaa919.conf +10 -0
- vantage_agent-3.4.0a6/snap/hooks/configure +91 -0
- vantage_agent-3.4.0a6/snap/hooks/install +11 -0
- vantage_agent-3.4.0a6/snap/hooks/remove +3 -0
- vantage_agent-3.4.0a6/snap/snapcraft.yaml +99 -0
- vantage_agent-3.4.0a6/tests/conftest.py +28 -0
- vantage_agent-3.4.0a6/tests/test_helpers.py +1085 -0
- vantage_agent-3.4.0a6/tests/test_logger.py +113 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_health.py +77 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_queue_actions.py +606 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_scontrol.py +285 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_squeue.py +144 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_sync_partitions.py +740 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_sync_teams.py +324 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_update.py +187 -0
- vantage_agent-3.4.0a6/tests/test_logicals/test_vantage_api.py +303 -0
- vantage_agent-3.4.0a6/tests/test_main.py +72 -0
- vantage_agent-3.4.0a6/tests/test_plugin.py +50 -0
- vantage_agent-3.4.0a6/tests/test_scheduler.py +80 -0
- vantage_agent-3.4.0a6/tests/test_sentry.py +69 -0
- vantage_agent-3.4.0a6/tests/test_settings.py +73 -0
- vantage_agent-3.4.0a6/tests/test_tasks.py +171 -0
- vantage_agent-3.4.0a6/uv.lock +697 -0
- vantage_agent-3.4.0a6/vantage_agent/__init__.py +5 -0
- vantage_agent-3.4.0a6/vantage_agent/exceptions.py +101 -0
- vantage_agent-3.4.0a6/vantage_agent/helpers.py +138 -0
- vantage_agent-3.4.0a6/vantage_agent/logger.py +50 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/__init__.py +0 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/health.py +35 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/queue_actions.py +204 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/scontrol.py +87 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/squeue.py +40 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/sync_partitions.py +274 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/sync_teams.py +156 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/update.py +141 -0
- vantage_agent-3.4.0a6/vantage_agent/logicals/vantage_api.py +151 -0
- vantage_agent-3.4.0a6/vantage_agent/main.py +38 -0
- vantage_agent-3.4.0a6/vantage_agent/plugin.py +35 -0
- vantage_agent-3.4.0a6/vantage_agent/scheduler.py +48 -0
- vantage_agent-3.4.0a6/vantage_agent/sentry.py +32 -0
- vantage_agent-3.4.0a6/vantage_agent/settings.py +99 -0
- vantage_agent-3.4.0a6/vantage_agent/tasks.py +73 -0
- vantage_agent-3.4.0a6/vantage_agent/vantage_api_client.py +126 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
*.snap
|
|
3
|
+
__pycache__/
|
|
4
|
+
*.py[cod]
|
|
5
|
+
*$py.class
|
|
6
|
+
|
|
7
|
+
# C extensions
|
|
8
|
+
*.so
|
|
9
|
+
|
|
10
|
+
# Distribution / packaging
|
|
11
|
+
.Python
|
|
12
|
+
build/
|
|
13
|
+
develop-eggs/
|
|
14
|
+
dist/
|
|
15
|
+
downloads/
|
|
16
|
+
eggs/
|
|
17
|
+
.eggs/
|
|
18
|
+
lib/
|
|
19
|
+
lib64/
|
|
20
|
+
parts/
|
|
21
|
+
sdist/
|
|
22
|
+
var/
|
|
23
|
+
wheels/
|
|
24
|
+
pip-wheel-metadata/
|
|
25
|
+
share/python-wheels/
|
|
26
|
+
*.egg-info/
|
|
27
|
+
.installed.cfg
|
|
28
|
+
*.egg
|
|
29
|
+
MANIFEST
|
|
30
|
+
|
|
31
|
+
# PyInstaller
|
|
32
|
+
# Usually these files are written by a python script from a template
|
|
33
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
34
|
+
*.manifest
|
|
35
|
+
*.spec
|
|
36
|
+
|
|
37
|
+
# Installer logs
|
|
38
|
+
pip-log.txt
|
|
39
|
+
pip-delete-this-directory.txt
|
|
40
|
+
|
|
41
|
+
# Unit test / coverage reports
|
|
42
|
+
htmlcov/
|
|
43
|
+
.tox/
|
|
44
|
+
.nox/
|
|
45
|
+
.coverage
|
|
46
|
+
.coverage.*
|
|
47
|
+
.cache
|
|
48
|
+
nosetests.xml
|
|
49
|
+
coverage.xml
|
|
50
|
+
*.cover
|
|
51
|
+
*.py,cover
|
|
52
|
+
.hypothesis/
|
|
53
|
+
.pytest_cache/
|
|
54
|
+
|
|
55
|
+
# Translations
|
|
56
|
+
*.mo
|
|
57
|
+
*.pot
|
|
58
|
+
|
|
59
|
+
# Django stuff:
|
|
60
|
+
*.log
|
|
61
|
+
local_settings.py
|
|
62
|
+
db.sqlite3
|
|
63
|
+
db.sqlite3-journal
|
|
64
|
+
|
|
65
|
+
# Flask stuff:
|
|
66
|
+
instance/
|
|
67
|
+
.webassets-cache
|
|
68
|
+
|
|
69
|
+
# Scrapy stuff:
|
|
70
|
+
.scrapy
|
|
71
|
+
|
|
72
|
+
# Sphinx documentation
|
|
73
|
+
docs/_build/
|
|
74
|
+
|
|
75
|
+
# PyBuilder
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
.python-version
|
|
87
|
+
|
|
88
|
+
# pipenv
|
|
89
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
90
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
91
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
92
|
+
# install all needed dependencies.
|
|
93
|
+
#Pipfile.lock
|
|
94
|
+
|
|
95
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
96
|
+
__pypackages__/
|
|
97
|
+
|
|
98
|
+
# Celery stuff
|
|
99
|
+
celerybeat-schedule
|
|
100
|
+
celerybeat.pid
|
|
101
|
+
|
|
102
|
+
# SageMath parsed files
|
|
103
|
+
*.sage.py
|
|
104
|
+
|
|
105
|
+
# Environments
|
|
106
|
+
.env
|
|
107
|
+
.venv
|
|
108
|
+
env/
|
|
109
|
+
venv/
|
|
110
|
+
ENV/
|
|
111
|
+
env.bak/
|
|
112
|
+
venv.bak/
|
|
113
|
+
|
|
114
|
+
# Spyder project settings
|
|
115
|
+
.spyderproject
|
|
116
|
+
.spyproject
|
|
117
|
+
|
|
118
|
+
# Rope project settings
|
|
119
|
+
.ropeproject
|
|
120
|
+
|
|
121
|
+
# mkdocs documentation
|
|
122
|
+
/site
|
|
123
|
+
|
|
124
|
+
# mypy
|
|
125
|
+
.mypy_cache/
|
|
126
|
+
.dmypy.json
|
|
127
|
+
dmypy.json
|
|
128
|
+
|
|
129
|
+
# Pyre type checker
|
|
130
|
+
.pyre/
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
=========
|
|
2
|
+
Changelog
|
|
3
|
+
=========
|
|
4
|
+
|
|
5
|
+
Tracking of all notable changes to the Vantage Agent project.
|
|
6
|
+
|
|
7
|
+
Unreleased
|
|
8
|
+
----------
|
|
9
|
+
|
|
10
|
+
3.3.2 - 2026-01-09
|
|
11
|
+
------------------
|
|
12
|
+
|
|
13
|
+
- Sync version with the Vantage API.
|
|
14
|
+
|
|
15
|
+
3.3.1 - 2025-09-12
|
|
16
|
+
------------------
|
|
17
|
+
|
|
18
|
+
- Add routine to update the sssd configuration on the agent (`PENG-3175`_).
|
|
19
|
+
|
|
20
|
+
.. _PENG-3175: https://app.clickup.com/t/18022949/PENG-3175
|
|
21
|
+
|
|
22
|
+
3.1.1 - 2025-09-12
|
|
23
|
+
------------------
|
|
24
|
+
|
|
25
|
+
- Sync version with the Vantage API.
|
|
26
|
+
|
|
27
|
+
3.1.0 - 2025-09-12
|
|
28
|
+
------------------
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
3.0.0 - 2025-08-22
|
|
32
|
+
------------------
|
|
33
|
+
|
|
34
|
+
- Update the *sync_partitions.py* script to modify the *gres.conf* file when new nodes have GPUs.
|
|
35
|
+
- Bump *python-jose* from 3.3.0 to 3.4.0.
|
|
36
|
+
- Implement functionality to track clusters' queue (`PENG-3043`_).
|
|
37
|
+
- Update pydantic to ^2.10.6.
|
|
38
|
+
- Implement mechanism to cancel jobs in the queue (`PENG-3069`_).
|
|
39
|
+
|
|
40
|
+
.. _PENG-3043: https://app.clickup.com/t/18022949/PENG-3043
|
|
41
|
+
.. _PENG-3069: https://app.clickup.com/t/18022949/PENG-3069
|
|
42
|
+
|
|
43
|
+
2.7.0 - 2025-03-10
|
|
44
|
+
------------------
|
|
45
|
+
|
|
46
|
+
- Modify the *sync_partitions.py* logical to handle partitions with GPU nodes (`PENG-2628`_).
|
|
47
|
+
|
|
48
|
+
.. _PENG-2628: https://app.clickup.com/t/18022949/PENG-2628
|
|
49
|
+
|
|
50
|
+
2.6.0 - 2025-02-10
|
|
51
|
+
------------------
|
|
52
|
+
|
|
53
|
+
- Pin *APScheduler* version to *3.10.4*.
|
|
54
|
+
- Fix how the agent gets the numbers of CPUs per node.
|
|
55
|
+
- Update sync partitions function to update the partitions.json file.
|
|
56
|
+
- Default the *TASK_SELF_UPDATE_INTERVAL_SECONDS* setting to *None*.
|
|
57
|
+
- Add a setting to indicate if the agent is operating in a cloud cluster.
|
|
58
|
+
- Add settings for customising Sentry's sample rates (`PENG-2592`_).
|
|
59
|
+
- Migrate Vantage domain to vantagecompute.ai (`PENG-2461`_).
|
|
60
|
+
|
|
61
|
+
.. _PENG-2592: https://app.clickup.com/t/18022949/PENG-2592
|
|
62
|
+
.. _PENG-2461: https://app.clickup.com/t/18022949/PENG-2461
|
|
63
|
+
|
|
64
|
+
2.5.0 - 2024-11-13
|
|
65
|
+
------------------
|
|
66
|
+
|
|
67
|
+
- Update Vantage API to support cloud clusters with multiple partitions (`PENG-2344`_).
|
|
68
|
+
|
|
69
|
+
.. _PENG-2344: https://app.clickup.com/t/18022949/PENG-2344
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
2.4.0 - 2024-10-02
|
|
73
|
+
------------------
|
|
74
|
+
|
|
75
|
+
- Initialize the Vantage Agent with the same version as the API (`PENG-2360`_).
|
|
76
|
+
|
|
77
|
+
.. _PENG-2360: https://app.clickup.com/t/18022949/PENG-2360
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
FROM python:3.14-slim-bookworm

WORKDIR /app

# Install system dependencies.
# `apt-get` is used instead of `apt` because `apt` does not guarantee a stable
# command-line interface for non-interactive/scripted use.
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*

# Install uv
# NOTE(review): the `latest` tag makes builds non-reproducible; consider pinning
# a specific uv version once one has been validated for this project.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Copy dependency files
COPY ./pyproject.toml ./uv.lock* /app/

# Install third-party dependencies only. The project itself cannot be installed
# at this point: pyproject.toml references README.md and the `vantage_agent`
# package, neither of which has been copied yet. Keeping this step separate
# also lets Docker cache the dependency layer across source-only changes.
RUN uv sync --frozen --no-dev --no-install-project

# Copy application code (README.md is required by the `readme` field in
# pyproject.toml when the project is built/installed).
COPY ./README.md /app/README.md
COPY ./vantage_agent /app/vantage_agent

# Install the project itself so the `vtg-run` entry point is available
RUN uv sync --frozen --no-dev

# Run the vantage agent.
# `--frozen` keeps uv from trying to re-resolve/update uv.lock at container start.
CMD ["uv", "run", "--frozen", "--no-dev", "vtg-run"]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vantage-agent
|
|
3
|
+
Version: 3.4.0a6
|
|
4
|
+
Summary: Vantage Agent
|
|
5
|
+
Author-email: Omnivector Solutions <info@omnivector.solutions>
|
|
6
|
+
Requires-Python: >=3.14
|
|
7
|
+
Requires-Dist: apscheduler>=3.11.1
|
|
8
|
+
Requires-Dist: httpx>=0.28.0
|
|
9
|
+
Requires-Dist: jsondiff>=2.2.1
|
|
10
|
+
Requires-Dist: loguru>=0.7.3
|
|
11
|
+
Requires-Dist: py-buzz>=4.1.0
|
|
12
|
+
Requires-Dist: pydantic-settings>=2.7.0
|
|
13
|
+
Requires-Dist: pydantic>=2.10.6
|
|
14
|
+
Requires-Dist: pyjwt>=2.10.0
|
|
15
|
+
Requires-Dist: python-dotenv>=1.1.0
|
|
16
|
+
Requires-Dist: python-jose>=3.4.0
|
|
17
|
+
Requires-Dist: sentry-sdk>=2.25.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: mypy>=1.15.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest-cov>=7.0.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest-env>=1.2.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest-mock>=3.15.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=8.4.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: respx>=0.22.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.14.0; extra == 'dev'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# Vantage Agent
|
|
31
|
+
|
|
32
|
+
## Install the package
|
|
33
|
+
|
|
34
|
+
To install the package from PyPI, simply run `pip install vantage-agent`.
|
|
35
|
+
|
|
36
|
+
## Setup parameters
|
|
37
|
+
|
|
38
|
+
1. Setup dependencies
|
|
39
|
+
|
|
40
|
+
Dependencies and environment are managed in the project by [uv](https://docs.astral.sh/uv/). To initiate the development environment run:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
just install
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Or directly with uv:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
uv sync
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
2. Setup `.env` parameters
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
VANTAGE_AGENT_BASE_API_URL="<base-api-url>"
|
|
56
|
+
VANTAGE_AGENT_OIDC_DOMAIN="<OIDC-domain>"
|
|
57
|
+
VANTAGE_AGENT_OIDC_CLIENT_ID="<OIDC-app-client-id>"
|
|
58
|
+
VANTAGE_AGENT_OIDC_CLIENT_SECRET="<OIDC-app-client-secret>"
|
|
59
|
+
VANTAGE_AGENT_OIDC_USE_HTTPS="<true-or-false>"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Local usage example
|
|
63
|
+
|
|
64
|
+
1. Run app
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
vtg-run
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Note**: this command assumes you're inside a virtual environment in which the package is installed.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Vantage Agent
|
|
2
|
+
|
|
3
|
+
## Install the package
|
|
4
|
+
|
|
5
|
+
To install the package from PyPI, simply run `pip install vantage-agent`.
|
|
6
|
+
|
|
7
|
+
## Setup parameters
|
|
8
|
+
|
|
9
|
+
1. Setup dependencies
|
|
10
|
+
|
|
11
|
+
Dependencies and environment are managed in the project by [uv](https://docs.astral.sh/uv/). To initiate the development environment run:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
just install
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Or directly with uv:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
uv sync
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
2. Setup `.env` parameters
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
VANTAGE_AGENT_BASE_API_URL="<base-api-url>"
|
|
27
|
+
VANTAGE_AGENT_OIDC_DOMAIN="<OIDC-domain>"
|
|
28
|
+
VANTAGE_AGENT_OIDC_CLIENT_ID="<OIDC-app-client-id>"
|
|
29
|
+
VANTAGE_AGENT_OIDC_CLIENT_SECRET="<OIDC-app-client-secret>"
|
|
30
|
+
VANTAGE_AGENT_OIDC_USE_HTTPS="<true-or-false>"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Local usage example
|
|
34
|
+
|
|
35
|
+
1. Run app
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
vtg-run
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Note**: this command assumes you're inside a virtual environment in which the package is installed.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env just --justfile
|
|
2
|
+
# Copyright 2025 Vantage Compute Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
uv := require("uv")
|
|
17
|
+
|
|
18
|
+
project_dir := justfile_directory()
|
|
19
|
+
package_name := "vantage_agent"
|
|
20
|
+
test_path := "tests"
|
|
21
|
+
|
|
22
|
+
export PY_COLORS := "1"
|
|
23
|
+
export PYTHONBREAKPOINT := "pdb.set_trace"
|
|
24
|
+
|
|
25
|
+
uv_run := "uv run --frozen --extra dev"
|
|
26
|
+
|
|
27
|
+
[private]
|
|
28
|
+
default:
|
|
29
|
+
@just help
|
|
30
|
+
|
|
31
|
+
# Install the project's dependencies
|
|
32
|
+
[group("dev")]
|
|
33
|
+
install:
|
|
34
|
+
uv sync --all-extras
|
|
35
|
+
|
|
36
|
+
# Regenerate uv.lock
|
|
37
|
+
[group("dev")]
|
|
38
|
+
lock:
|
|
39
|
+
uv lock --no-cache
|
|
40
|
+
|
|
41
|
+
# Upgrade uv.lock with the latest dependencies
|
|
42
|
+
[group("dev")]
|
|
43
|
+
upgrade:
|
|
44
|
+
uv lock --upgrade
|
|
45
|
+
|
|
46
|
+
# Build the package
|
|
47
|
+
[group("dev")]
|
|
48
|
+
build: lock
|
|
49
|
+
uv build --no-cache
|
|
50
|
+
|
|
51
|
+
# Run the unit tests
|
|
52
|
+
[group("test")]
|
|
53
|
+
test: install
|
|
54
|
+
{{uv_run}} pytest
|
|
55
|
+
|
|
56
|
+
# Lint the project using ruff
|
|
57
|
+
[group("lint")]
|
|
58
|
+
lint: install
|
|
59
|
+
{{uv_run}} ruff check {{package_name}} {{test_path}}
|
|
60
|
+
|
|
61
|
+
# Format the code using ruff
|
|
62
|
+
[group("lint")]
|
|
63
|
+
fmt: install
|
|
64
|
+
{{uv_run}} ruff format {{package_name}} {{test_path}}
|
|
65
|
+
|
|
66
|
+
# Run the quality assurance check
|
|
67
|
+
[group("test")]
|
|
68
|
+
qa: lint test
|
|
69
|
+
@echo "All tests pass! Ready for deployment"
|
|
70
|
+
|
|
71
|
+
# Clean all files/folders created by the project
[group("dev")]
clean:
    # Remove whole directories first so the `find` passes below do not walk them.
    @rm -rf .venv .pytest_cache
    # Stray files: bytecode, editor backups/swap files and zip archives.
    @find . -type f -iname '*.pyc' -delete
    @find . -type f -iname '*.pyo' -delete
    @find . -type f -iname '*~' -delete
    @find . -type f -iname '*.swp' -delete
    @find . -type f -iname '*.zip' -delete
    # Cache directories are usually non-empty, which `find -delete` refuses to
    # remove; prune the match and remove it recursively instead.
    @find . -type d -iname '__pycache__' -prune -exec rm -rf {} +
    @find . -type d -iname '.mypy_cache' -prune -exec rm -rf {} +
|
|
83
|
+
|
|
84
|
+
# Show available commands
|
|
85
|
+
[group("help")]
|
|
86
|
+
help:
|
|
87
|
+
@just --list
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vantage-agent"
|
|
3
|
+
version = "3.4.0-alpha.6"
|
|
4
|
+
description = "Vantage Agent"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Omnivector Solutions", email = "info@omnivector.solutions"}
|
|
7
|
+
]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.14"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"APScheduler>=3.11.1",
|
|
12
|
+
"httpx>=0.28.0",
|
|
13
|
+
"pydantic-settings>=2.7.0",
|
|
14
|
+
"loguru>=0.7.3",
|
|
15
|
+
"python-dotenv>=1.1.0",
|
|
16
|
+
"py-buzz>=4.1.0",
|
|
17
|
+
"pyjwt>=2.10.0",
|
|
18
|
+
"python-jose>=3.4.0",
|
|
19
|
+
"jsondiff>=2.2.1",
|
|
20
|
+
"sentry-sdk>=2.25.0",
|
|
21
|
+
"pydantic>=2.10.6",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
dev = [
|
|
26
|
+
"mypy>=1.15.0",
|
|
27
|
+
"ruff>=0.14.0",
|
|
28
|
+
"respx>=0.22.0",
|
|
29
|
+
"pytest>=8.4.0",
|
|
30
|
+
"pytest-mock>=3.15.0",
|
|
31
|
+
"pytest-asyncio>=1.3.0",
|
|
32
|
+
"pytest-cov>=7.0.0",
|
|
33
|
+
"pytest-env>=1.2.0",
|
|
34
|
+
"pytest-xdist>=3.8.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
vtg-run = "vantage_agent.main:main"
|
|
39
|
+
|
|
40
|
+
[project.entry-points."vantage_agent.tasks"]
|
|
41
|
+
cluster-config = "vantage_agent.tasks:cluster_config_task"
|
|
42
|
+
cluster-partition = "vantage_agent.tasks:cluster_partition_task"
|
|
43
|
+
cluster-queue-actions = "vantage_agent.tasks:cluster_queue_actions_task"
|
|
44
|
+
cluster-queue = "vantage_agent.tasks:cluster_queue_task"
|
|
45
|
+
cluster-node = "vantage_agent.tasks:cluster_node_task"
|
|
46
|
+
partitions-sync = "vantage_agent.tasks:sync_cluster_partitions_task"
|
|
47
|
+
teams-sync = "vantage_agent.tasks:sync_cluster_teams_task"
|
|
48
|
+
self-update = "vantage_agent.tasks:self_update_task"
|
|
49
|
+
status-report = "vantage_agent.tasks:status_report_task"
|
|
50
|
+
|
|
51
|
+
[tool.ruff]
|
|
52
|
+
line-length = 110
|
|
53
|
+
extend-exclude = ["__pycache__", "*.egg_info", "__init__.py"]
|
|
54
|
+
|
|
55
|
+
[tool.ruff.lint]
|
|
56
|
+
select = ["E", "W", "F", "C", "N", "D", "I001", "I"]
|
|
57
|
+
ignore = ["D213", "D211", "D203", "C408"]
|
|
58
|
+
fixable = ["ALL"]
|
|
59
|
+
|
|
60
|
+
[tool.ruff.lint.per-file-ignores]
|
|
61
|
+
"tests/test_helpers.py" = ["E501"]
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
|
|
64
|
+
minversion = "8.0"
|
|
65
|
+
addopts = [
|
|
66
|
+
"-vv",
|
|
67
|
+
"--cov=vantage_agent",
|
|
68
|
+
"--cov-report=term-missing",
|
|
69
|
+
"-n=auto"
|
|
70
|
+
]
|
|
71
|
+
testpaths = ["tests"]
|
|
72
|
+
env = [
|
|
73
|
+
"VANTAGE_AGENT_OIDC_DOMAIN = armasec.dev",
|
|
74
|
+
"VANTAGE_AGENT_OIDC_CLIENT_ID = vantage-agent-2699404d-4b88-4285-9b57-b3aa38d674ae",
|
|
75
|
+
"VANTAGE_AGENT_OIDC_CLIENT_SECRET = super-secret",
|
|
76
|
+
"VANTAGE_AGENT_OIDC_USE_HTTPS = true"
|
|
77
|
+
]
|
|
78
|
+
asyncio_mode = "auto"
|
|
79
|
+
# Suppress race-condition warnings from parallel test execution with xdist
|
|
80
|
+
filterwarnings = [
|
|
81
|
+
"ignore:coroutine.*was never awaited:RuntimeWarning"
|
|
82
|
+
]
|
|
83
|
+
|
|
84
|
+
[tool.coverage.run]
|
|
85
|
+
omit = [
|
|
86
|
+
"vantage_agent/exceptions.py",
|
|
87
|
+
"vantage_agent/vantage_api_client.py",
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
[tool.coverage.report]
|
|
91
|
+
fail_under = 100
|
|
92
|
+
|
|
93
|
+
[tool.hatch.build.targets.wheel]
|
|
94
|
+
packages = ["vantage_agent"]
|
|
95
|
+
|
|
96
|
+
[build-system]
|
|
97
|
+
requires = ["hatchling"]
|
|
98
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
SuspendRate=100
|
|
2
|
+
ResumeTimeout=600
|
|
3
|
+
SuspendTime=350
|
|
4
|
+
TreeWidth=60000
|
|
5
|
+
NodeName=partitionA-partitionA-[0-2] State=CLOUD Weight=1 Feature=cloud CPUs=46 RealMemory=93184
|
|
6
|
+
PartitionName=partitionA Nodes=partitionA-partitionA-[0-2] MaxNodes=3 MaxTime=INFINITE State=UP Default=Yes
|
|
7
|
+
NodeName=partitionC-partitionC-0 State=CLOUD Weight=1 Feature=cloud CPUs=55 RealMemory=32768 Gres=gpu:59(File=/dev/nvidia)
|
|
8
|
+
PartitionName=partitionC Nodes=partitionC-partitionC-0 MaxNodes=1 MaxTime=INFINITE State=UP Default=No
|
|
9
|
+
NodeName=partitionB-partitionB-[0-3] State=CLOUD Weight=1 Feature=cloud CPUs=13 RealMemory=26624
|
|
10
|
+
PartitionName=partitionB Nodes=partitionB-partitionB-[0-3] MaxNodes=4 MaxTime=INFINITE State=UP Default=No
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Snapcraft `configure` hook for the Vantage Agents snap."""
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
from contextlib import contextmanager
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
11
|
+
# Directory holding the snap's revision-independent data.
SNAP_COMMON_PATH = "/var/snap/vantage-agent/common"
# Name of this snap instance, read from the hook environment (KeyError if unset).
SNAP_INSTANCE_NAME = os.environ["SNAP_INSTANCE_NAME"]
# Prefix prepended to every variable name written to the .env file.
DOTENV_PREFIX = "VANTAGE_AGENT_"
# Location of the generated .env file.
DOTENV_FILE_LOCATION = Path(SNAP_COMMON_PATH) / ".env"
# Variable name (without prefix) -> default value. `configure_dotenv_files`
# prefers a value from `snapctl get`; a variable with a falsy default and no
# snapctl value is left out of the .env file.
AGENT_VARIABLES_MAP: dict[str, Union[str, int]] = {
    # Vantage API / OIDC connection
    "BASE_API_URL": "https://apis.vantagecompute.ai",
    "OIDC_DOMAIN": "auth.vantagecompute.ai/realms/vantage",
    "OIDC_CLIENT_ID": "dummy",
    "OIDC_CLIENT_SECRET": "dummy",
    # Paths and intervals
    "PARTITIONS_JSON_PATH": "/nfs/slurm/etc/aws/partitions.json",
    "SLURM_CONF_PATH": "/etc/slurm/slurm.conf",
    "TASK_JOBS_INTERVAL_SECONDS": 30,
    "CACHE_DIR": SNAP_COMMON_PATH + "/.cache",
    # Cluster identity
    "CLUSTER_NAME": "",
    "IS_CLOUD_CLUSTER": "false",
    # Sentry
    "SENTRY_DSN": "",
    "SENTRY_ENV": "snap-env",
    "SENTRY_TRACES_SAMPLE_RATE": "0.01",
    "SENTRY_SAMPLE_RATE": "0.25",
    "SENTRY_PROFILING_SAMPLE_RATE": "0.01",
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@contextmanager
def handle_error(message: str):
    """Exit the hook with a descriptive message if the wrapped code raises.

    Args:
        message: Description of the attempted action; it is interpolated after
            "Failed to " in the exit message.

    Raises:
        SystemExit: When any ``Exception`` escapes the ``with`` body. The exit
            payload is the formatted failure string.
    """
    try:
        yield
    except Exception as failure:
        reason = f"Failed to {message} (from configure hook) -- {failure}"
        sys.exit(reason)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def run_bash(bash_string: str) -> str:
    """Run a command line and return its standard output as a string.

    The command is executed directly (no shell is involved); it is tokenised
    with shell-like rules so quoted arguments containing whitespace stay
    together as a single argument.

    Args:
        bash_string: The command and its arguments as one string.

    Returns:
        The decoded stdout of the command with trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    # Imported locally so this function does not depend on the module's import block.
    import shlex

    # shlex.split honours quotes/escapes, unlike str.split which would break
    # "echo 'a b'" into three arguments and keep the quote characters.
    raw_output = subprocess.check_output(shlex.split(bash_string))
    return raw_output.decode().rstrip()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def daemon_starter():
    """Start and enable this snap instance's ``vtg-agent`` service via snapctl.

    Any failure is converted by ``handle_error`` into a hook exit.
    """
    service = f"{SNAP_INSTANCE_NAME}.vtg-agent"
    with handle_error(f"start {service}"):
        run_bash(f"snapctl start --enable {service}")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def daemon_stopper():
    """Stop and disable this snap instance's ``vtg-agent`` service via snapctl.

    Any failure is converted by ``handle_error`` into a hook exit.
    """
    service = f"{SNAP_INSTANCE_NAME}.vtg-agent"
    with handle_error(f"stop {service}"):
        run_bash(f"snapctl stop --disable {service}")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def snapctl_get(snap_config_value: str) -> Union[str, None]:
    """Read one snap configuration option through ``snapctl get``.

    Args:
        snap_config_value: The configuration key to query.

    Returns:
        The value printed by snapctl, or ``None`` when snapctl printed an
        empty string.
    """
    raw_value = run_bash(f"snapctl get {snap_config_value}")
    return raw_value if raw_value != "" else None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def configure_dotenv_files():
    """Write the agent's .env file from snap configuration and defaults.

    For every entry of ``AGENT_VARIABLES_MAP`` the snap option with the same
    name (lower-cased, underscores replaced by dashes) is queried. A value
    returned by snapctl wins over the default; a variable with neither a
    snapctl value nor a truthy default is omitted from the file. Each emitted
    line has the form ``VANTAGE_AGENT_<NAME>=<value>``.

    Any failure is converted by ``handle_error`` into a hook exit.
    """
    with handle_error(f"configure .env for {SNAP_INSTANCE_NAME}"):
        env_lines = []
        for env_var, default_value in AGENT_VARIABLES_MAP.items():
            env_value = snapctl_get(env_var.lower().replace("_", "-"))
            if env_value is None:
                if not default_value:
                    # Nothing configured and no usable default: leave it out.
                    continue
                env_value = default_value
            env_lines.append(f"{DOTENV_PREFIX}{env_var}={env_value}\n")
        # NOTE(review): this file contains the OIDC client secret; confirm that
        # the resulting file permissions are restrictive enough.
        # Explicit encoding so the result does not depend on the hook's locale.
        DOTENV_FILE_LOCATION.write_text("".join(env_lines), encoding="utf-8")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
if __name__ == "__main__":
    # Stop the service, regenerate its .env file, then start it again so the
    # new configuration is picked up. Each step exits the hook on failure.
    for hook_step in (daemon_stopper, configure_dotenv_files, daemon_starter):
        hook_step()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/bin/bash
# Create a systemd drop-in for the vtg-agent service so that it only starts
# when the generated .env file exists and loads its environment from it.

# Fail fast: abort on any command error, unset variable or failed pipeline stage.
set -euo pipefail

OVERRIDE_DIR="/etc/systemd/system/snap.vantage-agent.vtg-agent.service.d"

# -p makes this idempotent: plain mkdir fails if the directory already exists.
mkdir -p "${OVERRIDE_DIR}"

cat <<EOF | tee "${OVERRIDE_DIR}/override.conf"
[Unit]
ConditionPathExists=/var/snap/vantage-agent/common/.env
[Service]
EnvironmentFile=/var/snap/vantage-agent/common/.env
EOF
|