vantage-agent 3.4.0a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. vantage_agent-3.4.0a6/.gitignore +130 -0
  2. vantage_agent-3.4.0a6/CHANGELOG.rst +77 -0
  3. vantage_agent-3.4.0a6/Dockerfile +21 -0
  4. vantage_agent-3.4.0a6/PKG-INFO +70 -0
  5. vantage_agent-3.4.0a6/README.md +41 -0
  6. vantage_agent-3.4.0a6/justfile +87 -0
  7. vantage_agent-3.4.0a6/pyproject.toml +98 -0
  8. vantage_agent-3.4.0a6/slurm-222e7205-4044-46e4-9731-5fe480aaa919.conf +10 -0
  9. vantage_agent-3.4.0a6/snap/hooks/configure +91 -0
  10. vantage_agent-3.4.0a6/snap/hooks/install +11 -0
  11. vantage_agent-3.4.0a6/snap/hooks/remove +3 -0
  12. vantage_agent-3.4.0a6/snap/snapcraft.yaml +99 -0
  13. vantage_agent-3.4.0a6/tests/conftest.py +28 -0
  14. vantage_agent-3.4.0a6/tests/test_helpers.py +1085 -0
  15. vantage_agent-3.4.0a6/tests/test_logger.py +113 -0
  16. vantage_agent-3.4.0a6/tests/test_logicals/test_health.py +77 -0
  17. vantage_agent-3.4.0a6/tests/test_logicals/test_queue_actions.py +606 -0
  18. vantage_agent-3.4.0a6/tests/test_logicals/test_scontrol.py +285 -0
  19. vantage_agent-3.4.0a6/tests/test_logicals/test_squeue.py +144 -0
  20. vantage_agent-3.4.0a6/tests/test_logicals/test_sync_partitions.py +740 -0
  21. vantage_agent-3.4.0a6/tests/test_logicals/test_sync_teams.py +324 -0
  22. vantage_agent-3.4.0a6/tests/test_logicals/test_update.py +187 -0
  23. vantage_agent-3.4.0a6/tests/test_logicals/test_vantage_api.py +303 -0
  24. vantage_agent-3.4.0a6/tests/test_main.py +72 -0
  25. vantage_agent-3.4.0a6/tests/test_plugin.py +50 -0
  26. vantage_agent-3.4.0a6/tests/test_scheduler.py +80 -0
  27. vantage_agent-3.4.0a6/tests/test_sentry.py +69 -0
  28. vantage_agent-3.4.0a6/tests/test_settings.py +73 -0
  29. vantage_agent-3.4.0a6/tests/test_tasks.py +171 -0
  30. vantage_agent-3.4.0a6/uv.lock +697 -0
  31. vantage_agent-3.4.0a6/vantage_agent/__init__.py +5 -0
  32. vantage_agent-3.4.0a6/vantage_agent/exceptions.py +101 -0
  33. vantage_agent-3.4.0a6/vantage_agent/helpers.py +138 -0
  34. vantage_agent-3.4.0a6/vantage_agent/logger.py +50 -0
  35. vantage_agent-3.4.0a6/vantage_agent/logicals/__init__.py +0 -0
  36. vantage_agent-3.4.0a6/vantage_agent/logicals/health.py +35 -0
  37. vantage_agent-3.4.0a6/vantage_agent/logicals/queue_actions.py +204 -0
  38. vantage_agent-3.4.0a6/vantage_agent/logicals/scontrol.py +87 -0
  39. vantage_agent-3.4.0a6/vantage_agent/logicals/squeue.py +40 -0
  40. vantage_agent-3.4.0a6/vantage_agent/logicals/sync_partitions.py +274 -0
  41. vantage_agent-3.4.0a6/vantage_agent/logicals/sync_teams.py +156 -0
  42. vantage_agent-3.4.0a6/vantage_agent/logicals/update.py +141 -0
  43. vantage_agent-3.4.0a6/vantage_agent/logicals/vantage_api.py +151 -0
  44. vantage_agent-3.4.0a6/vantage_agent/main.py +38 -0
  45. vantage_agent-3.4.0a6/vantage_agent/plugin.py +35 -0
  46. vantage_agent-3.4.0a6/vantage_agent/scheduler.py +48 -0
  47. vantage_agent-3.4.0a6/vantage_agent/sentry.py +32 -0
  48. vantage_agent-3.4.0a6/vantage_agent/settings.py +99 -0
  49. vantage_agent-3.4.0a6/vantage_agent/tasks.py +73 -0
  50. vantage_agent-3.4.0a6/vantage_agent/vantage_api_client.py +126 -0
@@ -0,0 +1,130 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ *.snap
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ pip-wheel-metadata/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ .python-version
87
+
88
+ # pipenv
89
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
91
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
92
+ # install all needed dependencies.
93
+ #Pipfile.lock
94
+
95
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96
+ __pypackages__/
97
+
98
+ # Celery stuff
99
+ celerybeat-schedule
100
+ celerybeat.pid
101
+
102
+ # SageMath parsed files
103
+ *.sage.py
104
+
105
+ # Environments
106
+ .env
107
+ .venv
108
+ env/
109
+ venv/
110
+ ENV/
111
+ env.bak/
112
+ venv.bak/
113
+
114
+ # Spyder project settings
115
+ .spyderproject
116
+ .spyproject
117
+
118
+ # Rope project settings
119
+ .ropeproject
120
+
121
+ # mkdocs documentation
122
+ /site
123
+
124
+ # mypy
125
+ .mypy_cache/
126
+ .dmypy.json
127
+ dmypy.json
128
+
129
+ # Pyre type checker
130
+ .pyre/
@@ -0,0 +1,77 @@
1
+ =========
2
+ Changelog
3
+ =========
4
+
5
+ Tracking of all notable changes to the Vantage Agent project.
6
+
7
+ Unreleased
8
+ ----------
9
+
10
+ 3.3.2 - 2026-01-09
11
+ ------------------
12
+
13
+ - Sync version with the Vantage API.
14
+
15
+ 3.3.1 - 2025-09-12
16
+ ------------------
17
+
18
+ - Add routine to update the sssd configuration on the agent (`PENG-3175`_).
19
+
20
+ .. _PENG-3175: https://app.clickup.com/t/18022949/PENG-3175
21
+
22
+ 3.1.1 - 2025-09-12
23
+ ------------------
24
+
25
+ - Sync version with the Vantage API.
26
+
27
+ 3.1.0 - 2025-09-12
28
+ ------------------
29
+
30
+
31
+ 3.0.0 - 2025-08-22
32
+ ------------------
33
+
34
+ - Update the *sync_partitions.py* script to modify the *gres.conf* file when new nodes have GPUs.
35
+ - Bump *python-jose* from 3.3.0 to 3.4.0.
36
+ - Implement functionality to track clusters' queue (`PENG-3043`_).
37
+ - Update pydantic to ^2.10.6.
38
+ - Implement mechanism to cancel jobs in the queue (`PENG-3069`_).
39
+
40
+ .. _PENG-3043: https://app.clickup.com/t/18022949/PENG-3043
41
+ .. _PENG-3069: https://app.clickup.com/t/18022949/PENG-3069
42
+
43
+ 2.7.0 - 2025-03-10
44
+ ------------------
45
+
46
+ - Modify the *sync_partitions.py* logical to handle partitions with GPU nodes (`PENG-2628`_).
47
+
48
+ .. _PENG-2628: https://app.clickup.com/t/18022949/PENG-2628
49
+
50
+ 2.6.0 - 2025-02-10
51
+ ------------------
52
+
53
+ - Pin *APScheduler* version to *3.10.4*.
54
+ - Fix how the agent gets the numbers of CPUs per node.
55
+ - Update sync partitions function to update the partitions.json file.
56
+ - Default the *TASK_SELF_UPDATE_INTERVAL_SECONDS* setting to *None*.
57
+ - Add a setting to indicate if the agent is operating in a cloud cluster.
58
+ - Add settings for customising Sentry's sample rates (`PENG-2592`_).
59
+ - Migrate Vantage domain to vantagecompute.ai (`PENG-2461`_).
60
+
61
+ .. _PENG-2592: https://app.clickup.com/t/18022949/PENG-2592
62
+ .. _PENG-2461: https://app.clickup.com/t/18022949/PENG-2461
63
+
64
+ 2.5.0 - 2024-11-13
65
+ ------------------
66
+
67
+ - Update Vantage API to support cloud clusters with multiple partitions (`PENG-2344`_).
68
+
69
+ .. _PENG-2344: https://app.clickup.com/t/18022949/PENG-2344
70
+
71
+
72
+ 2.4.0 - 2024-10-02
73
+ ------------------
74
+
75
+ - Initialize the Vantage Agent with the same version as the API (`PENG-2360`_).
76
+
77
+ .. _PENG-2360: https://app.clickup.com/t/18022949/PENG-2360
@@ -0,0 +1,21 @@
1
FROM python:3.14-slim-bookworm

WORKDIR /app

# Install system dependencies.
# `apt-get` is used because the `apt` front end does not have a stable CLI for
# scripted use; `--no-install-recommends` avoids pulling in optional packages.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Copy dependency files
COPY ./pyproject.toml ./uv.lock* /app/

# Install third-party dependencies only. The project itself cannot be built at
# this point because its sources and the README referenced by pyproject.toml
# are not in the image yet; skipping it also keeps this layer cacheable when
# only the application code changes.
RUN uv sync --frozen --no-dev --no-install-project

# Copy application code (README.md is required by the `readme` field of pyproject.toml)
COPY ./README.md /app/README.md
COPY ./vantage_agent /app/vantage_agent

# Install the project itself now that its sources are available
RUN uv sync --frozen --no-dev

# Run the vantage agent
CMD ["uv", "run", "--no-dev", "vtg-run"]
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: vantage-agent
3
+ Version: 3.4.0a6
4
+ Summary: Vantage Agent
5
+ Author-email: Omnivector Solutions <info@omnivector.solutions>
6
+ Requires-Python: >=3.14
7
+ Requires-Dist: apscheduler>=3.11.1
8
+ Requires-Dist: httpx>=0.28.0
9
+ Requires-Dist: jsondiff>=2.2.1
10
+ Requires-Dist: loguru>=0.7.3
11
+ Requires-Dist: py-buzz>=4.1.0
12
+ Requires-Dist: pydantic-settings>=2.7.0
13
+ Requires-Dist: pydantic>=2.10.6
14
+ Requires-Dist: pyjwt>=2.10.0
15
+ Requires-Dist: python-dotenv>=1.1.0
16
+ Requires-Dist: python-jose>=3.4.0
17
+ Requires-Dist: sentry-sdk>=2.25.0
18
+ Provides-Extra: dev
19
+ Requires-Dist: mypy>=1.15.0; extra == 'dev'
20
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
21
+ Requires-Dist: pytest-cov>=7.0.0; extra == 'dev'
22
+ Requires-Dist: pytest-env>=1.2.0; extra == 'dev'
23
+ Requires-Dist: pytest-mock>=3.15.0; extra == 'dev'
24
+ Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
25
+ Requires-Dist: pytest>=8.4.0; extra == 'dev'
26
+ Requires-Dist: respx>=0.22.0; extra == 'dev'
27
+ Requires-Dist: ruff>=0.14.0; extra == 'dev'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # Vantage Agent
31
+
32
+ ## Install the package
33
+
34
+ To install the package from PyPI, simply run `pip install vantage-agent`.
35
+
36
+ ## Setup parameters
37
+
38
+ 1. Setup dependencies
39
+
40
+ Dependencies and environment are managed in the project by [uv](https://docs.astral.sh/uv/). To initiate the development environment run:
41
+
42
+ ```bash
43
+ just install
44
+ ```
45
+
46
+ Or directly with uv:
47
+
48
+ ```bash
49
+ uv sync
50
+ ```
51
+
52
+ 2. Setup `.env` parameters
53
+
54
+ ```bash
55
+ VANTAGE_AGENT_BASE_API_URL="<base-api-url>"
56
+ VANTAGE_AGENT_OIDC_DOMAIN="<OIDC-domain>"
57
+ VANTAGE_AGENT_OIDC_CLIENT_ID="<OIDC-app-client-id>"
58
+ VANTAGE_AGENT_OIDC_CLIENT_SECRET="<OIDC-app-client-secret>"
59
+ VANTAGE_AGENT_OIDC_USE_HTTPS="<true-or-false>"
60
+ ```
61
+
62
+ ## Local usage example
63
+
64
+ 1. Run app
65
+
66
+ ```bash
67
+ vtg-run
68
+ ```
69
+
70
+ **Note**: this command assumes you're inside a virtual environment in which the package is installed.
@@ -0,0 +1,41 @@
1
+ # Vantage Agent
2
+
3
+ ## Install the package
4
+
5
+ To install the package from PyPI, simply run `pip install vantage-agent`.
6
+
7
+ ## Setup parameters
8
+
9
+ 1. Setup dependencies
10
+
11
+ Dependencies and environment are managed in the project by [uv](https://docs.astral.sh/uv/). To initiate the development environment run:
12
+
13
+ ```bash
14
+ just install
15
+ ```
16
+
17
+ Or directly with uv:
18
+
19
+ ```bash
20
+ uv sync
21
+ ```
22
+
23
+ 2. Setup `.env` parameters
24
+
25
+ ```bash
26
+ VANTAGE_AGENT_BASE_API_URL="<base-api-url>"
27
+ VANTAGE_AGENT_OIDC_DOMAIN="<OIDC-domain>"
28
+ VANTAGE_AGENT_OIDC_CLIENT_ID="<OIDC-app-client-id>"
29
+ VANTAGE_AGENT_OIDC_CLIENT_SECRET="<OIDC-app-client-secret>"
30
+ VANTAGE_AGENT_OIDC_USE_HTTPS="<true-or-false>"
31
+ ```
32
+
33
+ ## Local usage example
34
+
35
+ 1. Run app
36
+
37
+ ```bash
38
+ vtg-run
39
+ ```
40
+
41
+ **Note**: this command assumes you're inside a virtual environment in which the package is installed.
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env just --justfile
2
+ # Copyright 2025 Vantage Compute Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
uv := require("uv")

project_dir := justfile_directory()
package_name := "vantage_agent"
test_path := "tests"

export PY_COLORS := "1"
export PYTHONBREAKPOINT := "pdb.set_trace"

uv_run := "uv run --frozen --extra dev"

[private]
default:
    @just help

# Install the project's dependencies
[group("dev")]
install:
    uv sync --all-extras

# Regenerate uv.lock
[group("dev")]
lock:
    uv lock --no-cache

# Upgrade uv.lock with the latest dependencies
[group("dev")]
upgrade:
    uv lock --upgrade

# Build the package
[group("dev")]
build: lock
    uv build --no-cache

# Run the unit tests
[group("test")]
test: install
    {{uv_run}} pytest

# Lint the project using ruff
[group("lint")]
lint: install
    {{uv_run}} ruff check {{package_name}} {{test_path}}

# Format the code using ruff
[group("lint")]
fmt: install
    {{uv_run}} ruff format {{package_name}} {{test_path}}

# Run the quality assurance check
[group("test")]
qa: lint test
    @echo "All tests pass! Ready for deployment"

# NOTE: directories are removed with `rm -rf` (through `find ... -prune -exec`)
# because `find -delete` refuses to remove non-empty directories and would abort
# the recipe. The archive pattern is `*.zip` so that it matches files by
# extension rather than a file literally named `.zip`, and the virtual
# environment pattern is `.venv`.

# Clean all files/folders created by the project
[group("dev")]
clean:
    @find . -iname '*.pyc' -delete
    @find . -iname '*.pyo' -delete
    @find . -iname '*~' -delete
    @find . -iname '*.swp' -delete
    @find . -iname '*.zip' -delete
    @find . -type d -iname '__pycache__' -prune -exec rm -rf {} +
    @find . -type d -iname '.mypy_cache' -prune -exec rm -rf {} +
    @find . -type d -iname '.venv' -prune -exec rm -rf {} +
    @rm -rf .pytest_cache

# Show available commands
[group("help")]
help:
    @just --list
@@ -0,0 +1,98 @@
1
+ [project]
2
+ name = "vantage-agent"
3
+ version = "3.4.0-alpha.6"
4
+ description = "Vantage Agent"
5
+ authors = [
6
+ {name = "Omnivector Solutions", email = "info@omnivector.solutions"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.14"
10
+ dependencies = [
11
+ "APScheduler>=3.11.1",
12
+ "httpx>=0.28.0",
13
+ "pydantic-settings>=2.7.0",
14
+ "loguru>=0.7.3",
15
+ "python-dotenv>=1.1.0",
16
+ "py-buzz>=4.1.0",
17
+ "pyjwt>=2.10.0",
18
+ "python-jose>=3.4.0",
19
+ "jsondiff>=2.2.1",
20
+ "sentry-sdk>=2.25.0",
21
+ "pydantic>=2.10.6",
22
+ ]
23
+
24
+ [project.optional-dependencies]
25
+ dev = [
26
+ "mypy>=1.15.0",
27
+ "ruff>=0.14.0",
28
+ "respx>=0.22.0",
29
+ "pytest>=8.4.0",
30
+ "pytest-mock>=3.15.0",
31
+ "pytest-asyncio>=1.3.0",
32
+ "pytest-cov>=7.0.0",
33
+ "pytest-env>=1.2.0",
34
+ "pytest-xdist>=3.8.0",
35
+ ]
36
+
37
+ [project.scripts]
38
+ vtg-run = "vantage_agent.main:main"
39
+
40
+ [project.entry-points."vantage_agent.tasks"]
41
+ cluster-config = "vantage_agent.tasks:cluster_config_task"
42
+ cluster-partition = "vantage_agent.tasks:cluster_partition_task"
43
+ cluster-queue-actions = "vantage_agent.tasks:cluster_queue_actions_task"
44
+ cluster-queue = "vantage_agent.tasks:cluster_queue_task"
45
+ cluster-node = "vantage_agent.tasks:cluster_node_task"
46
+ partitions-sync = "vantage_agent.tasks:sync_cluster_partitions_task"
47
+ teams-sync = "vantage_agent.tasks:sync_cluster_teams_task"
48
+ self-update = "vantage_agent.tasks:self_update_task"
49
+ status-report = "vantage_agent.tasks:status_report_task"
50
+
51
+ [tool.ruff]
52
+ line-length = 110
53
+ extend-exclude = ["__pycache__", "*.egg_info", "__init__.py"]
54
+
55
+ [tool.ruff.lint]
56
+ select = ["E", "W", "F", "C", "N", "D", "I001", "I"]
57
+ ignore = ["D213", "D211", "D203", "C408"]
58
+ fixable = ["ALL"]
59
+
60
+ [tool.ruff.lint.per-file-ignores]
61
+ "tests/test_helpers.py" = ["E501"]
62
+
63
+ [tool.pytest.ini_options]
64
+ minversion = "8.0"
65
+ addopts = [
66
+ "-vv",
67
+ "--cov=vantage_agent",
68
+ "--cov-report=term-missing",
69
+ "-n=auto"
70
+ ]
71
+ testpaths = ["tests"]
72
+ env = [
73
+ "VANTAGE_AGENT_OIDC_DOMAIN = armasec.dev",
74
+ "VANTAGE_AGENT_OIDC_CLIENT_ID = vantage-agent-2699404d-4b88-4285-9b57-b3aa38d674ae",
75
+ "VANTAGE_AGENT_OIDC_CLIENT_SECRET = super-secret",
76
+ "VANTAGE_AGENT_OIDC_USE_HTTPS = true"
77
+ ]
78
+ asyncio_mode = "auto"
79
+ # Suppress race-condition warnings from parallel test execution with xdist
80
+ filterwarnings = [
81
+ "ignore:coroutine.*was never awaited:RuntimeWarning"
82
+ ]
83
+
84
+ [tool.coverage.run]
85
+ omit = [
86
+ "vantage_agent/exceptions.py",
87
+ "vantage_agent/vantage_api_client.py",
88
+ ]
89
+
90
+ [tool.coverage.report]
91
+ fail_under = 100
92
+
93
+ [tool.hatch.build.targets.wheel]
94
+ packages = ["vantage_agent"]
95
+
96
+ [build-system]
97
+ requires = ["hatchling"]
98
+ build-backend = "hatchling.build"
@@ -0,0 +1,10 @@
1
+ SuspendRate=100
2
+ ResumeTimeout=600
3
+ SuspendTime=350
4
+ TreeWidth=60000
5
+ NodeName=partitionA-partitionA-[0-2] State=CLOUD Weight=1 Feature=cloud CPUs=46 RealMemory=93184
6
+ PartitionName=partitionA Nodes=partitionA-partitionA-[0-2] MaxNodes=3 MaxTime=INFINITE State=UP Default=Yes
7
+ NodeName=partitionC-partitionC-0 State=CLOUD Weight=1 Feature=cloud CPUs=55 RealMemory=32768 Gres=gpu:59(File=/dev/nvidia)
8
+ PartitionName=partitionC Nodes=partitionC-partitionC-0 MaxNodes=1 MaxTime=INFINITE State=UP Default=No
9
+ NodeName=partitionB-partitionB-[0-3] State=CLOUD Weight=1 Feature=cloud CPUs=13 RealMemory=26624
10
+ PartitionName=partitionB Nodes=partitionB-partitionB-[0-3] MaxNodes=4 MaxTime=INFINITE State=UP Default=No
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env python3
2
+ """Snapcraft `configure` hook for the Vantage Agents snap."""
3
+
4
+ import os
5
+ import subprocess
6
+ import sys
7
+ from contextlib import contextmanager
8
+ from pathlib import Path
9
+ from typing import Union
10
+
11
# Directory under which the generated .env file and the cache directory are placed.
SNAP_COMMON_PATH = "/var/snap/vantage-agent/common"
# Read from the environment at import time; a missing variable raises KeyError.
SNAP_INSTANCE_NAME = os.environ["SNAP_INSTANCE_NAME"]
# Prefix put in front of every variable name written to the .env file.
DOTENV_PREFIX = "VANTAGE_AGENT_"
DOTENV_FILE_LOCATION = Path(SNAP_COMMON_PATH) / ".env"
# Default value for each agent setting. Settings whose default is falsy are
# only written to the .env file when a value is provided through snapctl.
AGENT_VARIABLES_MAP: dict[str, Union[str, int]] = {
    # API and authentication
    "BASE_API_URL": "https://apis.vantagecompute.ai",
    "OIDC_DOMAIN": "auth.vantagecompute.ai/realms/vantage",
    "OIDC_CLIENT_ID": "dummy",
    "OIDC_CLIENT_SECRET": "dummy",
    # Cluster and filesystem locations
    "PARTITIONS_JSON_PATH": "/nfs/slurm/etc/aws/partitions.json",
    "SLURM_CONF_PATH": "/etc/slurm/slurm.conf",
    "TASK_JOBS_INTERVAL_SECONDS": 30,
    "CACHE_DIR": SNAP_COMMON_PATH + "/.cache",
    "CLUSTER_NAME": "",
    "IS_CLOUD_CLUSTER": "false",
    # Sentry
    "SENTRY_DSN": "",
    "SENTRY_ENV": "snap-env",
    "SENTRY_TRACES_SAMPLE_RATE": "0.01",
    "SENTRY_SAMPLE_RATE": "0.25",
    "SENTRY_PROFILING_SAMPLE_RATE": "0.01",
}
32
+
33
+
34
@contextmanager
def handle_error(message: str):
    """Exit the hook with a descriptive message if the wrapped block raises.

    Args:
        message: Short description of the attempted action; it is interpolated
            into the exit message as ``Failed to <message> ...``.

    Raises:
        SystemExit: When any ``Exception`` escapes the managed block. The exit
            value is the failure message, which includes the original error.
    """
    try:
        yield
    except Exception as error:
        failure = f"Failed to {message} (from configure hook) -- {error}"
        sys.exit(failure)
41
+
42
+
43
def run_bash(bash_string: str) -> str:
    """Run a command and return its standard output as a string.

    The command is executed directly (no shell is involved). ``bash_string`` is
    tokenised with ``shlex.split`` so that quoted arguments containing
    whitespace are passed to the program as a single argument instead of being
    torn apart on every space.

    Args:
        bash_string: The command line to execute.

    Returns:
        The decoded standard output with trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
        ValueError: If ``bash_string`` contains unbalanced quotes.
    """
    import shlex  # imported locally so the block does not depend on file-level imports

    return subprocess.check_output(shlex.split(bash_string)).decode().rstrip()
46
+
47
+
48
def daemon_starter():
    """Start and enable the snap's ``vtg-agent`` service through ``snapctl``.

    A failure of the command terminates the hook via ``handle_error``.
    """
    service = f"{SNAP_INSTANCE_NAME}.vtg-agent"
    with handle_error(f"start {service}"):
        run_bash(f"snapctl start --enable {service}")
52
+
53
+
54
def daemon_stopper():
    """Stop and disable the snap's ``vtg-agent`` service through ``snapctl``.

    A failure of the command terminates the hook via ``handle_error``.
    """
    service = f"{SNAP_INSTANCE_NAME}.vtg-agent"
    with handle_error(f"stop {service}"):
        run_bash(f"snapctl stop --disable {service}")
58
+
59
+
60
def snapctl_get(snap_config_value: str) -> Union[str, None]:
    """Look up a snap configuration key with ``snapctl get``.

    Args:
        snap_config_value: Name of the configuration key to query.

    Returns:
        The command's output, or ``None`` when the output is the empty string.
    """
    raw_value = run_bash(f"snapctl get {snap_config_value}")
    return raw_value if raw_value != "" else None
72
+
73
+
74
def configure_dotenv_files():
    """Write the agent's .env file from the snap configuration and built-in defaults.

    For every key in ``AGENT_VARIABLES_MAP`` the snap configuration is queried
    (key lower-cased, underscores replaced by dashes). A value returned by
    snapctl takes precedence; otherwise the default from the map is used, and a
    variable whose default is falsy is omitted. Each variable is written as
    ``<DOTENV_PREFIX><KEY>=<value>`` on its own line.

    The file contains the OIDC client secret, so it is created with (and reset
    to) mode ``0600`` before the content is written, instead of inheriting
    whatever the process umask allows.

    Any failure terminates the hook via ``handle_error``.
    """
    with handle_error(f"configure .env for {SNAP_INSTANCE_NAME}"):
        env_lines = []
        for env_var, default_value in AGENT_VARIABLES_MAP.items():
            snapctl_value = snapctl_get(env_var.lower().replace("_", "-"))
            if snapctl_value is not None:
                env_value = snapctl_value
            elif default_value:
                env_value = default_value
            else:
                # No configured value and no usable default: leave the variable out.
                continue
            env_lines.append(f"{DOTENV_PREFIX}{env_var}={env_value}\n")

        # Restrict permissions before any secret is written; chmod also covers a
        # pre-existing file and a umask that would loosen the mode passed to touch.
        DOTENV_FILE_LOCATION.touch(mode=0o600, exist_ok=True)
        DOTENV_FILE_LOCATION.chmod(0o600)
        DOTENV_FILE_LOCATION.write_text("".join(env_lines))
86
+
87
+
88
if __name__ == "__main__":
    # The service is stopped while its .env file is regenerated and started
    # again afterwards; the steps run strictly in this order.
    for step in (daemon_stopper, configure_dotenv_files, daemon_starter):
        step()
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Snap `install` hook: writes a systemd drop-in for the vtg-agent service so
# that the unit only starts when the .env file exists and loads its
# environment from that file.

# Abort on the first failing command instead of continuing with a partial setup.
set -euo pipefail

OVERRIDE_DIR=/etc/systemd/system/snap.vantage-agent.vtg-agent.service.d

# -p: do not report an error when the directory is left over from an earlier install.
mkdir -p "$OVERRIDE_DIR"

cat <<EOF | tee "$OVERRIDE_DIR/override.conf"
[Unit]
ConditionPathExists=/var/snap/vantage-agent/common/.env
[Service]
EnvironmentFile=/var/snap/vantage-agent/common/.env
EOF
@@ -0,0 +1,3 @@
1
#!/bin/bash
# Snap `remove` hook: deletes the systemd drop-in directory of the vtg-agent service.

# -f: succeed even when the directory has already been removed.
rm -rf /etc/systemd/system/snap.vantage-agent.vtg-agent.service.d