waldur-site-agent 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- waldur_site_agent-0.2.3/LICENCE +21 -0
- waldur_site_agent-0.2.3/PKG-INFO +235 -0
- waldur_site_agent-0.2.3/README.md +210 -0
- waldur_site_agent-0.2.3/pyproject.toml +41 -0
- waldur_site_agent-0.2.3/waldur_site_agent/__init__.py +98 -0
- waldur_site_agent-0.2.3/waldur_site_agent/common_utils.py +260 -0
- waldur_site_agent-0.2.3/waldur_site_agent/main.py +19 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/__init__.py +74 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/backend.py +343 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/base.py +33 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/client.py +182 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/exceptions.py +10 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/parser.py +118 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/structures.py +15 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_client/utils.py +91 -0
- waldur_site_agent-0.2.3/waldur_site_agent/slurm_waldur_utils.py +250 -0
- waldur_site_agent-0.2.3/waldur_site_agent/waldur_slurm_utils.py +306 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2016-2024 OpenNode LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: waldur-site-agent
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: Agent for integrating service provider's site with Waldur Mastermind.
|
|
5
|
+
Home-page: https://waldur.com
|
|
6
|
+
License: MIT
|
|
7
|
+
Author: OpenNode Team
|
|
8
|
+
Author-email: info@opennodecloud.com
|
|
9
|
+
Requires-Python: >=3.9,<4.0
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Dist: python-waldur-client (>=0.3.0,<0.4.0)
|
|
19
|
+
Requires-Dist: pyyaml (==6.0.1)
|
|
20
|
+
Requires-Dist: requests (==2.27.1)
|
|
21
|
+
Requires-Dist: sentry-sdk (==1.14.0)
|
|
22
|
+
Project-URL: Documentation, https://docs.waldur.com
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# Agent for Service Provider Integration
|
|
26
|
+
|
|
27
|
+
Agent for Mastermind integration with a provider's site.
|
|
28
|
+
The main purpose of the agent is data synchronization between a Waldur instance and an application (for example, a SLURM or MOAB cluster).
|
|
29
|
+
The application uses order-related information from Waldur to manage accounts in the site and
|
|
30
|
+
accounting-related info from the site to update usage data in Waldur.
|
|
31
|
+
For now, the agent supports only SLURM cluster as a site.
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
This is a stateless application, which is deployed on a machine having access to SLURM cluster data.
|
|
36
|
+
The agent consists of two sub-applications:
|
|
37
|
+
|
|
38
|
+
- agent-pull, which fetches data from Waldur and updates a state of a SLURM cluster correspondingly (e.g. creation of SLURM accounts ordered in Waldur);
|
|
39
|
+
- agent-push, which sends data from SLURM cluster to Waldur (e.g. update of resource usages).
|
|
40
|
+
|
|
41
|
+
### Integration with Waldur
|
|
42
|
+
|
|
43
|
+
For this, the agent uses [Waldur client](https://github.com/waldur/python-waldur-client) based on Python and REST communication with [Waldur backend](https://github.com/waldur/waldur-mastermind). `Agent-pull` application pulls data of orders created for a specific offering linked to the site and creates/updates/removes SLURM accounts based on this info. `Agent-push` fetches data of usage, limits and associations from the site and pushes it to Waldur.
|
|
44
|
+
|
|
45
|
+
### Integration with the site
|
|
46
|
+
|
|
47
|
+
#### SLURM cluster
|
|
48
|
+
|
|
49
|
+
For this, the agent uses SLURM command line utilities (e.g. `sacct` and `sacctmgr`).
|
|
50
|
+
The access to the binaries can be either direct or using docker client.
|
|
51
|
+
In the latter case, the agent is required to have access to `docker` binary and to docker socket (e.g. `/var/run/docker.sock`).
|
|
52
|
+
|
|
53
|
+
## Setup
|
|
54
|
+
|
|
55
|
+
The application supports the following environmental variables (required ones formatted with bold font):
|
|
56
|
+
|
|
57
|
+
- **`WALDUR_API_URL`** - URL of Waldur Mastermind API (e.g. `http://localhost:8081/api/`).
|
|
58
|
+
- **`WALDUR_API_TOKEN`** - token for access to Mastermind API.
|
|
59
|
+
- **`WALDUR_SYNC_DIRECTION`** - accepts two values: `push` and `pull`. If `pull`, then the application fetches data from Waldur and updates the SLURM cluster accordingly; if `push`, it sends data from the SLURM cluster to Waldur.
|
|
60
|
+
- **`WALDUR_OFFERING_UUID`** - UUID of corresponding offering in Waldur.
|
|
61
|
+
- `REQUESTS_VERIFY_SSL` - flag for SSL verification for Waldur client, default is `true`.
|
|
62
|
+
- `SLURM_TRES_CONFIG_PATH` - a path to the SLURM TRES configuration, default is `./config-components.yaml`.
|
|
63
|
+
- `SLURM_DEPLOYMENT_TYPE` - type of SLURM deployment. accepts two values: `docker` and `native`, default is `docker`.
|
|
64
|
+
- `SLURM_CUSTOMER_PREFIX` - prefix used for customer's accounts, default is `hpc_`.
|
|
65
|
+
- `SLURM_PROJECT_PREFIX` - prefix used for project's accounts, default is `hpc_`.
|
|
66
|
+
- `SLURM_ALLOCATION_PREFIX` - prefix used for allocation's accounts, default is `hpc_`.
|
|
67
|
+
- `SLURM_ALLOCATION_NAME_MAX_LEN` - maximum length of account name created by the application.
|
|
68
|
+
- `SLURM_DEFAULT_ACCOUNT` - default account name existing in SLURM cluster for creation of new accounts. Default is `waldur`.
|
|
69
|
+
- `SLURM_CONTAINER_NAME` - name of a headnode SLURM container; must be set if SLURM_DEPLOYMENT_TYPE is docker.
|
|
70
|
+
- `ENABLE_USER_HOMEDIR_ACCOUNT_CREATION` - whether to create home directories for users related to accounts.
|
|
71
|
+
- `SENTRY_DSN` - Data Source Name for Sentry (more info [here](https://docs.sentry.io/product/sentry-basics/dsn-explainer/)).
|
|
72
|
+
- `SENTRY_ENVIRONMENT` - name of the Sentry environment.
|
|
73
|
+
|
|
74
|
+
Alternatively, the agent can serve several offerings. For this:
|
|
75
|
+
|
|
76
|
+
1. Create a config file for offerings with the following format:
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
offerings:
|
|
80
|
+
- name: example-offering-01
|
|
81
|
+
waldur_api_url: https://waldur1.example.com/api/
|
|
82
|
+
waldur_api_token: <token1>
|
|
83
|
+
waldur_offering_uuid: <uuid1>
|
|
84
|
+
- name: example-offering-02
|
|
85
|
+
waldur_api_url: https://waldur2.example.com/api/
|
|
86
|
+
waldur_api_token: <token2>
|
|
87
|
+
waldur_offering_uuid: <uuid2>
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
2. Add a variable `WALDUR_CONFIG_FILE_PATH` to the environment, for example:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
export WALDUR_CONFIG_FILE_PATH=/etc/waldur-site-agent/offerings.yaml
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**NB**: Environment variables take precedence over the config file, so if you define `WALDUR_API_URL`, `WALDUR_API_TOKEN` and `WALDUR_OFFERING_UUID`, the file is ignored.
|
|
97
|
+
|
|
98
|
+
## Deployment
|
|
99
|
+
|
|
100
|
+
### Test environment
|
|
101
|
+
|
|
102
|
+
In order to test the agent, a user should deploy 2 separate instances of it.
|
|
103
|
+
The first one (called agent-pull) is for fetching data from Waldur with further processing and the second one (called agent-push) is for sending data from SLURM cluster to Waldur.
|
|
104
|
+
Both instances must be configured with environment variables (from e.g. .env-file), file for computing components and an optional file for multiple offerings.
|
|
105
|
+
|
|
106
|
+
The example of `.env-file` for agent-pull:
|
|
107
|
+
|
|
108
|
+
```env
|
|
109
|
+
WALDUR_SYNC_DIRECTION=pull # The setup for agent-pull
|
|
110
|
+
WALDUR_API_URL=http://waldur.example.com/api/ # Waldur API URL
|
|
111
|
+
WALDUR_API_TOKEN=9e1132b9616ebfe943ddf632ca32bbb7e1109a32 # Token of a service provider in Waldur
|
|
112
|
+
WALDUR_OFFERING_UUID=e21a0f0030b447deb63bedf69db6742e # UUID of SLURM offering in Waldur
|
|
113
|
+
SLURM_DEFAULT_ACCOUNT=root # Default account for SLURM
|
|
114
|
+
SLURM_CONTAINER_NAME=slurmctld # Name of SLURM headnode container
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The example of .env-file for agent-push:
|
|
118
|
+
|
|
119
|
+
```env
|
|
120
|
+
WALDUR_SYNC_DIRECTION=push # The setup for agent-push
|
|
121
|
+
WALDUR_API_URL=http://waldur.example.com/api/ # Waldur API URL
|
|
122
|
+
WALDUR_API_TOKEN=9e1132b9616ebfe943ddf632ca32bbb7e1109a32 # Token of a service provider in Waldur
|
|
123
|
+
WALDUR_OFFERING_UUID=e21a0f0030b447deb63bedf69db6742e # UUID of SLURM offering in Waldur
|
|
124
|
+
SLURM_CONTAINER_NAME=slurmctld # Name of SLURM headnode container
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Docker-based deployment
|
|
128
|
+
|
|
129
|
+
You can find the Docker Compose configuration for testing in [examples/docker-compose/](examples/docker-compose/) folder:
|
|
130
|
+
|
|
131
|
+
- [docker-compose.yml](examples/docker-compose/docker-compose.yml)
|
|
132
|
+
- [agent-pull](examples/docker-compose/waldur-agent-pull-env)
|
|
133
|
+
- [agent-push](examples/docker-compose/waldur-agent-push-env)
|
|
134
|
+
|
|
135
|
+
In order to test it, you need to execute the following commands in your terminal app:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
cd examples/docker-compose
|
|
139
|
+
docker-compose up -d
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Systemd deployment
|
|
143
|
+
|
|
144
|
+
In case of native deployment, you need to set up and run a systemd service executing the Python module.
|
|
145
|
+
|
|
146
|
+
#### SLURM agent
|
|
147
|
+
|
|
148
|
+
The agent requires `sacct` and `sacctmgr` to be accessible on a machine, so it should run on a headnode of the SLURM cluster.
|
|
149
|
+
Firstly, install the waldur-site-agent:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
pip install waldur-site-agent
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Secondly, put the systemd unit, environment and TRES config files in the corresponding locations.
|
|
156
|
+
Don't forget to modify Waldur-related values in the env files.
|
|
157
|
+
|
|
158
|
+
##### agent-pull files for a SLURM agent
|
|
159
|
+
|
|
160
|
+
- `systemd unit`: [waldur-site-agent-pull.service](systemd-conf/agent-pull/waldur-site-agent-pull.service)
|
|
161
|
+
- `example .env`: [waldur-site-agent-pull.env](systemd-conf/agent-pull/waldur-site-agent-pull.env)
|
|
162
|
+
|
|
163
|
+
##### agent-push files for a SLURM agent
|
|
164
|
+
|
|
165
|
+
- `systemd unit`: [waldur-site-agent-push.service](systemd-conf/agent-push/waldur-site-agent-push.service)
|
|
166
|
+
- `example .env`: [waldur-site-agent-push.env](systemd-conf/agent-push/waldur-site-agent-push.env)
|
|
167
|
+
|
|
168
|
+
#### Common files
|
|
169
|
+
|
|
170
|
+
- [example of a file for config components](https://github.com/waldur/waldur-site-agent/blob/main/config-components.yaml.example)
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
# For agent-pull
|
|
174
|
+
cp systemd-conf/agent-pull/waldur-site-agent-pull.service /etc/systemd/system/
|
|
175
|
+
mkdir /etc/waldur-site-agent/
|
|
176
|
+
cp systemd-conf/agent-pull/waldur-site-agent-pull.env /etc/waldur-site-agent/pull.env
|
|
177
|
+
cp ./config-components.yaml.example /etc/waldur-site-agent/tres.yaml # you can use a different path and set SLURM_TRES_CONFIG_PATH to it
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# For agent-push
|
|
181
|
+
cp systemd-conf/agent-push/waldur-site-agent-push.service /etc/systemd/system/
|
|
182
|
+
cp systemd-conf/agent-push/waldur-site-agent-push.env /etc/waldur-site-agent/push.env
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
After these preparation steps, run the following script to apply the changes.
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
systemctl daemon-reload
|
|
189
|
+
systemctl start waldur-site-agent-pull
|
|
190
|
+
systemctl enable waldur-site-agent-pull # to start after reboot
|
|
191
|
+
systemctl start waldur-site-agent-push
|
|
192
|
+
systemctl enable waldur-site-agent-push # to start after reboot
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
#### Older systemd versions
|
|
196
|
+
|
|
197
|
+
If you want to deploy the agents on a machine with systemd revision older than 240, you should use files with legacy configuration:
|
|
198
|
+
|
|
199
|
+
- systemd legacy unit file for agent-pull: [waldur-site-agent-pull-legacy.service](systemd-conf/agent-pull/waldur-site-agent-pull-legacy.service)
|
|
200
|
+
- systemd legacy unit file for agent-push: [waldur-site-agent-push-legacy.service](systemd-conf/agent-push/waldur-site-agent-push-legacy.service)
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# For pulling agent
|
|
204
|
+
cp systemd-conf/agent-pull/waldur-site-agent-pull-legacy.service /etc/systemd/system/waldur-site-agent-pull.service
|
|
205
|
+
# For pushing agent
|
|
206
|
+
cp systemd-conf/agent-push/waldur-site-agent-push-legacy.service /etc/systemd/system/waldur-site-agent-push.service
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### TRES configuration
|
|
210
|
+
|
|
211
|
+
To set up TRES-related info, the agent uses the corresponding configuration file configured by `SLURM_TRES_CONFIG_PATH` environment variable (`config-components.yaml` by default). Each entry of the file includes key-value-formatted data.
|
|
212
|
+
A key is a type of TRES (with optional name if type is `gres`) and the value contains limit, measured unit, type of accounting and label.
|
|
213
|
+
The script `waldur_slurm_load_components` sends this data to Waldur:
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
waldur_slurm_load_components
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
If a user wants to change this information, a path of a custom config file should be set for `SLURM_TRES_CONFIG_PATH` variable.
|
|
220
|
+
|
|
221
|
+
## Service provider configuration
|
|
222
|
+
|
|
223
|
+
### SLURM
|
|
224
|
+
|
|
225
|
+
The agents require existing offering data in Waldur.
|
|
226
|
+
As a service provider owner, you should create an offering in the marketplace:
|
|
227
|
+
|
|
228
|
+
- Go to `Provider` section on the left tab -> `Add new offering` button
|
|
229
|
+
- Input a name, choose a category, select `SLURM remote allocation` from the drop-down list on the bottom and click `Create` button
|
|
230
|
+
|
|
231
|
+

|
|
232
|
+
|
|
233
|
+
- Open the offering page and create a plan in the `Accounting` section: click `Add plan` and input the necessary details
|
|
234
|
+
- Go to `Integration` section, click `Show integration steps` and ensure they are completed within your SLURM cluster.
|
|
235
|
+
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# Agent for Service Provider Integration
|
|
2
|
+
|
|
3
|
+
Agent for Mastermind integration with a provider's site.
|
|
4
|
+
The main purpose of the agent is data synchronization between a Waldur instance and an application (for example, a SLURM or MOAB cluster).
|
|
5
|
+
The application uses order-related information from Waldur to manage accounts in the site and
|
|
6
|
+
accounting-related info from the site to update usage data in Waldur.
|
|
7
|
+
For now, the agent supports only SLURM cluster as a site.
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
This is a stateless application, which is deployed on a machine having access to SLURM cluster data.
|
|
12
|
+
The agent consists of two sub-applications:
|
|
13
|
+
|
|
14
|
+
- agent-pull, which fetches data from Waldur and updates a state of a SLURM cluster correspondingly (e.g. creation of SLURM accounts ordered in Waldur);
|
|
15
|
+
- agent-push, which sends data from SLURM cluster to Waldur (e.g. update of resource usages).
|
|
16
|
+
|
|
17
|
+
### Integration with Waldur
|
|
18
|
+
|
|
19
|
+
For this, the agent uses [Waldur client](https://github.com/waldur/python-waldur-client) based on Python and REST communication with [Waldur backend](https://github.com/waldur/waldur-mastermind). `Agent-pull` application pulls data of orders created for a specific offering linked to the site and creates/updates/removes SLURM accounts based on this info. `Agent-push` fetches data of usage, limits and associations from the site and pushes it to Waldur.
|
|
20
|
+
|
|
21
|
+
### Integration with the site
|
|
22
|
+
|
|
23
|
+
#### SLURM cluster
|
|
24
|
+
|
|
25
|
+
For this, the agent uses SLURM command line utilities (e.g. `sacct` and `sacctmgr`).
|
|
26
|
+
The access to the binaries can be either direct or using docker client.
|
|
27
|
+
In the latter case, the agent is required to have access to `docker` binary and to docker socket (e.g. `/var/run/docker.sock`).
|
|
28
|
+
|
|
29
|
+
## Setup
|
|
30
|
+
|
|
31
|
+
The application supports the following environmental variables (required ones formatted with bold font):
|
|
32
|
+
|
|
33
|
+
- **`WALDUR_API_URL`** - URL of Waldur Mastermind API (e.g. `http://localhost:8081/api/`).
|
|
34
|
+
- **`WALDUR_API_TOKEN`** - token for access to Mastermind API.
|
|
35
|
+
- **`WALDUR_SYNC_DIRECTION`** - accepts two values: `push` and `pull`. If `pull`, then the application fetches data from Waldur and updates the SLURM cluster accordingly; if `push`, it sends data from the SLURM cluster to Waldur.
|
|
36
|
+
- **`WALDUR_OFFERING_UUID`** - UUID of corresponding offering in Waldur.
|
|
37
|
+
- `REQUESTS_VERIFY_SSL` - flag for SSL verification for Waldur client, default is `true`.
|
|
38
|
+
- `SLURM_TRES_CONFIG_PATH` - a path to the SLURM TRES configuration, default is `./config-components.yaml`.
|
|
39
|
+
- `SLURM_DEPLOYMENT_TYPE` - type of SLURM deployment. accepts two values: `docker` and `native`, default is `docker`.
|
|
40
|
+
- `SLURM_CUSTOMER_PREFIX` - prefix used for customer's accounts, default is `hpc_`.
|
|
41
|
+
- `SLURM_PROJECT_PREFIX` - prefix used for project's accounts, default is `hpc_`.
|
|
42
|
+
- `SLURM_ALLOCATION_PREFIX` - prefix used for allocation's accounts, default is `hpc_`.
|
|
43
|
+
- `SLURM_ALLOCATION_NAME_MAX_LEN` - maximum length of account name created by the application.
|
|
44
|
+
- `SLURM_DEFAULT_ACCOUNT` - default account name existing in SLURM cluster for creation of new accounts. Default is `waldur`.
|
|
45
|
+
- `SLURM_CONTAINER_NAME` - name of a headnode SLURM container; must be set if SLURM_DEPLOYMENT_TYPE is docker.
|
|
46
|
+
- `ENABLE_USER_HOMEDIR_ACCOUNT_CREATION` - whether to create home directories for users related to accounts.
|
|
47
|
+
- `SENTRY_DSN` - Data Source Name for Sentry (more info [here](https://docs.sentry.io/product/sentry-basics/dsn-explainer/)).
|
|
48
|
+
- `SENTRY_ENVIRONMENT` - name of the Sentry environment.
|
|
49
|
+
|
|
50
|
+
Alternatively, the agent can serve several offerings. For this:
|
|
51
|
+
|
|
52
|
+
1. Create a config file for offerings with the following format:
|
|
53
|
+
|
|
54
|
+
```yaml
|
|
55
|
+
offerings:
|
|
56
|
+
- name: example-offering-01
|
|
57
|
+
waldur_api_url: https://waldur1.example.com/api/
|
|
58
|
+
waldur_api_token: <token1>
|
|
59
|
+
waldur_offering_uuid: <uuid1>
|
|
60
|
+
- name: example-offering-02
|
|
61
|
+
waldur_api_url: https://waldur2.example.com/api/
|
|
62
|
+
waldur_api_token: <token2>
|
|
63
|
+
waldur_offering_uuid: <uuid2>
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
2. Add a variable `WALDUR_CONFIG_FILE_PATH` to the environment, for example:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
export WALDUR_CONFIG_FILE_PATH=/etc/waldur-site-agent/offerings.yaml
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**NB**: Environment variables take precedence over the config file, so if you define `WALDUR_API_URL`, `WALDUR_API_TOKEN` and `WALDUR_OFFERING_UUID`, the file is ignored.
|
|
73
|
+
|
|
74
|
+
## Deployment
|
|
75
|
+
|
|
76
|
+
### Test environment
|
|
77
|
+
|
|
78
|
+
In order to test the agent, a user should deploy 2 separate instances of it.
|
|
79
|
+
The first one (called agent-pull) is for fetching data from Waldur with further processing and the second one (called agent-push) is for sending data from SLURM cluster to Waldur.
|
|
80
|
+
Both instances must be configured with environment variables (from e.g. .env-file), file for computing components and an optional file for multiple offerings.
|
|
81
|
+
|
|
82
|
+
The example of `.env-file` for agent-pull:
|
|
83
|
+
|
|
84
|
+
```env
|
|
85
|
+
WALDUR_SYNC_DIRECTION=pull # The setup for agent-pull
|
|
86
|
+
WALDUR_API_URL=http://waldur.example.com/api/ # Waldur API URL
|
|
87
|
+
WALDUR_API_TOKEN=9e1132b9616ebfe943ddf632ca32bbb7e1109a32 # Token of a service provider in Waldur
|
|
88
|
+
WALDUR_OFFERING_UUID=e21a0f0030b447deb63bedf69db6742e # UUID of SLURM offering in Waldur
|
|
89
|
+
SLURM_DEFAULT_ACCOUNT=root # Default account for SLURM
|
|
90
|
+
SLURM_CONTAINER_NAME=slurmctld # Name of SLURM headnode container
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The example of .env-file for agent-push:
|
|
94
|
+
|
|
95
|
+
```env
|
|
96
|
+
WALDUR_SYNC_DIRECTION=push # The setup for agent-push
|
|
97
|
+
WALDUR_API_URL=http://waldur.example.com/api/ # Waldur API URL
|
|
98
|
+
WALDUR_API_TOKEN=9e1132b9616ebfe943ddf632ca32bbb7e1109a32 # Token of a service provider in Waldur
|
|
99
|
+
WALDUR_OFFERING_UUID=e21a0f0030b447deb63bedf69db6742e # UUID of SLURM offering in Waldur
|
|
100
|
+
SLURM_CONTAINER_NAME=slurmctld # Name of SLURM headnode container
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Docker-based deployment
|
|
104
|
+
|
|
105
|
+
You can find the Docker Compose configuration for testing in [examples/docker-compose/](examples/docker-compose/) folder:
|
|
106
|
+
|
|
107
|
+
- [docker-compose.yml](examples/docker-compose/docker-compose.yml)
|
|
108
|
+
- [agent-pull](examples/docker-compose/waldur-agent-pull-env)
|
|
109
|
+
- [agent-push](examples/docker-compose/waldur-agent-push-env)
|
|
110
|
+
|
|
111
|
+
In order to test it, you need to execute the following commands in your terminal app:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
cd examples/docker-compose
|
|
115
|
+
docker-compose up -d
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Systemd deployment
|
|
119
|
+
|
|
120
|
+
In case of native deployment, you need to set up and run a systemd service executing the Python module.
|
|
121
|
+
|
|
122
|
+
#### SLURM agent
|
|
123
|
+
|
|
124
|
+
The agent requires `sacct` and `sacctmgr` to be accessible on a machine, so it should run on a headnode of the SLURM cluster.
|
|
125
|
+
Firstly, install the waldur-site-agent:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
pip install waldur-site-agent
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Secondly, put the systemd unit, environment and TRES config files in the corresponding locations.
|
|
132
|
+
Don't forget to modify Waldur-related values in the env files.
|
|
133
|
+
|
|
134
|
+
##### agent-pull files for a SLURM agent
|
|
135
|
+
|
|
136
|
+
- `systemd unit`: [waldur-site-agent-pull.service](systemd-conf/agent-pull/waldur-site-agent-pull.service)
|
|
137
|
+
- `example .env`: [waldur-site-agent-pull.env](systemd-conf/agent-pull/waldur-site-agent-pull.env)
|
|
138
|
+
|
|
139
|
+
##### agent-push files for a SLURM agent
|
|
140
|
+
|
|
141
|
+
- `systemd unit`: [waldur-site-agent-push.service](systemd-conf/agent-push/waldur-site-agent-push.service)
|
|
142
|
+
- `example .env`: [waldur-site-agent-push.env](systemd-conf/agent-push/waldur-site-agent-push.env)
|
|
143
|
+
|
|
144
|
+
#### Common files
|
|
145
|
+
|
|
146
|
+
- [example of a file for config components](https://github.com/waldur/waldur-site-agent/blob/main/config-components.yaml.example)
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# For agent-pull
|
|
150
|
+
cp systemd-conf/agent-pull/waldur-site-agent-pull.service /etc/systemd/system/
|
|
151
|
+
mkdir /etc/waldur-site-agent/
|
|
152
|
+
cp systemd-conf/agent-pull/waldur-site-agent-pull.env /etc/waldur-site-agent/pull.env
|
|
153
|
+
cp ./config-components.yaml.example /etc/waldur-site-agent/tres.yaml # you can use a different path and set SLURM_TRES_CONFIG_PATH to it
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# For agent-push
|
|
157
|
+
cp systemd-conf/agent-push/waldur-site-agent-push.service /etc/systemd/system/
|
|
158
|
+
cp systemd-conf/agent-push/waldur-site-agent-push.env /etc/waldur-site-agent/push.env
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
After these preparation steps, run the following script to apply the changes.
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
systemctl daemon-reload
|
|
165
|
+
systemctl start waldur-site-agent-pull
|
|
166
|
+
systemctl enable waldur-site-agent-pull # to start after reboot
|
|
167
|
+
systemctl start waldur-site-agent-push
|
|
168
|
+
systemctl enable waldur-site-agent-push # to start after reboot
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
#### Older systemd versions
|
|
172
|
+
|
|
173
|
+
If you want to deploy the agents on a machine with systemd revision older than 240, you should use files with legacy configuration:
|
|
174
|
+
|
|
175
|
+
- systemd legacy unit file for agent-pull: [waldur-site-agent-pull-legacy.service](systemd-conf/agent-pull/waldur-site-agent-pull-legacy.service)
|
|
176
|
+
- systemd legacy unit file for agent-push: [waldur-site-agent-push-legacy.service](systemd-conf/agent-push/waldur-site-agent-push-legacy.service)
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
# For pulling agent
|
|
180
|
+
cp systemd-conf/agent-pull/waldur-site-agent-pull-legacy.service /etc/systemd/system/waldur-site-agent-pull.service
|
|
181
|
+
# For pushing agent
|
|
182
|
+
cp systemd-conf/agent-push/waldur-site-agent-push-legacy.service /etc/systemd/system/waldur-site-agent-push.service
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### TRES configuration
|
|
186
|
+
|
|
187
|
+
To set up TRES-related info, the agent uses the corresponding configuration file configured by `SLURM_TRES_CONFIG_PATH` environment variable (`config-components.yaml` by default). Each entry of the file includes key-value-formatted data.
|
|
188
|
+
A key is a type of TRES (with optional name if type is `gres`) and the value contains limit, measured unit, type of accounting and label.
|
|
189
|
+
The script `waldur_slurm_load_components` sends this data to Waldur:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
waldur_slurm_load_components
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
If a user wants to change this information, a path of a custom config file should be set for `SLURM_TRES_CONFIG_PATH` variable.
|
|
196
|
+
|
|
197
|
+
## Service provider configuration
|
|
198
|
+
|
|
199
|
+
### SLURM
|
|
200
|
+
|
|
201
|
+
The agents require existing offering data in Waldur.
|
|
202
|
+
As a service provider owner, you should create an offering in the marketplace:
|
|
203
|
+
|
|
204
|
+
- Go to `Provider` section on the left tab -> `Add new offering` button
|
|
205
|
+
- Input a name, choose a category, select `SLURM remote allocation` from the drop-down list on the bottom and click `Create` button
|
|
206
|
+
|
|
207
|
+

|
|
208
|
+
|
|
209
|
+
- Open the offering page and create a plan in the `Accounting` section: click `Add plan` and input the necessary details
|
|
210
|
+
- Go to `Integration` section, click `Show integration steps` and ensure they are completed within your SLURM cluster.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "waldur-site-agent"
|
|
3
|
+
version = "0.2.3"
|
|
4
|
+
description = "Agent for integrating service provider's site with Waldur Mastermind."
|
|
5
|
+
authors = ["OpenNode Team <info@opennodecloud.com>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
homepage = "https://waldur.com"
|
|
8
|
+
documentation = "https://docs.waldur.com"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
classifiers=[
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Operating System :: OS Independent",
|
|
14
|
+
]
|
|
15
|
+
packages = [
|
|
16
|
+
{include = "waldur_site_agent"}
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[tool.poetry.dependencies]
|
|
20
|
+
python = "^3.9"
|
|
21
|
+
python-waldur-client = "^0.3.0"
|
|
22
|
+
requests = "2.27.1"
|
|
23
|
+
pyyaml = "6.0.1"
|
|
24
|
+
sentry-sdk = "1.14.0"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
[tool.poetry.group.dev.dependencies]
|
|
28
|
+
freezegun = "0.3.4"
|
|
29
|
+
coverage = "6.3.2"
|
|
30
|
+
pytest = "7.1.2"
|
|
31
|
+
pytest-cov = "3.0.0"
|
|
32
|
+
|
|
33
|
+
[tool.poetry.scripts]
|
|
34
|
+
waldur_site_agent = 'waldur_site_agent.main:main'
|
|
35
|
+
waldur_slurm_diagnostics = 'waldur_site_agent.common_utils:diagnostics'
|
|
36
|
+
waldur_slurm_load_components = 'waldur_site_agent.common_utils:create_offering_components'
|
|
37
|
+
waldur_slurm_create_homedirs = 'waldur_site_agent.common_utils:create_homedirs_for_offering_users'
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["poetry-core"]
|
|
41
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from importlib.metadata import version
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
from waldur_site_agent.slurm_client import logger
|
|
8
|
+
from waldur_site_agent.slurm_client.backend import SlurmBackend
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# "pull" stands for sync from Waldur to SLURM cluster
|
|
12
|
+
# "push" stands for sync from SLURM cluster to Waldur
|
|
13
|
+
class WaldurSyncDirection(Enum):
|
|
14
|
+
PULL = "pull"
|
|
15
|
+
PUSH = "push"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
waldur_api_url = os.environ.get("WALDUR_API_URL")
|
|
19
|
+
waldur_api_token = os.environ.get("WALDUR_API_TOKEN")
|
|
20
|
+
waldur_offering_uuid = os.environ.get("WALDUR_OFFERING_UUID")
|
|
21
|
+
waldur_offering_name = os.environ.get("WALDUR_OFFERING_NAME", "Single offering")
|
|
22
|
+
WALDUR_OFFERINGS = []
|
|
23
|
+
|
|
24
|
+
if all([waldur_api_url, waldur_api_token, waldur_offering_uuid]):
|
|
25
|
+
logger.info("Using environment variables as a config source")
|
|
26
|
+
WALDUR_OFFERINGS = [
|
|
27
|
+
{
|
|
28
|
+
"name": waldur_offering_name,
|
|
29
|
+
"api_url": waldur_api_url,
|
|
30
|
+
"api_token": waldur_api_token,
|
|
31
|
+
"uuid": waldur_offering_uuid,
|
|
32
|
+
}
|
|
33
|
+
]
|
|
34
|
+
else:
|
|
35
|
+
config_file_path = os.environ.get("WALDUR_CONFIG_FILE_PATH")
|
|
36
|
+
if config_file_path is None:
|
|
37
|
+
logger.error("WALDUR_CONFIG_FILE_PATH variable is missing.")
|
|
38
|
+
exit(1)
|
|
39
|
+
|
|
40
|
+
logger.info("Using %s as a config source", config_file_path)
|
|
41
|
+
|
|
42
|
+
with open(config_file_path, "r") as stream:
|
|
43
|
+
config = yaml.safe_load(stream)
|
|
44
|
+
offering_list = config["offerings"]
|
|
45
|
+
for offering_info in offering_list:
|
|
46
|
+
WALDUR_OFFERINGS.append(
|
|
47
|
+
{
|
|
48
|
+
"name": offering_info["name"],
|
|
49
|
+
"api_url": offering_info["waldur_api_url"],
|
|
50
|
+
"api_token": offering_info["waldur_api_token"],
|
|
51
|
+
"uuid": offering_info["waldur_offering_uuid"],
|
|
52
|
+
}
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Sync direction is mandatory: indexing os.environ raises KeyError when the
# variable is not set.
WALDUR_SYNC_DIRECTION = os.environ["WALDUR_SYNC_DIRECTION"]

# Only the values declared by WaldurSyncDirection ("pull" and "push") are accepted.
if WALDUR_SYNC_DIRECTION not in [
    WaldurSyncDirection.PULL.value,
    WaldurSyncDirection.PUSH.value,
]:
    # The message names the variable that is actually being validated
    # (it previously referred to SLURM_DEPLOYMENT_TYPE by mistake).
    logger.error(
        "WALDUR_SYNC_DIRECTION has invalid value: %s. Possible values are %s and %s",
        WALDUR_SYNC_DIRECTION,
        WaldurSyncDirection.PULL.value,
        WaldurSyncDirection.PUSH.value,
    )
    exit(1)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
ENABLE_USER_HOMEDIR_ACCOUNT_CREATION = os.environ.get(
|
|
72
|
+
"ENABLE_USER_HOMEDIR_ACCOUNT_CREATION", "false"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
ENABLE_USER_HOMEDIR_ACCOUNT_CREATION = ENABLE_USER_HOMEDIR_ACCOUNT_CREATION.lower() in [
|
|
76
|
+
"yes",
|
|
77
|
+
"true",
|
|
78
|
+
]
|
|
79
|
+
waldur_site_agent_version = version("waldur-site-agent")
|
|
80
|
+
|
|
81
|
+
user_agent_dict = {
|
|
82
|
+
"pull": f"waldur-site-agent-pull/{waldur_site_agent_version}",
|
|
83
|
+
"push": f"waldur-site-agent-push/{waldur_site_agent_version}",
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
USER_AGENT = user_agent_dict.get(WALDUR_SYNC_DIRECTION)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
slurm_backend = SlurmBackend()
|
|
90
|
+
|
|
91
|
+
sentry_dsn = os.environ.get("SENTRY_DSN")
|
|
92
|
+
|
|
93
|
+
if sentry_dsn:
|
|
94
|
+
import sentry_sdk
|
|
95
|
+
|
|
96
|
+
sentry_sdk.init(
|
|
97
|
+
dsn=sentry_dsn,
|
|
98
|
+
)
|