xmanager-slurm 0.4.4__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xmanager-slurm might be problematic. Click here for more details.
- xmanager_slurm-0.4.6/.pdm-build/.gitignore +1 -0
- xmanager_slurm-0.4.6/.pdm-build/xm_slurm-0.1+editable.dist-info/METADATA +125 -0
- xmanager_slurm-0.4.6/.pdm-build/xm_slurm-0.1+editable.dist-info/WHEEL +4 -0
- xmanager_slurm-0.4.6/.pdm-build/xm_slurm.pth +1 -0
- xmanager_slurm-0.4.6/.python-version +1 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/PKG-INFO +7 -3
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/metadata/launch.py +12 -3
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/pyproject.toml +7 -2
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/uv.lock +70 -10
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/__init__.py +0 -2
- xmanager_slurm-0.4.6/xm_slurm/api/__init__.py +33 -0
- xmanager_slurm-0.4.6/xm_slurm/api/abc.py +65 -0
- xmanager_slurm-0.4.6/xm_slurm/api/models.py +70 -0
- xmanager_slurm-0.4.6/xm_slurm/api/sqlite/client.py +358 -0
- xmanager_slurm-0.4.6/xm_slurm/api/web/client.py +173 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/config.py +11 -3
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/contrib/clusters/__init__.py +3 -6
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/contrib/clusters/drac.py +4 -3
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/executables.py +4 -7
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/execution.py +273 -159
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/experiment.py +26 -180
- xmanager_slurm-0.4.6/xm_slurm/filesystem.py +129 -0
- xmanager_slurm-0.4.6/xm_slurm/metadata_context.py +253 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/packageables.py +0 -9
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/packaging/docker.py +73 -23
- xmanager_slurm-0.4.6/xm_slurm/packaging/utils.py +22 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/scripts/cli.py +9 -2
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/docker/uv.Dockerfile +6 -3
- xmanager_slurm-0.4.6/xm_slurm/templates/slurm/entrypoint.bash.j2 +27 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/slurm/job-array.bash.j2 +4 -4
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/slurm/job-group.bash.j2 +2 -2
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/slurm/job.bash.j2 +5 -4
- xmanager_slurm-0.4.6/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +68 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/slurm/runtimes/podman.bash.j2 +10 -24
- {xmanager_slurm-0.4.4/xm_slurm/packaging → xmanager_slurm-0.4.6/xm_slurm}/utils.py +44 -16
- xmanager_slurm-0.4.4/xm_slurm/api.py +0 -528
- xmanager_slurm-0.4.4/xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +0 -104
- xmanager_slurm-0.4.4/xm_slurm/utils.py +0 -77
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/.devcontainer.json +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/.github/workflows/ci.yml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/.github/workflows/deploy-docs.yml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/.gitignore +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/.pre-commit-config.yaml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/.vscode/settings.json +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/LICENSE.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/README.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/api/executables.rst +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/api/executors.rst +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/api/packageables.rst +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/assets/workflow-dark.svg +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/assets/workflow-light.svg +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/conf.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/getting-started/xmanager.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/guides/index.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/guides/remote-dev.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/docs/index.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/conda/environment.yml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/conda/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/conda/main.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/conda/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/custom-dockerfile/Dockerfile +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/custom-dockerfile/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/custom-dockerfile/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-array-sweep/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-array-sweep/main.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-array-sweep/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-array-sweep/uv.lock +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-dependencies/eval.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-dependencies/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-dependencies/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-dependencies/train.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-dependencies/uv.lock +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-group/Dockerfile +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-group/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-group/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/job-group/uv.lock +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/metadata/main.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/metadata/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/metadata/requirements.txt +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/parameter-controller/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/parameter-controller/main.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/parameter-controller/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/parameter-controller/requirements.txt +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/pip/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/pip/main.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/pip/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/pip/requirements.txt +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/uv/launch.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/uv/pyproject.toml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/examples/uv/uv.lock +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/conftest.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/Dockerfile +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/README.md +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/cgroup.conf +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/docker-compose.yml +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/docker-entrypoint.sh +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/host_ed25519 +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/host_ed25519.pub +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/id_ed25519 +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/id_ed25519.pub +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/slurm.conf +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/slurmdbd.conf +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/fixtures/slurm/sshd_config +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/integration/test_remote_execution.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/tests/test_dependencies.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/batching.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/console.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/constants.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/contrib/__init__.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/dependencies.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/executors.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/experimental/parameter_controller.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/job_blocks.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/packaging/__init__.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/packaging/registry.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/packaging/router.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/resources.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/scripts/_cloudpickle.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/status.py +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/docker/docker-bake.hcl.j2 +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/docker/mamba.Dockerfile +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/docker/python.Dockerfile +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/slurm/fragments/monitor.bash.j2 +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/templates/slurm/fragments/proxy.bash.j2 +0 -0
- {xmanager_slurm-0.4.4 → xmanager_slurm-0.4.6}/xm_slurm/types.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
*
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: xm-slurm
|
|
3
|
+
Version: 0.1+editable
|
|
4
|
+
Author-Email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: xmanager>=0.4.0
|
|
8
|
+
Requires-Dist: asyncssh>=2.13.2
|
|
9
|
+
Requires-Dist: humanize>=4.8.0
|
|
10
|
+
Requires-Dist: jinja2>=3.1.2
|
|
11
|
+
Requires-Dist: toml>=0.10.2
|
|
12
|
+
Requires-Dist: rich>=13.5.2
|
|
13
|
+
Requires-Dist: immutabledict>=3.0.0
|
|
14
|
+
Requires-Dist: backoff>=2.2.1
|
|
15
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
16
|
+
Requires-Dist: alembic>=1.13.1
|
|
17
|
+
Requires-Dist: aiosqlite>=0.20.0
|
|
18
|
+
Requires-Dist: pathspec>=0.11.2; extra == "gcp"
|
|
19
|
+
Requires-Dist: google-cloud-storage>=2.11.0; extra == "gcp"
|
|
20
|
+
Requires-Dist: google-cloud-build>=3.20.0; extra == "gcp"
|
|
21
|
+
Requires-Dist: google-cloud-logging>=3.8.0; extra == "gcp"
|
|
22
|
+
Requires-Dist: google-cloud-iam>=2.12.2; extra == "gcp"
|
|
23
|
+
Requires-Dist: google-cloud-kms>=2.19.2; extra == "gcp"
|
|
24
|
+
Requires-Dist: google-crc32c>=1.5.0; extra == "gcp"
|
|
25
|
+
Requires-Dist: pytest>=7.4.3; extra == "test"
|
|
26
|
+
Provides-Extra: gcp
|
|
27
|
+
Provides-Extra: test
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# Slurm XManager
|
|
31
|
+
|
|
32
|
+
This project adds support for Slurm to XManager. This is done through the use of Docker on the client and Singularity/Apptainer containers on the cluster.
|
|
33
|
+
This provides the following benefits:
|
|
34
|
+
|
|
35
|
+
1. All development can be done locally and launched on any Slurm cluster without any configuration.
|
|
36
|
+
2. Reproducible experiments (e.g., containerized runtime, code checkpointing, etc.)
|
|
37
|
+
3. Easy-to-configure distributed experiments; all configuration is in Python.
|
|
38
|
+
4. Launch experiments on any XManager launcher, e.g., GCP or Kubernetes
|
|
39
|
+
|
|
40
|
+
## Minimal Example
|
|
41
|
+
|
|
42
|
+
Currently, the only out-of-the-box container type is a [PDM](https://pdm.fming.dev/latest/) container.
|
|
43
|
+
To use the PDM container, start a new project with `pdm init`. From there you'll need to implement a launch script. A launch script consists of three parts:
|
|
44
|
+
|
|
45
|
+
1. Specifying an executor specification; this is where the executable will be stored.
|
|
46
|
+
2. Specifying an executable and packaging the executables.
|
|
47
|
+
3. Specifying a job and job requirements.
|
|
48
|
+
|
|
49
|
+
```py
|
|
50
|
+
import datetime as dt
|
|
51
|
+
from pathlib import Path
|
|
52
|
+
|
|
53
|
+
from xmanager import xm
|
|
54
|
+
|
|
55
|
+
import xm_slurm
|
|
56
|
+
from xm_slurm.apptainer import packageables
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@xm.run_in_asyncio_loop
|
|
60
|
+
async def main():
|
|
61
|
+
title = FLAGS.title
|
|
62
|
+
|
|
63
|
+
async with xm_slurm.create_experiment(title) as experiment:
|
|
64
|
+
# Step 1: Specify the executor specification
|
|
65
|
+
executor_spec = xm_slurm.SlurmSpec(tag="ghcr.io/YOUR_GITHUB_USERNAME/YOUR_GITHUB_REPOSITORY/launch:latest")
|
|
66
|
+
|
|
67
|
+
# Step 2: Specify the executable and package it
|
|
68
|
+
[executable] = experiment.package(
|
|
69
|
+
[
|
|
70
|
+
packageables.pdm_container(
|
|
71
|
+
executor_spec=executor_spec,
|
|
72
|
+
entrypoint=xm.ModuleName("train"),
|
|
73
|
+
annotations={
|
|
74
|
+
"org.opencontainers.image.source": "https://github.com/YOUR_GITHUB_USERNAME/YOUR_GITHUB_REPOSITORY"
|
|
75
|
+
},
|
|
76
|
+
args={}, # SPECIFY COMMAND CLI ARGS FOR THE EXECUTABLE
|
|
77
|
+
env_vars={}, # SPECIFY COMMON ENV VARS FOR THE EXECUTABLE
|
|
78
|
+
),
|
|
79
|
+
]
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Step 3: Specify the executor and add the job
|
|
83
|
+
executor = xm_slurm.Slurm(
|
|
84
|
+
requirements=xm.JobRequirements(
|
|
85
|
+
resources={xm.ResourceType.A100: 1},
|
|
86
|
+
RAM=8 * xm.GiB,
|
|
87
|
+
CPU=4,
|
|
88
|
+
),
|
|
89
|
+
time=dt.timedelta(hours=24),
|
|
90
|
+
account="", # SLURM ACCOUNT
|
|
91
|
+
# SPECIFY OTHER SLURM ARGUMENTS
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
await experiment.add(
|
|
95
|
+
job=xm.Job(
|
|
96
|
+
executor=executor,
|
|
97
|
+
executable=executable,
|
|
98
|
+
args={}, # SPECIFY ARGS FOR THIS JOB
|
|
99
|
+
env_vars={}, # SPECIFY ENV VARS FOR THIS JOB
|
|
100
|
+
),
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
|
|
104
|
+
main()
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Specifying sweeps
|
|
108
|
+
|
|
109
|
+
When calling `experiment.add` you can specify an additional keyword argument `args`. This argument can be a sequence of arguments that'll specify a sweep. For example, if you want to run a sweep over the learning rate you can specify the following:
|
|
110
|
+
|
|
111
|
+
```py
|
|
112
|
+
await experiment.add(
|
|
113
|
+
job=xm.Job(
|
|
114
|
+
executor=executor,
|
|
115
|
+
executable=executable,
|
|
116
|
+
),
|
|
117
|
+
args=[
|
|
118
|
+
{"learning_rate": 0.1},
|
|
119
|
+
{"learning_rate": 0.01},
|
|
120
|
+
{"learning_rate": 0.001},
|
|
121
|
+
],
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
This will launch a job array with 3 jobs, each with a different learning rate.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/workspaces/xm-slurm
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.11
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: xmanager-slurm
|
|
3
|
-
Version: 0.4.4
|
|
3
|
+
Version: 0.4.6
|
|
4
4
|
Summary: Slurm backend for XManager.
|
|
5
5
|
Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
|
|
6
6
|
Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
|
|
7
7
|
License: MIT
|
|
8
|
+
License-File: LICENSE.md
|
|
8
9
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
9
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
11
|
Classifier: Operating System :: OS Independent
|
|
@@ -13,13 +14,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
16
|
Requires-Python: >=3.10
|
|
16
|
-
Requires-Dist:
|
|
17
|
+
Requires-Dist: aiofile>=3.9.0
|
|
18
|
+
Requires-Dist: asyncssh>=2.19.0
|
|
17
19
|
Requires-Dist: backoff>=2.2.1
|
|
18
20
|
Requires-Dist: cloudpickle>=3.0.0
|
|
21
|
+
Requires-Dist: httpx>=0.28.1
|
|
19
22
|
Requires-Dist: humanize>=4.8.0
|
|
20
23
|
Requires-Dist: immutabledict>=3.0.0
|
|
21
24
|
Requires-Dist: jinja2>=3.1.2
|
|
22
25
|
Requires-Dist: more-itertools>=10.2.0
|
|
23
26
|
Requires-Dist: rich>=13.5.2
|
|
24
27
|
Requires-Dist: toml>=0.10.2
|
|
28
|
+
Requires-Dist: wrapt>=1.16.0
|
|
25
29
|
Requires-Dist: xmanager>=0.5.0
|
|
@@ -25,9 +25,18 @@ def f(x):
|
|
|
25
25
|
```
|
|
26
26
|
"""
|
|
27
27
|
experiment.context.annotations.title = "Cool Experiment #2"
|
|
28
|
-
experiment.context.artifacts.
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
experiment.context.artifacts["wandb"] = "https://wandb.ai/..."
|
|
29
|
+
experiment.context.python_config = """
|
|
30
|
+
seed = 0
|
|
31
|
+
"""
|
|
32
|
+
experiment.context.graphviz_config = """
|
|
33
|
+
digraph G {
|
|
34
|
+
A -> B;
|
|
35
|
+
A -> C;
|
|
36
|
+
B -> D;
|
|
37
|
+
C -> D;
|
|
38
|
+
}
|
|
39
|
+
"""
|
|
31
40
|
|
|
32
41
|
# Step 1: Specify the executor specification
|
|
33
42
|
executor_spec = xm_slurm.Slurm.Spec(tag="ghcr.io/jessefarebro/xm-slurm/test:latest")
|
|
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "xmanager-slurm"
|
|
7
7
|
description = "Slurm backend for XManager."
|
|
8
|
-
version = "0.4.4"
|
|
8
|
+
version = "0.4.6"
|
|
9
9
|
# readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
11
11
|
license = { text = "MIT" }
|
|
@@ -22,7 +22,7 @@ classifiers = [
|
|
|
22
22
|
|
|
23
23
|
dependencies = [
|
|
24
24
|
"xmanager>=0.5.0",
|
|
25
|
-
"asyncssh>=2.
|
|
25
|
+
"asyncssh>=2.19.0",
|
|
26
26
|
"humanize>=4.8.0",
|
|
27
27
|
"jinja2>=3.1.2",
|
|
28
28
|
"toml>=0.10.2",
|
|
@@ -31,6 +31,9 @@ dependencies = [
|
|
|
31
31
|
"backoff>=2.2.1",
|
|
32
32
|
"more-itertools>=10.2.0",
|
|
33
33
|
"cloudpickle>=3.0.0",
|
|
34
|
+
"aiofile>=3.9.0",
|
|
35
|
+
"wrapt>=1.16.0",
|
|
36
|
+
"httpx>=0.28.1",
|
|
34
37
|
]
|
|
35
38
|
|
|
36
39
|
[project.urls]
|
|
@@ -57,6 +60,8 @@ dev-dependencies = [
|
|
|
57
60
|
[tool.ruff]
|
|
58
61
|
indent-width = 4
|
|
59
62
|
line-length = 100
|
|
63
|
+
|
|
64
|
+
[tool.ruff.lint]
|
|
60
65
|
# Don't fix unused imports
|
|
61
66
|
unfixable = ["F401"]
|
|
62
67
|
# Ignore unused variables
|
|
@@ -15,6 +15,18 @@ wheels = [
|
|
|
15
15
|
{ url = "https://files.pythonhosted.org/packages/a2/ad/e0d3c824784ff121c03cc031f944bc7e139a8f1870ffd2845cc2dd76f6c4/absl_py-2.1.0-py3-none-any.whl", hash = "sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308", size = 133706 },
|
|
16
16
|
]
|
|
17
17
|
|
|
18
|
+
[[package]]
|
|
19
|
+
name = "aiofile"
|
|
20
|
+
version = "3.9.0"
|
|
21
|
+
source = { registry = "https://pypi.org/simple" }
|
|
22
|
+
dependencies = [
|
|
23
|
+
{ name = "caio" },
|
|
24
|
+
]
|
|
25
|
+
sdist = { url = "https://files.pythonhosted.org/packages/67/e2/d7cb819de8df6b5c1968a2756c3cb4122d4fa2b8fc768b53b7c9e5edb646/aiofile-3.9.0.tar.gz", hash = "sha256:e5ad718bb148b265b6df1b3752c4d1d83024b93da9bd599df74b9d9ffcf7919b", size = 17943 }
|
|
26
|
+
wheels = [
|
|
27
|
+
{ url = "https://files.pythonhosted.org/packages/50/25/da1f0b4dd970e52bf5a36c204c107e11a0c6d3ed195eba0bfbc664c312b2/aiofile-3.9.0-py3-none-any.whl", hash = "sha256:ce2f6c1571538cbdfa0143b04e16b208ecb0e9cb4148e528af8a640ed51cc8aa", size = 19539 },
|
|
28
|
+
]
|
|
29
|
+
|
|
18
30
|
[[package]]
|
|
19
31
|
name = "aiofiles"
|
|
20
32
|
version = "24.1.0"
|
|
@@ -157,17 +169,16 @@ wheels = [
|
|
|
157
169
|
|
|
158
170
|
[[package]]
|
|
159
171
|
name = "anyio"
|
|
160
|
-
version = "
|
|
172
|
+
version = "3.7.1"
|
|
161
173
|
source = { registry = "https://pypi.org/simple" }
|
|
162
174
|
dependencies = [
|
|
163
175
|
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
|
|
164
176
|
{ name = "idna" },
|
|
165
177
|
{ name = "sniffio" },
|
|
166
|
-
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
|
|
167
178
|
]
|
|
168
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
179
|
+
sdist = { url = "https://files.pythonhosted.org/packages/28/99/2dfd53fd55ce9838e6ff2d4dac20ce58263798bd1a0dbe18b3a9af3fcfce/anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780", size = 142927 }
|
|
169
180
|
wheels = [
|
|
170
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
181
|
+
{ url = "https://files.pythonhosted.org/packages/19/24/44299477fe7dcc9cb58d0a57d5a7588d6af2ff403fdd2d47a246c91a3246/anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5", size = 80896 },
|
|
171
182
|
]
|
|
172
183
|
|
|
173
184
|
[[package]]
|
|
@@ -190,15 +201,15 @@ wheels = [
|
|
|
190
201
|
|
|
191
202
|
[[package]]
|
|
192
203
|
name = "asyncssh"
|
|
193
|
-
version = "2.
|
|
204
|
+
version = "2.19.0"
|
|
194
205
|
source = { registry = "https://pypi.org/simple" }
|
|
195
206
|
dependencies = [
|
|
196
207
|
{ name = "cryptography" },
|
|
197
208
|
{ name = "typing-extensions" },
|
|
198
209
|
]
|
|
199
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
210
|
+
sdist = { url = "https://files.pythonhosted.org/packages/75/85/c723aa7dd69570a31b6638c1405e659712f18f569f280da2da27989445d3/asyncssh-2.19.0.tar.gz", hash = "sha256:723dead4d068b558708dc66a4ca7e7a93a813aa9416036eccb9af4c03ae2cf30", size = 533702 }
|
|
200
211
|
wheels = [
|
|
201
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
212
|
+
{ url = "https://files.pythonhosted.org/packages/95/8f/f0749af566fa39204f6380473e6f9632b804daeb0ecb24cc7de1fc9f2717/asyncssh-2.19.0-py3-none-any.whl", hash = "sha256:bb82ac30ff0cb4393fbaf1114e606ad7a4f13d6c4bdaed423c033ee26b455228", size = 372704 },
|
|
202
213
|
]
|
|
203
214
|
|
|
204
215
|
[[package]]
|
|
@@ -249,6 +260,21 @@ wheels = [
|
|
|
249
260
|
{ url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 },
|
|
250
261
|
]
|
|
251
262
|
|
|
263
|
+
[[package]]
|
|
264
|
+
name = "caio"
|
|
265
|
+
version = "0.9.17"
|
|
266
|
+
source = { registry = "https://pypi.org/simple" }
|
|
267
|
+
sdist = { url = "https://files.pythonhosted.org/packages/88/cf/59b868909a85ad9eca985ad5bbeb3c0a8cd435e50ae12def770d16911753/caio-0.9.17.tar.gz", hash = "sha256:8f30511526814d961aeef389ea6885273abe6c655f1e08abbadb95d12fdd9b4f", size = 25001 }
|
|
268
|
+
wheels = [
|
|
269
|
+
{ url = "https://files.pythonhosted.org/packages/db/c3/17bc41b7c795d91d58ee7a70ad98e23f1ba0d50bdeadd82173bb02cddc8e/caio-0.9.17-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3f69395fdd45c115b2ef59732e3c8664722a2b51de2d6eedb3d354b2f5f3be3c", size = 37849 },
|
|
270
|
+
{ url = "https://files.pythonhosted.org/packages/bf/3f/0ae9f69deb3dc96b20bf084cc262b438a154d5de08a064628272713ff239/caio-0.9.17-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3028b746e9ec7f6d6ebb386a7fd8caf0eebed5d6e6b4f18c8ef25861934b1673", size = 79417 },
|
|
271
|
+
{ url = "https://files.pythonhosted.org/packages/52/44/a79c7004a9562a176d78437816c736c34ab9fb6233a4b8164eb25628a09c/caio-0.9.17-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:079730a353bbde03796fab681e969472eace09ffbe5000e584868a7fe389ba6f", size = 37842 },
|
|
272
|
+
{ url = "https://files.pythonhosted.org/packages/57/e2/1d04e506a5fd735856f0bb95b4d03b800947bd43c98193ee57d37070a51e/caio-0.9.17-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:549caa51b475877fe32856a26fe937366ae7a1c23a9727005b441db9abb12bcc", size = 79985 },
|
|
273
|
+
{ url = "https://files.pythonhosted.org/packages/c3/b8/37dcee4bc4fae1701a86373a297bbca797f6b7bfe5f85993a11049649c63/caio-0.9.17-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0ddb253b145a53ecca76381677ce465bc5efeaecb6aaf493fac43ae79659f0fb", size = 37909 },
|
|
274
|
+
{ url = "https://files.pythonhosted.org/packages/80/f5/5e993120daeb4ec084f5f84c118bbd48b65379f63ed56919bf224e0eab42/caio-0.9.17-cp312-cp312-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3e320b0ea371c810359934f8e8fe81777c493cc5fb4d41de44277cbe7336e74", size = 81709 },
|
|
275
|
+
{ url = "https://files.pythonhosted.org/packages/58/72/3f4895adb1d23b0ac1d8afc748405a2ad3c77d8d0f23b05a64ff583c11e5/caio-0.9.17-py3-none-any.whl", hash = "sha256:c55d4dc6b3a36f93237ecd6360e1c131c3808bc47d4191a130148a99b80bb311", size = 19062 },
|
|
276
|
+
]
|
|
277
|
+
|
|
252
278
|
[[package]]
|
|
253
279
|
name = "certifi"
|
|
254
280
|
version = "2024.8.30"
|
|
@@ -374,7 +400,7 @@ name = "click"
|
|
|
374
400
|
version = "8.1.7"
|
|
375
401
|
source = { registry = "https://pypi.org/simple" }
|
|
376
402
|
dependencies = [
|
|
377
|
-
{ name = "colorama", marker = "
|
|
403
|
+
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
|
378
404
|
]
|
|
379
405
|
sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 }
|
|
380
406
|
wheels = [
|
|
@@ -926,6 +952,19 @@ wheels = [
|
|
|
926
952
|
{ url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
|
|
927
953
|
]
|
|
928
954
|
|
|
955
|
+
[[package]]
|
|
956
|
+
name = "httpcore"
|
|
957
|
+
version = "1.0.7"
|
|
958
|
+
source = { registry = "https://pypi.org/simple" }
|
|
959
|
+
dependencies = [
|
|
960
|
+
{ name = "certifi" },
|
|
961
|
+
{ name = "h11" },
|
|
962
|
+
]
|
|
963
|
+
sdist = { url = "https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196 }
|
|
964
|
+
wheels = [
|
|
965
|
+
{ url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551 },
|
|
966
|
+
]
|
|
967
|
+
|
|
929
968
|
[[package]]
|
|
930
969
|
name = "httplib2"
|
|
931
970
|
version = "0.22.0"
|
|
@@ -938,6 +977,21 @@ wheels = [
|
|
|
938
977
|
{ url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854 },
|
|
939
978
|
]
|
|
940
979
|
|
|
980
|
+
[[package]]
|
|
981
|
+
name = "httpx"
|
|
982
|
+
version = "0.28.1"
|
|
983
|
+
source = { registry = "https://pypi.org/simple" }
|
|
984
|
+
dependencies = [
|
|
985
|
+
{ name = "anyio" },
|
|
986
|
+
{ name = "certifi" },
|
|
987
|
+
{ name = "httpcore" },
|
|
988
|
+
{ name = "idna" },
|
|
989
|
+
]
|
|
990
|
+
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 }
|
|
991
|
+
wheels = [
|
|
992
|
+
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
|
|
993
|
+
]
|
|
994
|
+
|
|
941
995
|
[[package]]
|
|
942
996
|
name = "humanize"
|
|
943
997
|
version = "4.10.0"
|
|
@@ -2196,18 +2250,21 @@ wheels = [
|
|
|
2196
2250
|
|
|
2197
2251
|
[[package]]
|
|
2198
2252
|
name = "xmanager-slurm"
|
|
2199
|
-
version = "0.4.4"
|
|
2253
|
+
version = "0.4.6"
|
|
2200
2254
|
source = { editable = "." }
|
|
2201
2255
|
dependencies = [
|
|
2256
|
+
{ name = "aiofile" },
|
|
2202
2257
|
{ name = "asyncssh" },
|
|
2203
2258
|
{ name = "backoff" },
|
|
2204
2259
|
{ name = "cloudpickle" },
|
|
2260
|
+
{ name = "httpx" },
|
|
2205
2261
|
{ name = "humanize" },
|
|
2206
2262
|
{ name = "immutabledict" },
|
|
2207
2263
|
{ name = "jinja2" },
|
|
2208
2264
|
{ name = "more-itertools" },
|
|
2209
2265
|
{ name = "rich" },
|
|
2210
2266
|
{ name = "toml" },
|
|
2267
|
+
{ name = "wrapt" },
|
|
2211
2268
|
{ name = "xmanager" },
|
|
2212
2269
|
]
|
|
2213
2270
|
|
|
@@ -2228,15 +2285,18 @@ dev = [
|
|
|
2228
2285
|
|
|
2229
2286
|
[package.metadata]
|
|
2230
2287
|
requires-dist = [
|
|
2231
|
-
{ name = "
|
|
2288
|
+
{ name = "aiofile", specifier = ">=3.9.0" },
|
|
2289
|
+
{ name = "asyncssh", specifier = ">=2.19.0" },
|
|
2232
2290
|
{ name = "backoff", specifier = ">=2.2.1" },
|
|
2233
2291
|
{ name = "cloudpickle", specifier = ">=3.0.0" },
|
|
2292
|
+
{ name = "httpx", specifier = ">=0.28.1" },
|
|
2234
2293
|
{ name = "humanize", specifier = ">=4.8.0" },
|
|
2235
2294
|
{ name = "immutabledict", specifier = ">=3.0.0" },
|
|
2236
2295
|
{ name = "jinja2", specifier = ">=3.1.2" },
|
|
2237
2296
|
{ name = "more-itertools", specifier = ">=10.2.0" },
|
|
2238
2297
|
{ name = "rich", specifier = ">=13.5.2" },
|
|
2239
2298
|
{ name = "toml", specifier = ">=0.10.2" },
|
|
2299
|
+
{ name = "wrapt", specifier = ">=1.16.0" },
|
|
2240
2300
|
{ name = "xmanager", specifier = ">=0.5.0" },
|
|
2241
2301
|
]
|
|
2242
2302
|
|
|
@@ -3,7 +3,6 @@ import logging
|
|
|
3
3
|
from xm_slurm.executables import Dockerfile, DockerImage
|
|
4
4
|
from xm_slurm.executors import Slurm, SlurmSpec
|
|
5
5
|
from xm_slurm.experiment import (
|
|
6
|
-
Artifact,
|
|
7
6
|
SlurmExperiment,
|
|
8
7
|
create_experiment,
|
|
9
8
|
get_current_experiment,
|
|
@@ -25,7 +24,6 @@ logging.getLogger("asyncssh").setLevel(logging.WARN)
|
|
|
25
24
|
logging.getLogger("httpx").setLevel(logging.WARN)
|
|
26
25
|
|
|
27
26
|
__all__ = [
|
|
28
|
-
"Artifact",
|
|
29
27
|
"conda_container",
|
|
30
28
|
"create_experiment",
|
|
31
29
|
"docker_container",
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from xm_slurm.api import models
|
|
6
|
+
from xm_slurm.api.abc import XManagerAPI
|
|
7
|
+
from xm_slurm.api.sqlite import client as sqlite_client
|
|
8
|
+
from xm_slurm.api.web import client as web_client
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@functools.cache
def client() -> XManagerAPI:
    """Build the API client selected by the ``XM_SLURM_API_BACKEND`` variable.

    The variable is read case-insensitively and falls back to ``"sqlite"``
    when it is unset. Because of ``functools.cache`` a successfully built
    client is reused by every later call.

    Returns:
        ``sqlite_client.XManagerSqliteAPI()`` for the ``sqlite`` backend, or
        ``web_client.XManagerWebAPI`` configured from
        ``XM_SLURM_REST_API_BASE_URL`` and ``XM_SLURM_REST_API_TOKEN`` for the
        ``rest`` backend.

    Raises:
        ValueError: If the backend name is not recognised, or if the ``rest``
            backend is selected while one of its two variables is missing.
    """
    selected = os.environ.get("XM_SLURM_API_BACKEND", "sqlite").lower()

    if selected == "sqlite":
        return sqlite_client.XManagerSqliteAPI()

    if selected != "rest":
        raise ValueError(f"Invalid XM_SLURM_API_BACKEND: {selected}")

    # REST backend: the base URL is checked before the token so the first
    # missing variable is the one reported.
    env = os.environ
    if "XM_SLURM_REST_API_BASE_URL" not in env:
        raise ValueError("XM_SLURM_REST_API_BASE_URL is not set")
    if "XM_SLURM_REST_API_TOKEN" not in env:
        raise ValueError("XM_SLURM_REST_API_TOKEN is not set")

    return web_client.XManagerWebAPI(
        base_url=env["XM_SLURM_REST_API_BASE_URL"],
        token=env["XM_SLURM_REST_API_TOKEN"],
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
__all__ = ["client", "XManagerAPI", "models"]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
|
|
3
|
+
from xm_slurm.api import models
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class XManagerAPI(abc.ABC):
    """Abstract interface for the experiment metadata backends.

    Every operation below is abstract, so a concrete backend has to override
    all of them before it can be instantiated. The bodies here are empty; the
    per-method notes describe only what the signatures show, and anything
    marked "presumably" should be confirmed against the concrete clients.
    """

    # --- experiments -----------------------------------------------------

    @abc.abstractmethod
    def get_experiment(self, xid: int) -> models.Experiment:
        """Return the ``models.Experiment`` for the experiment id ``xid``."""

    @abc.abstractmethod
    def delete_experiment(self, experiment_id: int) -> None:
        """Delete the experiment ``experiment_id``."""

    @abc.abstractmethod
    def insert_experiment(self, experiment: models.ExperimentPatch) -> int:
        """Create an experiment from ``experiment``.

        Returns an ``int`` (presumably the id of the new experiment; confirm
        against the implementations).
        """

    @abc.abstractmethod
    def update_experiment(
        self, experiment_id: int, experiment_patch: models.ExperimentPatch
    ) -> None:
        """Apply ``experiment_patch`` to the experiment ``experiment_id``."""

    # --- jobs and work units ---------------------------------------------

    @abc.abstractmethod
    def insert_job(self, experiment_id: int, work_unit_id: int, job: models.SlurmJob) -> None:
        """Record ``job`` for work unit ``work_unit_id`` of ``experiment_id``."""

    @abc.abstractmethod
    def insert_work_unit(self, experiment_id: int, work_unit: models.WorkUnitPatch) -> None:
        """Add ``work_unit`` to the experiment ``experiment_id``."""

    @abc.abstractmethod
    def update_work_unit(
        self, experiment_id: int, work_unit_id: int, patch: models.ExperimentUnitPatch
    ) -> None:
        """Apply ``patch`` to work unit ``work_unit_id`` of ``experiment_id``."""

    # --- work unit artifacts ---------------------------------------------

    @abc.abstractmethod
    def delete_work_unit_artifact(self, experiment_id: int, work_unit_id: int, name: str) -> None:
        """Remove the artifact called ``name`` from the given work unit."""

    @abc.abstractmethod
    def insert_work_unit_artifact(
        self, experiment_id: int, work_unit_id: int, artifact: models.Artifact
    ) -> None:
        """Attach ``artifact`` to the given work unit."""

    # --- experiment artifacts --------------------------------------------

    @abc.abstractmethod
    def delete_experiment_artifact(self, experiment_id: int, name: str) -> None:
        """Remove the artifact called ``name`` from the experiment."""

    @abc.abstractmethod
    def insert_experiment_artifact(self, experiment_id: int, artifact: models.Artifact) -> None:
        """Attach ``artifact`` to the experiment ``experiment_id``."""

    @abc.abstractmethod
    def insert_experiment_config_artifact(
        self, experiment_id: int, artifact: models.ConfigArtifact
    ) -> None:
        """Attach the config artifact ``artifact`` to the experiment."""

    @abc.abstractmethod
    def delete_experiment_config_artifact(self, experiment_id: int, name: str) -> None:
        """Remove the config artifact called ``name`` from the experiment."""
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import enum
|
|
3
|
+
import typing as tp
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ExperimentUnitRole(enum.Enum):
    """Role an experiment unit can have: a work unit or an auxiliary unit."""

    # Spelled out explicitly; these are the values ``enum.auto()`` assigns
    # to the first and second member of a plain ``Enum``.
    WORK_UNIT = 1
    AUX_UNIT = 2
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
12
|
+
class ExperimentPatch:
|
|
13
|
+
title: str | None = None
|
|
14
|
+
description: str | None = None
|
|
15
|
+
note: str | None = None
|
|
16
|
+
tags: list[str] | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
20
|
+
class SlurmJob:
|
|
21
|
+
name: str
|
|
22
|
+
slurm_job_id: str
|
|
23
|
+
slurm_ssh_config: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
27
|
+
class Artifact:
|
|
28
|
+
name: str
|
|
29
|
+
uri: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
33
|
+
class ConfigArtifact:
|
|
34
|
+
name: tp.Literal["GRAPHVIZ", "PYTHON"]
|
|
35
|
+
uri: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclasses.dataclass(frozen=True, kw_only=True)
class ExperimentUnit:
    """Immutable, keyword-only base record for a unit of an experiment.

    ``identity`` is required; ``args`` defaults to ``None`` and ``jobs`` to a
    fresh empty list per instance.
    """

    identity: str
    # NOTE(review): stored as one string; the encoding is not visible here.
    args: str | None = None
    # default_factory gives each instance its own list.
    jobs: list[SlurmJob] = dataclasses.field(default_factory=list)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
46
|
+
class ExperimentUnitPatch:
|
|
47
|
+
identity: str | None = None
|
|
48
|
+
args: str | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclasses.dataclass(frozen=True, kw_only=True)
class WorkUnit(ExperimentUnit):
    """``ExperimentUnit`` extended with a work unit id and its artifacts.

    ``wid`` is required. It may follow the defaulted base-class fields only
    because every field is keyword-only.
    """

    wid: int
    # default_factory gives each instance its own list.
    artifacts: list[Artifact] = dataclasses.field(default_factory=list)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclasses.dataclass(frozen=True, kw_only=True)
class WorkUnitPatch(ExperimentUnitPatch):
    """``ExperimentUnitPatch`` extended with a required work unit id.

    ``wid`` has no default. It may follow the defaulted base-class fields
    only because every field is keyword-only.
    """

    wid: int
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclasses.dataclass(frozen=True, kw_only=True)
class Experiment:
    """Immutable, keyword-only record of an experiment and what it contains.

    ``title``, ``description``, ``note`` and ``tags`` have no defaults and
    must always be passed, although the last three accept ``None``.
    ``work_units`` and ``artifacts`` default to fresh empty lists.
    """

    title: str
    description: str | None
    note: str | None
    tags: list[str] | None

    # default_factory gives each instance its own list.
    work_units: list[WorkUnit] = dataclasses.field(default_factory=list)
    artifacts: list[Artifact] = dataclasses.field(default_factory=list)
|