trainml 0.5.6__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/integration/test_jobs_integration.py +13 -0
- tests/unit/cli/cloudbender/test_cli_service_unit.py +34 -0
- tests/unit/cloudbender/test_data_connectors_unit.py +176 -0
- tests/unit/cloudbender/test_services_unit.py +6 -0
- tests/unit/test_projects_unit.py +45 -5
- trainml/__init__.py +1 -1
- trainml/checkpoints.py +25 -25
- trainml/cli/cloudbender/__init__.py +1 -0
- trainml/cli/cloudbender/data_connector.py +159 -0
- trainml/cli/cloudbender/service.py +19 -2
- trainml/cloudbender/cloudbender.py +2 -0
- trainml/cloudbender/data_connectors.py +112 -0
- trainml/cloudbender/services.py +65 -1
- trainml/datasets.py +19 -8
- trainml/jobs.py +13 -6
- trainml/models.py +22 -19
- trainml/projects.py +60 -8
- trainml/volumes.py +9 -2
- {trainml-0.5.6.dist-info → trainml-0.5.7.dist-info}/METADATA +1 -1
- {trainml-0.5.6.dist-info → trainml-0.5.7.dist-info}/RECORD +24 -20
- {trainml-0.5.6.dist-info → trainml-0.5.7.dist-info}/LICENSE +0 -0
- {trainml-0.5.6.dist-info → trainml-0.5.7.dist-info}/WHEEL +0 -0
- {trainml-0.5.6.dist-info → trainml-0.5.7.dist-info}/entry_points.txt +0 -0
- {trainml-0.5.6.dist-info → trainml-0.5.7.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ from .regions import Regions
|
|
|
3
3
|
from .nodes import Nodes
|
|
4
4
|
from .devices import Devices
|
|
5
5
|
from .datastores import Datastores
|
|
6
|
+
from .data_connectors import DataConnectors
|
|
6
7
|
from .services import Services
|
|
7
8
|
from .device_configs import DeviceConfigs
|
|
8
9
|
|
|
@@ -15,5 +16,6 @@ class Cloudbender(object):
|
|
|
15
16
|
self.nodes = Nodes(trainml)
|
|
16
17
|
self.devices = Devices(trainml)
|
|
17
18
|
self.datastores = Datastores(trainml)
|
|
19
|
+
self.data_connectors = DataConnectors(trainml)
|
|
18
20
|
self.services = Services(trainml)
|
|
19
21
|
self.device_configs = DeviceConfigs(trainml)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DataConnectors(object):
    """Collection-level client for CloudBender data connectors.

    Every method forwards to ``trainml._query`` using endpoints of the form
    ``/provider/{provider_uuid}/region/{region_uuid}/data_connector``.
    """

    def __init__(self, trainml):
        self.trainml = trainml

    async def get(self, provider_uuid, region_uuid, id, **kwargs):
        """Fetch a single data connector and wrap it in ``DataConnector``.

        ``kwargs`` are handed to ``_query`` as its third argument
        (presumably request parameters -- confirm against ``_query``).
        """
        endpoint = (
            f"/provider/{provider_uuid}/region/{region_uuid}/data_connector/{id}"
        )
        resp = await self.trainml._query(endpoint, "GET", kwargs)
        return DataConnector(self.trainml, **resp)

    async def list(self, provider_uuid, region_uuid, **kwargs):
        """Return every data connector of the region as ``DataConnector`` objects."""
        endpoint = f"/provider/{provider_uuid}/region/{region_uuid}/data_connector"
        resp = await self.trainml._query(endpoint, "GET", kwargs)
        return [DataConnector(self.trainml, **entry) for entry in resp]

    async def create(
        self,
        provider_uuid,
        region_uuid,
        name,
        type,
        **kwargs,
    ):
        """Create a data connector and return it as a ``DataConnector``.

        ``name``, ``type`` and any extra ``kwargs`` form the request body;
        entries whose value is ``None`` are dropped before sending.
        """
        logging.info(f"Creating Data Connector {name}")
        fields = dict(name=name, type=type, **kwargs)
        payload = {
            key: value for key, value in fields.items() if value is not None
        }
        endpoint = f"/provider/{provider_uuid}/region/{region_uuid}/data_connector"
        resp = await self.trainml._query(endpoint, "POST", None, payload)
        created = DataConnector(self.trainml, **resp)
        logging.info(f"Created Data Connector {name} with id {created.id}")
        return created

    async def remove(self, provider_uuid, region_uuid, id, **kwargs):
        """Issue a DELETE for the given data connector; returns ``None``."""
        endpoint = (
            f"/provider/{provider_uuid}/region/{region_uuid}/data_connector/{id}"
        )
        await self.trainml._query(endpoint, "DELETE", kwargs)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class DataConnector:
    """Wrapper around one data connector record.

    The keyword arguments given to the constructor are stored unchanged and
    the ``connector_id``, ``provider_uuid``, ``region_uuid``, ``type`` and
    ``name`` keys are exposed as read-only properties (``None`` when absent).
    """

    def __init__(self, trainml, **kwargs):
        self.trainml = trainml
        self._data_connector = kwargs
        record = self._data_connector
        self._id = record.get("connector_id")
        self._provider_uuid = record.get("provider_uuid")
        self._region_uuid = record.get("region_uuid")
        self._type = record.get("type")
        self._name = record.get("name")

    @property
    def id(self) -> str:
        """Value of the ``connector_id`` key."""
        return self._id

    @property
    def provider_uuid(self) -> str:
        """Value of the ``provider_uuid`` key."""
        return self._provider_uuid

    @property
    def region_uuid(self) -> str:
        """Value of the ``region_uuid`` key."""
        return self._region_uuid

    @property
    def type(self) -> str:
        """Value of the ``type`` key."""
        return self._type

    @property
    def name(self) -> str:
        """Value of the ``name`` key."""
        return self._name

    def __str__(self):
        # Serialise a shallow copy of the stored record as JSON.
        return json.dumps(dict(self._data_connector))

    def __repr__(self):
        return f"DataConnector( trainml , **{self._data_connector!r})"

    def __bool__(self):
        # An instance without an id is falsy.
        return bool(self._id)

    async def remove(self):
        """Issue a DELETE for this data connector; returns ``None``."""
        endpoint = f"/provider/{self._provider_uuid}/region/{self._region_uuid}/data_connector/{self._id}"
        await self.trainml._query(endpoint, "DELETE")

    async def refresh(self):
        """Re-fetch this data connector, re-initialise in place, return ``self``."""
        endpoint = f"/provider/{self._provider_uuid}/region/{self._region_uuid}/data_connector/{self._id}"
        resp = await self.trainml._query(endpoint, "GET")
        # Re-running __init__ replaces every cached field with the new response.
        self.__init__(self.trainml, **resp)
        return self
|
trainml/cloudbender/services.py
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import asyncio
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from trainml.exceptions import (
|
|
7
|
+
ApiError,
|
|
8
|
+
SpecificationError,
|
|
9
|
+
TrainMLException,
|
|
10
|
+
)
|
|
3
11
|
|
|
4
12
|
|
|
5
13
|
class Services(object):
|
|
@@ -28,12 +36,14 @@ class Services(object):
|
|
|
28
36
|
provider_uuid,
|
|
29
37
|
region_uuid,
|
|
30
38
|
name,
|
|
39
|
+
type,
|
|
31
40
|
public,
|
|
32
41
|
**kwargs,
|
|
33
42
|
):
|
|
34
43
|
logging.info(f"Creating Service {name}")
|
|
35
44
|
data = dict(
|
|
36
45
|
name=name,
|
|
46
|
+
type=type,
|
|
37
47
|
public=public,
|
|
38
48
|
**kwargs,
|
|
39
49
|
)
|
|
@@ -65,7 +75,12 @@ class Service:
|
|
|
65
75
|
self._region_uuid = self._service.get("region_uuid")
|
|
66
76
|
self._public = self._service.get("public")
|
|
67
77
|
self._name = self._service.get("name")
|
|
68
|
-
self.
|
|
78
|
+
self._type = self._service.get("type")
|
|
79
|
+
self._hostname = self._service.get("custom_hostname") or self._service.get(
|
|
80
|
+
"hostname"
|
|
81
|
+
)
|
|
82
|
+
self._status = self._service.get("status")
|
|
83
|
+
self._port = self._service.get("port")
|
|
69
84
|
|
|
70
85
|
@property
|
|
71
86
|
def id(self) -> str:
|
|
@@ -91,6 +106,18 @@ class Service:
|
|
|
91
106
|
def hostname(self) -> str:
|
|
92
107
|
return self._hostname
|
|
93
108
|
|
|
109
|
+
@property
def status(self) -> str:
    """Return the ``status`` value read from the service record at init."""
    return self._status
|
|
112
|
+
|
|
113
|
+
@property
def type(self) -> str:
    """Return the ``type`` value read from the service record at init."""
    return self._type
|
|
116
|
+
|
|
117
|
+
@property
def port(self) -> str:
    """Return the ``port`` value read from the service record at init.

    NOTE(review): annotated as ``str``; the actual type depends on what the
    API returns for ``port`` -- confirm.
    """
    return self._port
|
|
120
|
+
|
|
94
121
|
def __str__(self):
|
|
95
122
|
return json.dumps({k: v for k, v in self._service.items()})
|
|
96
123
|
|
|
@@ -113,3 +140,40 @@ class Service:
|
|
|
113
140
|
)
|
|
114
141
|
self.__init__(self.trainml, **resp)
|
|
115
142
|
return self
|
|
143
|
+
|
|
144
|
+
async def wait_for(self, status, timeout=300):
    """Poll the service until it reaches ``status`` or the timeout expires.

    Args:
        status: Target status; must be ``"active"`` or ``"archived"``.
        timeout: Maximum number of seconds to wait (at most 24 hours).

    Returns:
        ``None`` when the service is already in ``status`` on entry, or when
        waiting for ``"archived"`` and the refresh fails with a 404;
        ``self`` when the status is reached after a refresh.

    Raises:
        SpecificationError: ``status`` is not a valid target or ``timeout``
            exceeds the maximum.
        ApiError: A refresh failed for any reason other than the 404 case.
        TrainMLException: ``status`` was not reached within ``timeout``.
    """
    # Already there: nothing to wait for (checked before validation).
    if self.status == status:
        return
    valid_statuses = ["active", "archived"]
    if status not in valid_statuses:
        raise SpecificationError(
            "status",
            f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
        )
    MAX_TIMEOUT = 24 * 60 * 60
    if timeout > MAX_TIMEOUT:
        raise SpecificationError(
            "timeout",
            f"timeout must be less than {MAX_TIMEOUT} seconds.",
        )

    POLL_INTERVAL_MIN = 5
    POLL_INTERVAL_MAX = 60
    # Aim for roughly 60 polls per wait, clamped to the 5-60 second range.
    POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
    retry_count = math.ceil(timeout / POLL_INTERVAL)
    for count in range(1, retry_count + 1):
        await asyncio.sleep(POLL_INTERVAL)
        try:
            await self.refresh()
        except ApiError as e:
            # A 404 while waiting for "archived" is treated as success.
            if status == "archived" and e.status == 404:
                return
            # Bare raise re-raises the active exception unchanged.
            raise
        if self.status == status:
            return self
        logging.debug(f"self: {self}, retry count {count}")

    raise TrainMLException(f"Timeout waiting for {status}")
|
trainml/datasets.py
CHANGED
|
@@ -119,12 +119,16 @@ class Dataset:
|
|
|
119
119
|
project_uuid=self._dataset.get("project_uuid"),
|
|
120
120
|
cidr=self._dataset.get("vpn").get("cidr"),
|
|
121
121
|
ssh_port=self._dataset.get("vpn").get("client").get("ssh_port"),
|
|
122
|
-
input_path=
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
122
|
+
input_path=(
|
|
123
|
+
self._dataset.get("source_uri")
|
|
124
|
+
if self.status in ["new", "downloading"]
|
|
125
|
+
else None
|
|
126
|
+
),
|
|
127
|
+
output_path=(
|
|
128
|
+
self._dataset.get("output_uri")
|
|
129
|
+
if self.status == "exporting"
|
|
130
|
+
else None
|
|
131
|
+
),
|
|
128
132
|
)
|
|
129
133
|
else:
|
|
130
134
|
details = dict()
|
|
@@ -215,14 +219,21 @@ class Dataset:
|
|
|
215
219
|
return self
|
|
216
220
|
|
|
217
221
|
async def wait_for(self, status, timeout=300):
|
|
222
|
+
if self.status == status:
|
|
223
|
+
return
|
|
218
224
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
219
225
|
if not status in valid_statuses:
|
|
220
226
|
raise SpecificationError(
|
|
221
227
|
"status",
|
|
222
228
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
223
229
|
)
|
|
224
|
-
|
|
225
|
-
|
|
230
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
231
|
+
if timeout > MAX_TIMEOUT:
|
|
232
|
+
raise SpecificationError(
|
|
233
|
+
"timeout",
|
|
234
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
235
|
+
)
|
|
236
|
+
|
|
226
237
|
POLL_INTERVAL_MIN = 5
|
|
227
238
|
POLL_INTERVAL_MAX = 60
|
|
228
239
|
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
trainml/jobs.py
CHANGED
|
@@ -468,6 +468,12 @@ class Job:
|
|
|
468
468
|
return job
|
|
469
469
|
|
|
470
470
|
async def wait_for(self, status, timeout=300):
|
|
471
|
+
if self.status == status or (
|
|
472
|
+
self.type == "training"
|
|
473
|
+
and status == "finished"
|
|
474
|
+
and self.status == "stopped"
|
|
475
|
+
):
|
|
476
|
+
return
|
|
471
477
|
valid_statuses = [
|
|
472
478
|
"waiting for data/model download",
|
|
473
479
|
"waiting for GPUs",
|
|
@@ -492,12 +498,13 @@ class Job:
|
|
|
492
498
|
"'stopped' status is deprecated for training jobs, use 'finished' instead.",
|
|
493
499
|
DeprecationWarning,
|
|
494
500
|
)
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
+
|
|
502
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
503
|
+
if timeout > MAX_TIMEOUT:
|
|
504
|
+
raise SpecificationError(
|
|
505
|
+
"timeout",
|
|
506
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
507
|
+
)
|
|
501
508
|
|
|
502
509
|
POLL_INTERVAL_MIN = 5
|
|
503
510
|
POLL_INTERVAL_MAX = 60
|
trainml/models.py
CHANGED
|
@@ -32,8 +32,7 @@ class Models(object):
|
|
|
32
32
|
source_type=source_type,
|
|
33
33
|
source_uri=source_uri,
|
|
34
34
|
source_options=kwargs.get("source_options"),
|
|
35
|
-
project_uuid=kwargs.get("project_uuid")
|
|
36
|
-
or self.trainml.active_project,
|
|
35
|
+
project_uuid=kwargs.get("project_uuid") or self.trainml.active_project,
|
|
37
36
|
)
|
|
38
37
|
payload = {k: v for k, v in data.items() if v is not None}
|
|
39
38
|
logging.info(f"Creating Model {name}")
|
|
@@ -44,9 +43,7 @@ class Models(object):
|
|
|
44
43
|
return model
|
|
45
44
|
|
|
46
45
|
async def remove(self, id, **kwargs):
|
|
47
|
-
await self.trainml._query(
|
|
48
|
-
f"/model/{id}", "DELETE", dict(**kwargs, force=True)
|
|
49
|
-
)
|
|
46
|
+
await self.trainml._query(f"/model/{id}", "DELETE", dict(**kwargs, force=True))
|
|
50
47
|
|
|
51
48
|
|
|
52
49
|
class Model:
|
|
@@ -115,12 +112,16 @@ class Model:
|
|
|
115
112
|
project_uuid=self._model.get("project_uuid"),
|
|
116
113
|
cidr=self._model.get("vpn").get("cidr"),
|
|
117
114
|
ssh_port=self._model.get("vpn").get("client").get("ssh_port"),
|
|
118
|
-
input_path=
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
115
|
+
input_path=(
|
|
116
|
+
self._model.get("source_uri")
|
|
117
|
+
if self.status in ["new", "downloading"]
|
|
118
|
+
else None
|
|
119
|
+
),
|
|
120
|
+
output_path=(
|
|
121
|
+
self._model.get("output_uri")
|
|
122
|
+
if self.status == "exporting"
|
|
123
|
+
else None
|
|
124
|
+
),
|
|
124
125
|
)
|
|
125
126
|
else:
|
|
126
127
|
details = dict()
|
|
@@ -185,9 +186,7 @@ class Model:
|
|
|
185
186
|
if msg_handler:
|
|
186
187
|
msg_handler(data)
|
|
187
188
|
else:
|
|
188
|
-
timestamp = datetime.fromtimestamp(
|
|
189
|
-
int(data.get("time")) / 1000
|
|
190
|
-
)
|
|
189
|
+
timestamp = datetime.fromtimestamp(int(data.get("time")) / 1000)
|
|
191
190
|
print(
|
|
192
191
|
f"{timestamp.strftime('%m/%d/%Y, %H:%M:%S')}: {data.get('msg').rstrip()}"
|
|
193
192
|
)
|
|
@@ -214,19 +213,23 @@ class Model:
|
|
|
214
213
|
return self
|
|
215
214
|
|
|
216
215
|
async def wait_for(self, status, timeout=300):
|
|
216
|
+
if self.status == status:
|
|
217
|
+
return
|
|
217
218
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
218
219
|
if not status in valid_statuses:
|
|
219
220
|
raise SpecificationError(
|
|
220
221
|
"status",
|
|
221
222
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
222
223
|
)
|
|
223
|
-
|
|
224
|
-
|
|
224
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
225
|
+
if timeout > MAX_TIMEOUT:
|
|
226
|
+
raise SpecificationError(
|
|
227
|
+
"timeout",
|
|
228
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
229
|
+
)
|
|
225
230
|
POLL_INTERVAL_MIN = 5
|
|
226
231
|
POLL_INTERVAL_MAX = 60
|
|
227
|
-
POLL_INTERVAL = max(
|
|
228
|
-
min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN
|
|
229
|
-
)
|
|
232
|
+
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
230
233
|
retry_count = math.ceil(timeout / POLL_INTERVAL)
|
|
231
234
|
count = 0
|
|
232
235
|
while count < retry_count:
|
trainml/projects.py
CHANGED
|
@@ -10,6 +10,12 @@ class Projects(object):
|
|
|
10
10
|
resp = await self.trainml._query(f"/project/{id}", "GET", kwargs)
|
|
11
11
|
return Project(self.trainml, **resp)
|
|
12
12
|
|
|
13
|
+
async def get_current(self, **kwargs):
    """Fetch the project named by ``self.trainml.project`` as a ``Project``.

    ``kwargs`` are handed to ``_query`` as its third argument.
    """
    endpoint = f"/project/{self.trainml.project}"
    resp = await self.trainml._query(endpoint, "GET", kwargs)
    return Project(self.trainml, **resp)
|
|
18
|
+
|
|
13
19
|
async def list(self, **kwargs):
|
|
14
20
|
resp = await self.trainml._query(f"/project", "GET", kwargs)
|
|
15
21
|
projects = [Project(self.trainml, **project) for project in resp]
|
|
@@ -72,6 +78,46 @@ class ProjectDatastore:
|
|
|
72
78
|
return bool(self._id)
|
|
73
79
|
|
|
74
80
|
|
|
81
|
+
class ProjectDataConnector:
    """Read-only view of a data connector record attached to a project.

    The keyword arguments given to the constructor are stored unchanged and
    the ``id``, ``project_uuid``, ``name``, ``type`` and ``region_uuid`` keys
    are exposed as properties (``None`` when absent).
    """

    def __init__(self, trainml, **kwargs):
        self.trainml = trainml
        self._data_connector = kwargs
        record = self._data_connector
        self._id = record.get("id")
        self._project_uuid = record.get("project_uuid")
        self._name = record.get("name")
        self._type = record.get("type")
        self._region_uuid = record.get("region_uuid")

    @property
    def id(self) -> str:
        """Value of the ``id`` key."""
        return self._id

    @property
    def project_uuid(self) -> str:
        """Value of the ``project_uuid`` key."""
        return self._project_uuid

    @property
    def name(self) -> str:
        """Value of the ``name`` key."""
        return self._name

    @property
    def type(self) -> str:
        """Value of the ``type`` key."""
        return self._type

    @property
    def region_uuid(self) -> str:
        """Value of the ``region_uuid`` key."""
        return self._region_uuid

    def __str__(self):
        # Serialise a shallow copy of the stored record as JSON.
        return json.dumps(dict(self._data_connector))

    def __repr__(self):
        return f"ProjectDataConnector( trainml , **{self._data_connector!r})"

    def __bool__(self):
        # An instance without an id is falsy.
        return bool(self._id)
|
|
119
|
+
|
|
120
|
+
|
|
75
121
|
class ProjectService:
|
|
76
122
|
def __init__(self, trainml, **kwargs):
|
|
77
123
|
self.trainml = trainml
|
|
@@ -79,9 +125,8 @@ class ProjectService:
|
|
|
79
125
|
self._id = self._service.get("id")
|
|
80
126
|
self._project_uuid = self._service.get("project_uuid")
|
|
81
127
|
self._name = self._service.get("name")
|
|
82
|
-
self._type = self._service.get("type")
|
|
83
128
|
self._hostname = self._service.get("hostname")
|
|
84
|
-
self.
|
|
129
|
+
self._public = self._service.get("public")
|
|
85
130
|
self._region_uuid = self._service.get("region_uuid")
|
|
86
131
|
|
|
87
132
|
@property
|
|
@@ -96,17 +141,13 @@ class ProjectService:
|
|
|
96
141
|
def name(self) -> str:
|
|
97
142
|
return self._name
|
|
98
143
|
|
|
99
|
-
@property
|
|
100
|
-
def type(self) -> str:
|
|
101
|
-
return self._type
|
|
102
|
-
|
|
103
144
|
@property
|
|
104
145
|
def hostname(self) -> str:
|
|
105
146
|
return self._hostname
|
|
106
147
|
|
|
107
148
|
@property
def public(self) -> bool:
    """Return the ``public`` value read from the service record at init."""
    return self._public
|
|
110
151
|
|
|
111
152
|
@property
|
|
112
153
|
def region_uuid(self) -> str:
|
|
@@ -164,6 +205,14 @@ class Project:
|
|
|
164
205
|
datastores = [ProjectDatastore(self.trainml, **datastore) for datastore in resp]
|
|
165
206
|
return datastores
|
|
166
207
|
|
|
208
|
+
async def list_data_connectors(self):
    """Return this project's data connectors as ``ProjectDataConnector`` objects."""
    endpoint = f"/project/{self._id}/data_connectors"
    resp = await self.trainml._query(endpoint, "GET")
    return [ProjectDataConnector(self.trainml, **entry) for entry in resp]
|
|
215
|
+
|
|
167
216
|
async def list_services(self):
|
|
168
217
|
resp = await self.trainml._query(f"/project/{self._id}/services", "GET")
|
|
169
218
|
services = [ProjectService(self.trainml, **service) for service in resp]
|
|
@@ -172,5 +221,8 @@ class Project:
|
|
|
172
221
|
async def refresh_datastores(self):
|
|
173
222
|
await self.trainml._query(f"/project/{self._id}/datastores", "PATCH")
|
|
174
223
|
|
|
224
|
+
async def refresh_data_connectors(self):
    """Send a PATCH to this project's ``data_connectors`` endpoint; returns ``None``."""
    endpoint = f"/project/{self._id}/data_connectors"
    await self.trainml._query(endpoint, "PATCH")
|
|
226
|
+
|
|
175
227
|
async def refresh_services(self):
|
|
176
228
|
await self.trainml._query(f"/project/{self._id}/services", "PATCH")
|
trainml/volumes.py
CHANGED
|
@@ -223,14 +223,21 @@ class Volume:
|
|
|
223
223
|
return self
|
|
224
224
|
|
|
225
225
|
async def wait_for(self, status, timeout=300):
|
|
226
|
+
if self.status == status:
|
|
227
|
+
return
|
|
226
228
|
valid_statuses = ["downloading", "ready", "archived"]
|
|
227
229
|
if not status in valid_statuses:
|
|
228
230
|
raise SpecificationError(
|
|
229
231
|
"status",
|
|
230
232
|
f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
|
|
231
233
|
)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
+
|
|
235
|
+
MAX_TIMEOUT = 24 * 60 * 60
|
|
236
|
+
if timeout > MAX_TIMEOUT:
|
|
237
|
+
raise SpecificationError(
|
|
238
|
+
"timeout",
|
|
239
|
+
f"timeout must be less than {MAX_TIMEOUT} seconds.",
|
|
240
|
+
)
|
|
234
241
|
POLL_INTERVAL_MIN = 5
|
|
235
242
|
POLL_INTERVAL_MAX = 60
|
|
236
243
|
POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
|
|
@@ -8,7 +8,7 @@ tests/integration/test_checkpoints_integration.py,sha256=mLha1BhVZ916OJIDOKF6vah
|
|
|
8
8
|
tests/integration/test_datasets_integration.py,sha256=zdHOevduuMUWvVxaHBslpmH8AdvPdqEJ95MdqCC5_rw,3499
|
|
9
9
|
tests/integration/test_environments_integration.py,sha256=0IckhJvQhd8j4Ouiu0hMq2b7iA1dbZpZYmknyfWjsFM,1403
|
|
10
10
|
tests/integration/test_gpu_types_integration.py,sha256=V2OncokZWWVq_l5FSmKEDM4EsWrmpB-zKiVPt-we0aY,1256
|
|
11
|
-
tests/integration/test_jobs_integration.py,sha256=
|
|
11
|
+
tests/integration/test_jobs_integration.py,sha256=N2peEQGYHteGMd0J7NK4gJaaTyps5jjuiKq2ENnD8SY,25117
|
|
12
12
|
tests/integration/test_models_integration.py,sha256=UPRAz0lcpzGihsnDUARoafbd5sZ6OM8TIeh8HNN6Bg0,2902
|
|
13
13
|
tests/integration/test_projects_integration.py,sha256=BX-LqLfzawTQUhtx--5dw7QqR8kl_CJvwSCyNXDUQTw,1446
|
|
14
14
|
tests/integration/test_volumes_integration.py,sha256=gOmZpwwFxqeOAVmfKWSTmuyshx8nb2zu_0xv1RUEepM,3270
|
|
@@ -25,7 +25,7 @@ tests/unit/test_exceptions.py,sha256=3tAok6kAU1QRjN7qTNVYuSGWDg7IEoK__OXFLyzLr7k
|
|
|
25
25
|
tests/unit/test_gpu_types_unit.py,sha256=c9ie6YSYT5onBnlmHvHWON9WgQiJ1eO2C-4Tk-UPQHg,2054
|
|
26
26
|
tests/unit/test_jobs_unit.py,sha256=bZxN9HUfHCyQCjZCZGn6WFIhu8S5FU1z5ZG9sgH2XEg,26835
|
|
27
27
|
tests/unit/test_models_unit.py,sha256=uezWF7FUHGmCSQBtpyyKhBttTnCTRjxU22NsHdJLYYg,15064
|
|
28
|
-
tests/unit/test_projects_unit.py,sha256=
|
|
28
|
+
tests/unit/test_projects_unit.py,sha256=uqMs3v4mNevUSh5QgP54_R88ctqOXdD73t0AgjTWXbg,10743
|
|
29
29
|
tests/unit/test_trainml.py,sha256=8vAKvFD1xYsx_VY4HFVa0b1MUlMoNApY6TO8r7vI-UQ,1701
|
|
30
30
|
tests/unit/test_volumes_unit.py,sha256=KHVmdbQIiX8tEE09U-XsH-vl6wfYGVoRzR_UQJlhOVE,15305
|
|
31
31
|
tests/unit/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -45,7 +45,9 @@ tests/unit/cli/cloudbender/test_cli_node_unit.py,sha256=KbK7axJ1L4y4sN7KQRpOVIqp
|
|
|
45
45
|
tests/unit/cli/cloudbender/test_cli_provider_unit.py,sha256=Rm-tRNPbTTB7ZzkkIpLfDp_pEYfqihjB0ZYk_EPQUfs,781
|
|
46
46
|
tests/unit/cli/cloudbender/test_cli_region_unit.py,sha256=iH5AbrzZ-R2EJ-Bd2HFN7FN2lTpkr3-pCLR59ZVvdQU,1262
|
|
47
47
|
tests/unit/cli/cloudbender/test_cli_reservation_unit.py,sha256=4LDOJDXygMuho2cdI2K59eq4oyiry9hNaG0avEr0_tw,1311
|
|
48
|
+
tests/unit/cli/cloudbender/test_cli_service_unit.py,sha256=4LDOJDXygMuho2cdI2K59eq4oyiry9hNaG0avEr0_tw,1311
|
|
48
49
|
tests/unit/cloudbender/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
|
+
tests/unit/cloudbender/test_data_connectors_unit.py,sha256=qy97mNAcy_xEkh8obBobHt16B3e1N3QcBSyfV9xJwPI,5783
|
|
49
51
|
tests/unit/cloudbender/test_datastores_unit.py,sha256=54mPokxhrRjlkBfqpmeA_q-PLml-HUNNit91aQVTpCg,5398
|
|
50
52
|
tests/unit/cloudbender/test_device_configs_unit.py,sha256=lzyCuF7MRoQrtJVTQFL27lqPnRwQFv25htPgJqDuQI8,5714
|
|
51
53
|
tests/unit/cloudbender/test_devices_unit.py,sha256=QBWnlOe1tw_XNA_i-yDHkmpGvtK36f2u1HhoXquoVaE,9103
|
|
@@ -53,21 +55,21 @@ tests/unit/cloudbender/test_nodes_unit.py,sha256=BDpfJXCBNNpLt5rhJMk2BVXDQ_4QSmx
|
|
|
53
55
|
tests/unit/cloudbender/test_providers_unit.py,sha256=OgxifgC1IqLH8DNMKXy1Ne9_7a75ea6kHEOfRSRoQuQ,4373
|
|
54
56
|
tests/unit/cloudbender/test_regions_unit.py,sha256=BbJICLIQmlotpA1UmLD0KTW_H9g2UW0J8ZYzQk1_Xjc,6299
|
|
55
57
|
tests/unit/cloudbender/test_reservations_unit.py,sha256=nWEZ_p9EF2C49nbgL7Dt4NG2Irmyt94ZqJJQDyNfGFI,5624
|
|
56
|
-
tests/unit/cloudbender/test_services_unit.py,sha256=
|
|
57
|
-
trainml/__init__.py,sha256=
|
|
58
|
+
tests/unit/cloudbender/test_services_unit.py,sha256=fYJx-W89HD-EYkO32_v33X40VxipUfWQCy13FZO2fcA,5220
|
|
59
|
+
trainml/__init__.py,sha256=Y4nVE4uHHtjpS5d72HA93YdFA0TnjfAempDoVlrKeBg,432
|
|
58
60
|
trainml/__main__.py,sha256=JgErYkiskih8Y6oRwowALtR-rwQhAAdqOYWjQraRIPI,59
|
|
59
61
|
trainml/auth.py,sha256=gruZv27nhttrCbhcVQTH9kZkF2uMm1E06SwA_2pQAHQ,26565
|
|
60
|
-
trainml/checkpoints.py,sha256=
|
|
62
|
+
trainml/checkpoints.py,sha256=rzVkty_3wNOUxuCfA5pSrUwL0Euef8VLHUcf_XhX-R4,8430
|
|
61
63
|
trainml/connections.py,sha256=h-S1NZbOkaXpIlpRStA6q-3eXc_OMlFWOLzF8R9SVG8,20029
|
|
62
|
-
trainml/datasets.py,sha256=
|
|
64
|
+
trainml/datasets.py,sha256=75LqUPDyFuDXjjW6goGsjU9owCWjURUIM9q8uRlyU5o,8249
|
|
63
65
|
trainml/environments.py,sha256=OH4o08zXZ7IJ2CiA1rPnys2Fl45r8qvQHfM2mCBRAIc,1507
|
|
64
66
|
trainml/exceptions.py,sha256=MG1FkcjRacv3HoPuBS1IWLCUk0wGHEQ6DaOzXNymsNI,4094
|
|
65
67
|
trainml/gpu_types.py,sha256=mm-dwfYc02192bmYPIJmzesndyBcoOdkKYBaYZXOUwU,1901
|
|
66
|
-
trainml/jobs.py,sha256=
|
|
67
|
-
trainml/models.py,sha256=
|
|
68
|
-
trainml/projects.py,sha256=
|
|
68
|
+
trainml/jobs.py,sha256=28U0kyqczGhzP-tuRGAk6lNTeOzYdElZ7VpiHgz81rg,18056
|
|
69
|
+
trainml/models.py,sha256=Z12QDxuJic07yKcIa4hKzNrGENoZtWytHYF8Kj_xhhc,7961
|
|
70
|
+
trainml/projects.py,sha256=ZVsVJtUPTYsQAkCLimjYF3DAwFwQV4ucLila-N-jSJo,6585
|
|
69
71
|
trainml/trainml.py,sha256=EBnqQ3Q291xrPKYuN6xKm5yt0mJQOJ3b7GAlR-fl8NI,10864
|
|
70
|
-
trainml/volumes.py,sha256=
|
|
72
|
+
trainml/volumes.py,sha256=1EAPOuYhs8WsOZDb0k0qhpqn2WJnoFxDTQ5tv9ZABUw,8312
|
|
71
73
|
trainml/cli/__init__.py,sha256=Gvj6oGSEtgpb40ACtiVeMD93GM-uy15MG6VlX6rwdwA,4346
|
|
72
74
|
trainml/cli/checkpoint.py,sha256=8Rh4bmFwJ4DKlIjHK-FLTeRynABqKCgIUGRtbQhAsX4,7170
|
|
73
75
|
trainml/cli/connection.py,sha256=ELV6bPL30dzttFNxDU7Fb74R8oPL_E70k7TcJEzbwtQ,1700
|
|
@@ -77,18 +79,20 @@ trainml/cli/gpu.py,sha256=CMcQyl2qbUgc2bc-gvUVT6X7bq2-sgiCHl3hyZ6kFWM,883
|
|
|
77
79
|
trainml/cli/model.py,sha256=hR23E6ttRXcLk-RofkPK6wUXMO7OU6sT6jTEHTmUg9Q,6111
|
|
78
80
|
trainml/cli/project.py,sha256=f772bHs68AVRY60l7dbVKgeDmDC3u2bZjqrz7zm7xvQ,3314
|
|
79
81
|
trainml/cli/volume.py,sha256=kDUss93N78DT-YlLjC6I3jEq5nBWfRNNR5M4tY_F_Zg,6246
|
|
80
|
-
trainml/cli/cloudbender/__init__.py,sha256=
|
|
82
|
+
trainml/cli/cloudbender/__init__.py,sha256=tKkL8TzD9nEeRtf1OEYM4XZJWb0-rGMPTmLIdA5G_SY,592
|
|
83
|
+
trainml/cli/cloudbender/data_connector.py,sha256=q0Hqeh5w00Zkmh61fzO3pNR6EmQfoLQ2DbgJ5ZBx1UM,3606
|
|
81
84
|
trainml/cli/cloudbender/datastore.py,sha256=gJ-comfAq65uiPoONQ35UIDLNVN7QKMf3l_2EcTN6zY,3478
|
|
82
85
|
trainml/cli/cloudbender/device.py,sha256=KGZCFwwvS4tWsWuudrhlvquu_IFtV7LCUAOmCajicic,3453
|
|
83
86
|
trainml/cli/cloudbender/node.py,sha256=iN_WaPCxOhtgDtnSsIFAEMGADG4MKiLjWoez6YSYwZI,3843
|
|
84
87
|
trainml/cli/cloudbender/provider.py,sha256=oFjZWKfFQjNY7OtDu7nUdfv-RTmQc_Huuug963D3BdA,1726
|
|
85
88
|
trainml/cli/cloudbender/region.py,sha256=X6-FYOb-pGpOEazn-NbsYSwa9ergB7FGATFkTe4a8Pk,2892
|
|
86
89
|
trainml/cli/cloudbender/reservation.py,sha256=z2oMYwp-w_Keo1DepKUtuRnwiGz2VscVHDYWEFap1gs,3569
|
|
87
|
-
trainml/cli/cloudbender/service.py,sha256=
|
|
90
|
+
trainml/cli/cloudbender/service.py,sha256=Wh6ycEuECiKL7qpFhc4IyO1rR5lvLtIHk3S475_R6pk,3147
|
|
88
91
|
trainml/cli/job/__init__.py,sha256=ljY-ELeXhXQ7txASbJEKGBom7OXfNyy7sWILz3nxRAE,6545
|
|
89
92
|
trainml/cli/job/create.py,sha256=pfOCqs5Vfk4PAI5KZpXHJ1vp3DDe4ccvYzieh0oFexY,34288
|
|
90
93
|
trainml/cloudbender/__init__.py,sha256=iE29obtC0_9f0IhRvHQcG5aY58fVhVYipTakpjAhdss,64
|
|
91
|
-
trainml/cloudbender/cloudbender.py,sha256=
|
|
94
|
+
trainml/cloudbender/cloudbender.py,sha256=ekJZHSQ1F4HF8y0sAJ3MDB_hiC8QxPv9-O7U24z_RR4,717
|
|
95
|
+
trainml/cloudbender/data_connectors.py,sha256=Qr-p9nukBeIaCg2v2plgZTBiBznAIBqDejzWpqHx310,3297
|
|
92
96
|
trainml/cloudbender/datastores.py,sha256=biVGifedc3r1DcuxsfCQh-f1Tw4HcJMMJfdgHxPfkKM,3506
|
|
93
97
|
trainml/cloudbender/device_configs.py,sha256=DJWiGFaOE4C4xLE1BLDAiEjeL4T00R3FA_pb1xnSOr4,3399
|
|
94
98
|
trainml/cloudbender/devices.py,sha256=QORNmKdLJoqGZmeWXRnivC1JmNBIw-ebvf4bsoem3r8,5660
|
|
@@ -96,10 +100,10 @@ trainml/cloudbender/nodes.py,sha256=7HV2VLmxiUcJ-Kc6AAXS3M8C_XO-HKmaVgJpPdVnBQk,
|
|
|
96
100
|
trainml/cloudbender/providers.py,sha256=-gkdiTu6Ah2znUuyyc3ZuRALagW8s1-OgqVjtlvc1AU,2036
|
|
97
101
|
trainml/cloudbender/regions.py,sha256=Aqc_MeLVAeEv21e-lR5u8x1eintqUhZT2DBiQG3AcEE,3570
|
|
98
102
|
trainml/cloudbender/reservations.py,sha256=rOrGXWIUHON4ad2aufEcvK4Yv_Mv3dDoScUtLJE8LWw,3586
|
|
99
|
-
trainml/cloudbender/services.py,sha256=
|
|
100
|
-
trainml-0.5.
|
|
101
|
-
trainml-0.5.
|
|
102
|
-
trainml-0.5.
|
|
103
|
-
trainml-0.5.
|
|
104
|
-
trainml-0.5.
|
|
105
|
-
trainml-0.5.
|
|
103
|
+
trainml/cloudbender/services.py,sha256=KC3VcyljvnazUUG-Tzwm6Ab6d0--yuccXjOaMgYB5uA,5126
|
|
104
|
+
trainml-0.5.7.dist-info/LICENSE,sha256=s0lpBxhSSUEpMavwde-Vb6K_K7xDCTTvSpNznVqVGR0,1069
|
|
105
|
+
trainml-0.5.7.dist-info/METADATA,sha256=NsTsUqBAGFthKTG-StiIAvRAhionkM9dDCUPsxYAXsc,7345
|
|
106
|
+
trainml-0.5.7.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
107
|
+
trainml-0.5.7.dist-info/entry_points.txt,sha256=OzBDm2wXby1bSGF02jTVxzRFZLejnbFiLHXhKdW3Bds,63
|
|
108
|
+
trainml-0.5.7.dist-info/top_level.txt,sha256=Y1kLFRWKUW7RG8BX7cvejHF_yW8wBOaRYF1JQHENY4w,23
|
|
109
|
+
trainml-0.5.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|