trainml 0.5.8__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. tests/integration/projects/__init__.py +0 -0
  2. tests/integration/projects/conftest.py +10 -0
  3. tests/integration/projects/test_projects_data_connectors_integration.py +44 -0
  4. tests/integration/projects/test_projects_datastores_integration.py +42 -0
  5. tests/integration/{test_projects_integration.py → projects/test_projects_integration.py} +0 -6
  6. tests/integration/projects/test_projects_keys_integration.py +43 -0
  7. tests/integration/projects/test_projects_secrets_integration.py +44 -0
  8. tests/integration/projects/test_projects_services_integration.py +44 -0
  9. tests/integration/test_checkpoints_integration.py +1 -2
  10. tests/integration/test_models_integration.py +0 -1
  11. tests/unit/cli/projects/__init__.py +0 -0
  12. tests/unit/cli/projects/test_cli_project_data_connector_unit.py +28 -0
  13. tests/unit/cli/projects/test_cli_project_datastore_unit.py +26 -0
  14. tests/unit/cli/projects/test_cli_project_key_unit.py +26 -0
  15. tests/unit/cli/projects/test_cli_project_secret_unit.py +26 -0
  16. tests/unit/cli/projects/test_cli_project_service_unit.py +26 -0
  17. tests/unit/cli/projects/test_cli_project_unit.py +19 -0
  18. tests/unit/cloudbender/test_datastores_unit.py +1 -5
  19. tests/unit/conftest.py +146 -3
  20. tests/unit/projects/__init__.py +0 -0
  21. tests/unit/projects/test_project_data_connectors_unit.py +102 -0
  22. tests/unit/projects/test_project_datastores_unit.py +96 -0
  23. tests/unit/projects/test_project_keys_unit.py +96 -0
  24. tests/unit/projects/test_project_secrets_unit.py +101 -0
  25. tests/unit/projects/test_project_services_unit.py +102 -0
  26. tests/unit/projects/test_projects_unit.py +128 -0
  27. tests/unit/test_checkpoints_unit.py +15 -23
  28. tests/unit/test_datasets_unit.py +15 -20
  29. tests/unit/test_models_unit.py +13 -16
  30. tests/unit/test_volumes_unit.py +3 -0
  31. trainml/__init__.py +1 -1
  32. trainml/checkpoints.py +14 -3
  33. trainml/cli/cloudbender/datastore.py +2 -7
  34. trainml/cli/project/__init__.py +84 -0
  35. trainml/cli/project/data_connector.py +61 -0
  36. trainml/cli/project/datastore.py +61 -0
  37. trainml/cli/project/key.py +124 -0
  38. trainml/cli/project/secret.py +71 -0
  39. trainml/cli/project/service.py +61 -0
  40. trainml/cloudbender/data_connectors.py +8 -0
  41. trainml/cloudbender/datastores.py +9 -19
  42. trainml/cloudbender/nodes.py +44 -1
  43. trainml/cloudbender/providers.py +53 -0
  44. trainml/cloudbender/regions.py +48 -0
  45. trainml/datasets.py +14 -3
  46. trainml/exceptions.py +51 -0
  47. trainml/jobs.py +2 -13
  48. trainml/models.py +14 -3
  49. trainml/projects/__init__.py +3 -0
  50. trainml/projects/data_connectors.py +63 -0
  51. trainml/projects/datastores.py +58 -0
  52. trainml/projects/keys.py +71 -0
  53. trainml/projects/projects.py +83 -0
  54. trainml/projects/secrets.py +70 -0
  55. trainml/projects/services.py +63 -0
  56. trainml/volumes.py +15 -3
  57. {trainml-0.5.8.dist-info → trainml-0.5.11.dist-info}/METADATA +1 -1
  58. {trainml-0.5.8.dist-info → trainml-0.5.11.dist-info}/RECORD +64 -38
  59. tests/unit/cli/cloudbender/test_cli_reservation_unit.py +0 -34
  60. tests/unit/cli/test_cli_project_unit.py +0 -42
  61. tests/unit/cloudbender/test_reservations_unit.py +0 -173
  62. tests/unit/test_projects_unit.py +0 -320
  63. trainml/cli/cloudbender/reservation.py +0 -159
  64. trainml/cli/project.py +0 -149
  65. trainml/cloudbender/reservations.py +0 -126
  66. trainml/projects.py +0 -228
  67. /tests/unit/{test_auth.py → test_auth_unit.py} +0 -0
  68. /tests/unit/{test_trainml.py → test_trainml_unit.py} +0 -0
  69. {trainml-0.5.8.dist-info → trainml-0.5.11.dist-info}/LICENSE +0 -0
  70. {trainml-0.5.8.dist-info → trainml-0.5.11.dist-info}/WHEEL +0 -0
  71. {trainml-0.5.8.dist-info → trainml-0.5.11.dist-info}/entry_points.txt +0 -0
  72. {trainml-0.5.8.dist-info → trainml-0.5.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,124 @@
1
+ import click
2
+ import os
3
+ import json
4
+ import base64
5
+ from pathlib import Path
6
+ from trainml.cli import pass_config
7
+ from trainml.cli.project import project
8
+
9
+
10
@project.group()
@pass_config
def key(config):
    """trainML project key commands."""
    # Pure command group: the sub-commands registered on it do the work.
    # The docstring above is kept verbatim because click shows it as help text.
15
+
16
+
17
@key.command()
@pass_config
def list(config):
    """List keys."""
    # Resolve the active project, then fetch the keys attached to it.
    current_project = config.trainml.run(
        config.trainml.client.projects.get_current()
    )
    project_keys = config.trainml.run(current_project.keys.list())

    # Header row plus a divider row; the divider is truncated by the
    # precision in the row format below.
    divider = "-" * 80
    rows = [
        ["TYPE", "KEY ID", "UPDATED AT"],
        [divider, divider, divider],
    ]
    rows.extend(
        [
            entry.type,
            entry.key_id,
            entry.updated_at.isoformat(timespec="seconds"),
        ]
        for entry in project_keys
    )

    # Right-aligned, fixed-width columns; over-long values are cut off.
    row_format = "{: >13.11} {: >37.35} {: >28.26}"
    for row in rows:
        click.echo(row_format.format(*row), file=config.stdout)
46
+
47
+
48
@key.command()
@click.argument(
    "type",
    type=click.Choice(
        [
            "aws",
            "azure",
            "docker",
            "gcp",
            "huggingface",
            "kaggle",
            "ngc",
            "wasabi",
        ],
        case_sensitive=False,
    ),
)
@pass_config
def put(config, type):
    """
    Set a key.

    A key is uploaded.
    """
    # The docstring above is click's help text and is kept as-is.
    # The credentials are collected interactively; which prompts appear
    # depends on the provider TYPE. Secrets are prompted with hidden input.
    current_project = config.trainml.run(
        config.trainml.client.projects.get_current()
    )

    # Only the Azure branch sets a tenant; every other type sends None.
    tenant = None

    if type in ["aws", "wasabi"]:
        key_id = click.prompt("Enter the key ID", type=str, hide_input=False)
        secret = click.prompt("Enter the secret key", type=str, hide_input=True)
    elif type == "azure":
        key_id = click.prompt(
            "Enter the Application (client) ID", type=str, hide_input=False
        )
        # Fixed prompt typo ("ley"); Azure labels this value "Directory (tenant) ID".
        tenant = click.prompt(
            "Enter the Directory (tenant) ID", type=str, hide_input=False
        )
        secret = click.prompt("Enter the client secret", type=str, hide_input=True)
    elif type in ["docker", "huggingface"]:
        key_id = click.prompt("Enter the username", type=str, hide_input=False)
        secret = click.prompt("Enter the access token", type=str, hide_input=True)
    elif type in ["gcp", "kaggle"]:
        file_name = click.prompt(
            "Enter the path of the credentials file",
            type=click.Path(
                exists=True, file_okay=True, dir_okay=False, resolve_path=True
            ),
            hide_input=False,
        )
        # The credentials file name is used as the key ID.
        key_id = os.path.basename(file_name)
        # Parse and re-serialize so that a file that is not valid JSON fails
        # here instead of being uploaded. JSON is read as UTF-8 explicitly
        # rather than relying on the platform's default encoding.
        with open(file_name, encoding="utf-8") as f:
            secret = json.dumps(json.load(f))
    elif type == "ngc":
        # This branch uses a fixed key ID and only prompts for the token.
        key_id = "$oauthtoken"
        secret = click.prompt("Enter the access token", type=str, hide_input=True)
    else:
        # Defensive: click.Choice should already have rejected other values.
        raise click.UsageError("Unsupported key type")

    return config.trainml.run(
        current_project.keys.put(
            type=type, key_id=key_id, secret=secret, tenant=tenant
        )
    )
111
+
112
+
113
@key.command()
@click.argument("name", type=click.STRING)
@pass_config
def remove(config, name):
    """
    Remove a key.


    """
    # The docstring above is click's help text and is kept as-is.
    current_project = config.trainml.run(
        config.trainml.client.projects.get_current()
    )

    # Fixed: the project's key collection is `keys` (the same attribute the
    # sibling `list` and `put` commands use); the original referenced
    # `project.key`.
    return config.trainml.run(current_project.keys.remove(name))
@@ -0,0 +1,71 @@
1
+ import click
2
+ from trainml.cli import pass_config
3
+ from trainml.cli.project import project
4
+
5
+
6
@project.group()
@pass_config
def secret(config):
    """trainML project secret commands."""
    # Pure command group: the sub-commands registered on it do the work.
    # The docstring above is kept verbatim because click shows it as help text.
11
+
12
+
13
@secret.command()
@pass_config
def list(config):
    """List secrets."""
    # Resolve the active project, then fetch the secrets attached to it.
    current_project = config.trainml.run(
        config.trainml.client.projects.get_current()
    )
    project_secrets = config.trainml.run(current_project.secrets.list())

    # Header row plus a divider row; the divider is truncated by the
    # precision in the row format below.
    divider = "-" * 80
    rows = [
        ["NAME", "CREATED BY", "UPDATED AT"],
        [divider, divider, divider],
    ]
    rows.extend(
        [
            entry.name,
            entry.created_by,
            entry.updated_at.isoformat(timespec="seconds"),
        ]
        for entry in project_secrets
    )

    # Right-aligned, fixed-width columns; over-long values are cut off.
    row_format = "{: >38.36} {: >30.28} {: >28.26}"
    for row in rows:
        click.echo(row_format.format(*row), file=config.stdout)
42
+
43
+
44
@secret.command()
@click.argument("name", type=click.STRING)
@pass_config
def put(config, name):
    """
    Set a secret value.

    Secret is created with the specified NAME.
    """
    # The docstring above is click's help text and is kept as-is.
    client = config.trainml.client
    current_project = config.trainml.run(client.projects.get_current())

    # The value is prompted with hidden input rather than taken as an argument.
    secret_value = click.prompt("Enter the secret value", type=str, hide_input=True)

    upload = current_project.secrets.put(name=name, value=secret_value)
    return config.trainml.run(upload)
58
+
59
+
60
@secret.command()
@click.argument("name", type=click.STRING)
@pass_config
def remove(config, name):
    """
    Remove a secret.


    """
    # The docstring above is click's help text and is kept as-is.
    current_project = config.trainml.run(
        config.trainml.client.projects.get_current()
    )

    # Fixed: the project's secret collection is `secrets` (the same attribute
    # the sibling `list` and `put` commands use); the original referenced
    # `project.secret`.
    return config.trainml.run(current_project.secrets.remove(name))
@@ -0,0 +1,61 @@
1
+ import click
2
+ import os
3
+ import json
4
+ import base64
5
+ from pathlib import Path
6
+ from trainml.cli import pass_config
7
+ from trainml.cli.project import project
8
+
9
+
10
@project.group()
@pass_config
def service(config):
    """trainML project service commands."""
    # Pure command group: the sub-commands registered on it do the work.
    # The docstring above is kept verbatim because click shows it as help text.
15
+
16
+
17
@service.command()
@pass_config
def list(config):
    """List project services."""
    # This command looks the project up by the client's `project` value
    # rather than through `projects.get_current()`.
    client = config.trainml.client
    current_project = config.trainml.run(client.projects.get(client.project))
    project_services = config.trainml.run(current_project.services.list())

    # Header row plus a divider row; the divider is truncated by the
    # precision in the row format below.
    # NOTE(review): the third column is headed "TYPE" but is filled with
    # `hostname` -- confirm which of the two is intended.
    divider = "-" * 80
    rows = [
        ["ID", "NAME", "TYPE", "REGION_UUID"],
        [divider, divider, divider, divider],
    ]
    rows.extend(
        [entry.id, entry.name, entry.hostname, entry.region_uuid]
        for entry in project_services
    )

    # Right-aligned, fixed-width columns; over-long values are cut off.
    row_format = "{: >38.36} {: >30.28} {: >15.13} {: >38.36}"
    for row in rows:
        click.echo(row_format.format(*row), file=config.stdout)
51
+
52
+
53
@service.command()
@pass_config
def refresh(config):
    """
    Refresh project service list.
    """
    # The docstring above is click's help text and is kept as-is.
    client = config.trainml.client
    current_project = config.trainml.run(client.projects.get_current())

    refresh_request = current_project.services.refresh()
    return config.trainml.run(refresh_request)
@@ -1,5 +1,13 @@
1
1
  import json
2
2
  import logging
3
+ import asyncio
4
+ import math
5
+
6
+ from trainml.exceptions import (
7
+ ApiError,
8
+ SpecificationError,
9
+ TrainMLException,
10
+ )
3
11
 
4
12
 
5
13
  class DataConnectors(object):
@@ -1,5 +1,13 @@
1
1
  import json
2
2
  import logging
3
+ import asyncio
4
+ import math
5
+
6
+ from trainml.exceptions import (
7
+ ApiError,
8
+ SpecificationError,
9
+ TrainMLException,
10
+ )
3
11
 
4
12
 
5
13
  class Datastores(object):
@@ -20,9 +28,7 @@ class Datastores(object):
20
28
  "GET",
21
29
  kwargs,
22
30
  )
23
- datastores = [
24
- Datastore(self.trainml, **datastore) for datastore in resp
25
- ]
31
+ datastores = [Datastore(self.trainml, **datastore) for datastore in resp]
26
32
  return datastores
27
33
 
28
34
  async def create(
@@ -31,18 +37,12 @@ class Datastores(object):
31
37
  region_uuid,
32
38
  name,
33
39
  type,
34
- uri,
35
- root,
36
- options=None,
37
40
  **kwargs,
38
41
  ):
39
42
  logging.info(f"Creating Datastore {name}")
40
43
  data = dict(
41
44
  name=name,
42
45
  type=type,
43
- uri=uri,
44
- root=root,
45
- options=options,
46
46
  **kwargs,
47
47
  )
48
48
  payload = {k: v for k, v in data.items() if v is not None}
@@ -73,8 +73,6 @@ class Datastore:
73
73
  self._region_uuid = self._datastore.get("region_uuid")
74
74
  self._type = self._datastore.get("type")
75
75
  self._name = self._datastore.get("name")
76
- self._uri = self._datastore.get("uri")
77
- self._root = self._datastore.get("root")
78
76
 
79
77
  @property
80
78
  def id(self) -> str:
@@ -96,14 +94,6 @@ class Datastore:
96
94
  def name(self) -> str:
97
95
  return self._name
98
96
 
99
- @property
100
- def uri(self) -> str:
101
- return self._uri
102
-
103
- @property
104
- def root(self) -> str:
105
- return self._root
106
-
107
97
  def __str__(self):
108
98
  return json.dumps({k: v for k, v in self._datastore.items()})
109
99
 
@@ -1,5 +1,9 @@
1
1
  import json
2
2
  import logging
3
+ import asyncio
4
+ import math
5
+
6
+ from trainml.exceptions import ApiError, SpecificationError, TrainMLException, NodeError
3
7
 
4
8
 
5
9
  class Nodes(object):
@@ -29,7 +33,7 @@ class Nodes(object):
29
33
  region_uuid,
30
34
  friendly_name,
31
35
  hostname,
32
- minion_id,
36
+ minion_id=None,
33
37
  type="permanent",
34
38
  service="compute",
35
39
  **kwargs,
@@ -153,3 +157,42 @@ class Node:
153
157
  None,
154
158
  dict(command=command),
155
159
  )
160
+
161
async def wait_for(self, status, timeout=300):
    """Poll this node until it reaches ``status`` or ``timeout`` seconds pass.

    Args:
        status: Target status; one of "active", "maintenance", "offline",
            "stopped" or "archived".
        timeout: Maximum number of seconds to wait (at most 24 hours).

    Returns:
        ``self`` once a refresh shows the requested status. ``None`` when the
        node already has that status on entry, or when waiting for
        "archived" and the refresh fails with an ApiError of status 404.

    Raises:
        SpecificationError: ``status`` is not a valid target or ``timeout``
            exceeds the maximum.
        NodeError: the node reports "errored" or "failed" while waiting.
        ApiError: any other API failure during a refresh.
        TrainMLException: the status was not reached before the timeout.
    """
    # Already at the target: return before any validation or polling.
    if self.status == status:
        return

    valid_statuses = ["active", "maintenance", "offline", "stopped", "archived"]
    if status not in valid_statuses:
        raise SpecificationError(
            "status",
            f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
        )

    max_timeout = 24 * 60 * 60
    if timeout > max_timeout:
        raise SpecificationError(
            "timeout",
            f"timeout must be less than {max_timeout} seconds.",
        )

    # Poll interval is timeout / 60, clamped to the range 5..60 seconds.
    poll_interval = max(min(timeout / 60, 60), 5)
    max_attempts = math.ceil(timeout / poll_interval)

    for attempt in range(1, max_attempts + 1):
        await asyncio.sleep(poll_interval)
        try:
            await self.refresh()
        except ApiError as e:
            # A 404 while waiting for "archived" is treated as success.
            if status == "archived" and e.status == 404:
                return
            raise e
        if self.status in ["errored", "failed"]:
            raise NodeError(self.status, self)
        if self.status == status:
            return self
        logging.debug(f"self: {self}, retry count {attempt}")

    raise TrainMLException(f"Timeout waiting for {status}")
@@ -1,7 +1,16 @@
1
1
  import json
2
2
  import logging
3
+ import asyncio
4
+ import math
3
5
  from datetime import datetime
4
6
 
7
+ from trainml.exceptions import (
8
+ ApiError,
9
+ SpecificationError,
10
+ TrainMLException,
11
+ ProviderError,
12
+ )
13
+
5
14
 
6
15
  class Providers(object):
7
16
  def __init__(self, trainml):
@@ -36,6 +45,7 @@ class Provider:
36
45
  self._provider = kwargs
37
46
  self._id = self._provider.get("provider_uuid")
38
47
  self._type = self._provider.get("type")
48
+ self._status = self._provider.get("status")
39
49
  self._credits = self._provider.get("credits")
40
50
 
41
51
  @property
@@ -46,6 +56,10 @@ class Provider:
46
56
  def type(self) -> str:
47
57
  return self._type
48
58
 
59
@property
def status(self) -> str:
    """Status value read from the provider data in ``__init__``."""
    return self._status
62
+
49
63
  @property
50
64
  def credits(self) -> float:
51
65
  return self._credits
@@ -69,3 +83,42 @@ class Provider:
69
83
  )
70
84
  self.__init__(self.trainml, **resp)
71
85
  return self
86
+
87
async def wait_for(self, status, timeout=300):
    """Poll this provider until it reaches ``status`` or ``timeout`` seconds pass.

    Args:
        status: Target status; one of "ready" or "archived".
        timeout: Maximum number of seconds to wait (at most 24 hours).

    Returns:
        ``self`` once a refresh shows the requested status. ``None`` when the
        provider already has that status on entry, or when waiting for
        "archived" and the refresh fails with an ApiError of status 404.

    Raises:
        SpecificationError: ``status`` is not a valid target or ``timeout``
            exceeds the maximum.
        ProviderError: the provider reports "errored" or "failed" while
            waiting.
        ApiError: any other API failure during a refresh.
        TrainMLException: the status was not reached before the timeout.
    """
    # Already at the target: return before any validation or polling.
    if self.status == status:
        return

    valid_statuses = ["ready", "archived"]
    if status not in valid_statuses:
        raise SpecificationError(
            "status",
            f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
        )

    max_timeout = 24 * 60 * 60
    if timeout > max_timeout:
        raise SpecificationError(
            "timeout",
            f"timeout must be less than {max_timeout} seconds.",
        )

    # Poll interval is timeout / 60, clamped to the range 5..60 seconds.
    poll_interval = max(min(timeout / 60, 60), 5)
    max_attempts = math.ceil(timeout / poll_interval)

    for attempt in range(1, max_attempts + 1):
        await asyncio.sleep(poll_interval)
        try:
            await self.refresh()
        except ApiError as e:
            # A 404 while waiting for "archived" is treated as success.
            if status == "archived" and e.status == 404:
                return
            raise e
        if self.status in ["errored", "failed"]:
            raise ProviderError(self.status, self)
        if self.status == status:
            return self
        logging.debug(f"self: {self}, retry count {attempt}")

    raise TrainMLException(f"Timeout waiting for {status}")
@@ -1,5 +1,14 @@
1
1
  import json
2
2
  import logging
3
+ import asyncio
4
+ import math
5
+
6
+ from trainml.exceptions import (
7
+ ApiError,
8
+ SpecificationError,
9
+ TrainMLException,
10
+ RegionError,
11
+ )
3
12
 
4
13
 
5
14
  class Regions(object):
@@ -111,3 +120,42 @@ class Region:
111
120
  None,
112
121
  dict(project_uuid=project_uuid, checkpoint_uuid=checkpoint_uuid),
113
122
  )
123
+
124
async def wait_for(self, status, timeout=300):
    """Poll this region until it reaches ``status`` or ``timeout`` seconds pass.

    Args:
        status: Target status; one of "healthy", "offline" or "archived".
        timeout: Maximum number of seconds to wait (at most 24 hours).

    Returns:
        ``self`` once a refresh shows the requested status. ``None`` when the
        region already has that status on entry, or when waiting for
        "archived" and the refresh fails with an ApiError of status 404.

    Raises:
        SpecificationError: ``status`` is not a valid target or ``timeout``
            exceeds the maximum.
        RegionError: the region reports "errored" or "failed" while waiting.
        ApiError: any other API failure during a refresh.
        TrainMLException: the status was not reached before the timeout.
    """
    # Already at the target: return before any validation or polling.
    if self.status == status:
        return

    valid_statuses = ["healthy", "offline", "archived"]
    if status not in valid_statuses:
        raise SpecificationError(
            "status",
            f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
        )

    max_timeout = 24 * 60 * 60
    if timeout > max_timeout:
        raise SpecificationError(
            "timeout",
            f"timeout must be less than {max_timeout} seconds.",
        )

    # Poll interval is timeout / 60, clamped to the range 5..60 seconds.
    poll_interval = max(min(timeout / 60, 60), 5)
    max_attempts = math.ceil(timeout / poll_interval)

    for attempt in range(1, max_attempts + 1):
        await asyncio.sleep(poll_interval)
        try:
            await self.refresh()
        except ApiError as e:
            # A 404 while waiting for "archived" is treated as success.
            if status == "archived" and e.status == 404:
                return
            raise e
        if self.status in ["errored", "failed"]:
            raise RegionError(self.status, self)
        if self.status == status:
            return self
        logging.debug(f"self: {self}, retry count {attempt}")

    raise TrainMLException(f"Timeout waiting for {status}")
trainml/datasets.py CHANGED
@@ -31,13 +31,24 @@ class Datasets(object):
31
31
  datasets = [Dataset(self.trainml, **dataset) for dataset in resp]
32
32
  return datasets
33
33
 
34
- async def create(self, name, source_type, source_uri, **kwargs):
34
+ async def create(
35
+ self,
36
+ name,
37
+ source_type,
38
+ source_uri,
39
+ type="evefs",
40
+ project_uuid=None,
41
+ **kwargs,
42
+ ):
43
+ if not project_uuid:
44
+ project_uuid = self.trainml.active_project
35
45
  data = dict(
36
46
  name=name,
37
47
  source_type=source_type,
38
48
  source_uri=source_uri,
39
- source_options=kwargs.get("source_options"),
40
- project_uuid=kwargs.get("project_uuid") or self.trainml.active_project,
49
+ project_uuid=project_uuid,
50
+ type=type,
51
+ **kwargs,
41
52
  )
42
53
  payload = {k: v for k, v in data.items() if v is not None}
43
54
  logging.info(f"Creating Dataset {name}")
trainml/exceptions.py CHANGED
@@ -147,3 +147,54 @@ class SpecificationError(TrainMLException):
147
147
 
148
148
  def __str__(self):
149
149
  return "SpecificationError({self.attribute}, {self.message})".format(self=self)
150
+
151
+
152
class NodeError(TrainMLException):
    """Error carrying a node status together with the associated data."""

    def __init__(self, status, data, *args):
        super().__init__(data, *args)
        self._status = status
        self._message = data

    @property
    def status(self) -> str:
        return self._status

    # NOTE(review): `self.message` is not defined in this class; presumably
    # TrainMLException exposes it over `_message` -- confirm.
    def __repr__(self):
        return f"NodeError({self.status}, {self.message})"

    def __str__(self):
        return f"NodeError({self.status}, {self.message})"
167
+
168
+
169
class ProviderError(TrainMLException):
    """Error carrying a provider status together with the associated data."""

    def __init__(self, status, data, *args):
        super().__init__(data, *args)
        self._status = status
        self._message = data

    @property
    def status(self) -> str:
        return self._status

    # NOTE(review): `self.message` is not defined in this class; presumably
    # TrainMLException exposes it over `_message` -- confirm.
    def __repr__(self):
        return f"ProviderError({self.status}, {self.message})"

    def __str__(self):
        return f"ProviderError({self.status}, {self.message})"
184
+
185
+
186
class RegionError(TrainMLException):
    """Error carrying a region status together with the associated data."""

    def __init__(self, status, data, *args):
        super().__init__(data, *args)
        self._status = status
        self._message = data

    @property
    def status(self) -> str:
        return self._status

    # NOTE(review): `self.message` is not defined in this class; presumably
    # TrainMLException exposes it over `_message` -- confirm.
    def __repr__(self):
        return f"RegionError({self.status}, {self.message})"

    def __str__(self):
        return f"RegionError({self.status}, {self.message})"
trainml/jobs.py CHANGED
@@ -476,7 +476,6 @@ class Job:
476
476
  return
477
477
  valid_statuses = [
478
478
  "waiting for data/model download",
479
- "waiting for GPUs",
480
479
  "waiting for resources",
481
480
  "running",
482
481
  "stopped",
@@ -488,11 +487,6 @@ class Job:
488
487
  "status",
489
488
  f"Invalid wait_for status {status}. Valid statuses are: {valid_statuses}",
490
489
  )
491
- if status == "waiting for GPUs":
492
- warnings.warn(
493
- "'waiting for GPUs' status is deprecated, use 'waiting for resources' instead.",
494
- DeprecationWarning,
495
- )
496
490
  if (self.type == "training") and status == "stopped":
497
491
  warnings.warn(
498
492
  "'stopped' status is deprecated for training jobs, use 'finished' instead.",
@@ -523,12 +517,8 @@ class Job:
523
517
  self.status == status
524
518
  or (
525
519
  status
526
- in [
527
- "waiting for GPUs",
528
- "waiting for resources",
529
- ] ## this status could be very short and the polling could miss it
530
- and self.status
531
- not in ["new", "waiting for GPUs", "waiting for resources"]
520
+ == "waiting for resources" ## this status could be very short and the polling could miss it
521
+ and self.status not in ["new", "waiting for resources"]
532
522
  )
533
523
  or (
534
524
  status
@@ -536,7 +526,6 @@ class Job:
536
526
  and self.status
537
527
  not in [
538
528
  "new",
539
- "waiting for GPUs",
540
529
  "waiting for resources",
541
530
  "waiting for data/model download",
542
531
  ]
trainml/models.py CHANGED
@@ -26,13 +26,24 @@ class Models(object):
26
26
  models = [Model(self.trainml, **model) for model in resp]
27
27
  return models
28
28
 
29
- async def create(self, name, source_type, source_uri, **kwargs):
29
+ async def create(
30
+ self,
31
+ name,
32
+ source_type,
33
+ source_uri,
34
+ type="evefs",
35
+ project_uuid=None,
36
+ **kwargs,
37
+ ):
38
+ if not project_uuid:
39
+ project_uuid = self.trainml.active_project
30
40
  data = dict(
31
41
  name=name,
32
42
  source_type=source_type,
33
43
  source_uri=source_uri,
34
- source_options=kwargs.get("source_options"),
35
- project_uuid=kwargs.get("project_uuid") or self.trainml.active_project,
44
+ project_uuid=project_uuid,
45
+ type=type,
46
+ **kwargs,
36
47
  )
37
48
  payload = {k: v for k, v in data.items() if v is not None}
38
49
  logging.info(f"Creating Model {name}")
@@ -0,0 +1,3 @@
1
+ from .projects import Projects, Project
2
+
3
+ __all__ = ["Projects", "Project"]