zenml-nightly 0.61.0.dev20240711__py3-none-any.whl → 0.61.0.dev20240713__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/cli/stack.py +220 -71
- zenml/constants.py +6 -0
- zenml/enums.py +16 -0
- zenml/integrations/gcp/service_connectors/gcp_service_connector.py +54 -6
- zenml/logging/step_logging.py +34 -35
- zenml/models/__init__.py +2 -0
- zenml/models/v2/core/server_settings.py +0 -20
- zenml/models/v2/misc/stack_deployment.py +20 -0
- zenml/orchestrators/step_launcher.py +1 -0
- zenml/stack_deployments/aws_stack_deployment.py +56 -91
- zenml/stack_deployments/gcp_stack_deployment.py +260 -0
- zenml/stack_deployments/stack_deployment.py +103 -25
- zenml/stack_deployments/utils.py +4 -0
- zenml/zen_server/routers/devices_endpoints.py +4 -1
- zenml/zen_server/routers/server_endpoints.py +29 -2
- zenml/zen_server/routers/stack_deployment_endpoints.py +34 -20
- zenml/zen_stores/migrations/versions/b4fca5241eea_migrate_onboarding_state.py +167 -0
- zenml/zen_stores/rest_zen_store.py +45 -21
- zenml/zen_stores/schemas/server_settings_schemas.py +23 -11
- zenml/zen_stores/sql_zen_store.py +117 -19
- zenml/zen_stores/zen_store_interface.py +6 -5
- {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/METADATA +1 -1
- {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/RECORD +27 -25
- {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/entry_points.txt +0 -0
zenml/logging/step_logging.py
CHANGED
@@ -240,8 +240,6 @@ class StepLogsStorage:
|
|
240
240
|
|
241
241
|
# Immutable filesystems state
|
242
242
|
self.last_merge_time = time.time()
|
243
|
-
self.log_files_not_merged: List[str] = []
|
244
|
-
self.next_merged_file_name: str = self._get_timestamped_filename()
|
245
243
|
|
246
244
|
@property
|
247
245
|
def artifact_store(self) -> "BaseArtifactStore":
|
@@ -279,13 +277,16 @@ class StepLogsStorage:
|
|
279
277
|
or time.time() - self.last_save_time >= self.time_interval
|
280
278
|
)
|
281
279
|
|
282
|
-
def _get_timestamped_filename(self) -> str:
|
280
|
+
def _get_timestamped_filename(self, suffix: str = "") -> str:
|
283
281
|
"""Returns a timestamped filename.
|
284
282
|
|
283
|
+
Args:
|
284
|
+
suffix: optional suffix for the file name
|
285
|
+
|
285
286
|
Returns:
|
286
287
|
The timestamped filename.
|
287
288
|
"""
|
288
|
-
return f"{time.time()}{LOGS_EXTENSION}"
|
289
|
+
return f"{time.time()}{suffix}{LOGS_EXTENSION}"
|
289
290
|
|
290
291
|
def save_to_file(self, force: bool = False) -> None:
|
291
292
|
"""Method to save the buffer to the given URI.
|
@@ -302,12 +303,7 @@ class StepLogsStorage:
|
|
302
303
|
try:
|
303
304
|
if self.buffer:
|
304
305
|
if self.artifact_store.config.IS_IMMUTABLE_FILESYSTEM:
|
305
|
-
if not self.log_files_not_merged:
|
306
|
-
self.next_merged_file_name = (
|
307
|
-
self._get_timestamped_filename()
|
308
|
-
)
|
309
306
|
_logs_uri = self._get_timestamped_filename()
|
310
|
-
self.log_files_not_merged.append(_logs_uri)
|
311
307
|
with self.artifact_store.open(
|
312
308
|
os.path.join(
|
313
309
|
self.logs_uri,
|
@@ -346,42 +342,40 @@ class StepLogsStorage:
|
|
346
342
|
and time.time() - self.last_merge_time > self.merge_files_interval
|
347
343
|
):
|
348
344
|
try:
|
349
|
-
self.merge_log_files(
|
350
|
-
self.next_merged_file_name, self.log_files_not_merged
|
351
|
-
)
|
345
|
+
self.merge_log_files()
|
352
346
|
except (OSError, IOError) as e:
|
353
347
|
logger.error(f"Error while trying to roll up logs: {e}")
|
354
|
-
else:
|
355
|
-
self.log_files_not_merged = []
|
356
348
|
finally:
|
357
349
|
self.last_merge_time = time.time()
|
358
350
|
|
359
|
-
def merge_log_files(
|
360
|
-
self,
|
361
|
-
file_name: Optional[str] = None,
|
362
|
-
files: Optional[List[str]] = None,
|
363
|
-
) -> None:
|
351
|
+
def merge_log_files(self, merge_all_files: bool = False) -> None:
|
364
352
|
"""Merges all log files into one in the given URI.
|
365
353
|
|
366
354
|
Called on the logging context exit.
|
367
355
|
|
368
356
|
Args:
|
369
|
-
|
370
|
-
files: The list of log files to merge.
|
357
|
+
merge_all_files: whether to merge all files or only raw files
|
371
358
|
"""
|
372
359
|
if self.artifact_store.config.IS_IMMUTABLE_FILESYSTEM:
|
373
|
-
|
374
|
-
|
360
|
+
merged_file_suffix = "_merged"
|
361
|
+
files_ = self.artifact_store.listdir(self.logs_uri)
|
362
|
+
if not merge_all_files:
|
363
|
+
# already merged files will not be merged again
|
364
|
+
files_ = [f for f in files_ if merged_file_suffix not in f]
|
365
|
+
file_name_ = self._get_timestamped_filename(
|
366
|
+
suffix=merged_file_suffix
|
367
|
+
)
|
375
368
|
if len(files_) > 1:
|
376
369
|
files_.sort()
|
377
370
|
logger.debug("Log files count: %s", len(files_))
|
378
371
|
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
372
|
+
missing_files = set()
|
373
|
+
# dump all logs to a local file first
|
374
|
+
with self.artifact_store.open(
|
375
|
+
os.path.join(self.logs_uri, file_name_), "w"
|
376
|
+
) as merged_file:
|
377
|
+
for file in files_:
|
378
|
+
try:
|
385
379
|
merged_file.write(
|
386
380
|
str(
|
387
381
|
_load_file_from_artifact_store(
|
@@ -391,11 +385,12 @@ class StepLogsStorage:
|
|
391
385
|
)
|
392
386
|
)
|
393
387
|
)
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
388
|
+
except DoesNotExistException:
|
389
|
+
missing_files.add(file)
|
390
|
+
|
391
|
+
# clean up left over files
|
392
|
+
for file in files_:
|
393
|
+
if file not in missing_files:
|
399
394
|
self.artifact_store.remove(
|
400
395
|
os.path.join(self.logs_uri, str(file))
|
401
396
|
)
|
@@ -452,7 +447,6 @@ class StepLogsStorageContext:
|
|
452
447
|
Restores the `write` method of both stderr and stdout.
|
453
448
|
"""
|
454
449
|
self.storage.save_to_file(force=True)
|
455
|
-
self.storage.merge_log_files()
|
456
450
|
|
457
451
|
setattr(sys.stdout, "write", self.stdout_write)
|
458
452
|
setattr(sys.stdout, "flush", self.stdout_flush)
|
@@ -462,6 +456,11 @@ class StepLogsStorageContext:
|
|
462
456
|
|
463
457
|
redirected.set(False)
|
464
458
|
|
459
|
+
try:
|
460
|
+
self.storage.merge_log_files(merge_all_files=True)
|
461
|
+
except (OSError, IOError) as e:
|
462
|
+
logger.warning(f"Step logs roll-up failed: {e}")
|
463
|
+
|
465
464
|
def _wrap_write(self, method: Callable[..., Any]) -> Callable[..., Any]:
|
466
465
|
"""Wrapper function that utilizes the storage object to store logs.
|
467
466
|
|
zenml/models/__init__.py
CHANGED
@@ -389,6 +389,7 @@ from zenml.models.v2.core.server_settings import (
|
|
389
389
|
)
|
390
390
|
from zenml.models.v2.misc.stack_deployment import (
|
391
391
|
DeployedStack,
|
392
|
+
StackDeploymentConfig,
|
392
393
|
StackDeploymentInfo,
|
393
394
|
)
|
394
395
|
|
@@ -726,6 +727,7 @@ __all__ = [
|
|
726
727
|
"ServerModel",
|
727
728
|
"ServerDatabaseType",
|
728
729
|
"ServerDeploymentType",
|
730
|
+
"StackDeploymentConfig",
|
729
731
|
"StackDeploymentInfo",
|
730
732
|
"OAuthDeviceAuthorizationRequest",
|
731
733
|
"OAuthDeviceAuthorizationResponse",
|
@@ -15,8 +15,6 @@
|
|
15
15
|
|
16
16
|
from datetime import datetime
|
17
17
|
from typing import (
|
18
|
-
Any,
|
19
|
-
Dict,
|
20
18
|
Optional,
|
21
19
|
)
|
22
20
|
from uuid import UUID
|
@@ -57,10 +55,6 @@ class ServerSettingsUpdate(BaseZenModel):
|
|
57
55
|
default=None,
|
58
56
|
title="Whether to display notifications about ZenML updates in the dashboard.",
|
59
57
|
)
|
60
|
-
onboarding_state: Optional[Dict[str, Any]] = Field(
|
61
|
-
default=None,
|
62
|
-
title="The server's onboarding state.",
|
63
|
-
)
|
64
58
|
|
65
59
|
|
66
60
|
# ------------------ Response Model ------------------
|
@@ -96,11 +90,6 @@ class ServerSettingsResponseBody(BaseResponseBody):
|
|
96
90
|
class ServerSettingsResponseMetadata(BaseResponseMetadata):
|
97
91
|
"""Response metadata for server settings."""
|
98
92
|
|
99
|
-
onboarding_state: Dict[str, Any] = Field(
|
100
|
-
default={},
|
101
|
-
title="The server's onboarding state.",
|
102
|
-
)
|
103
|
-
|
104
93
|
|
105
94
|
class ServerSettingsResponseResources(BaseResponseResources):
|
106
95
|
"""Response resources for server settings."""
|
@@ -199,15 +188,6 @@ class ServerSettingsResponse(
|
|
199
188
|
"""
|
200
189
|
return self.get_body().updated
|
201
190
|
|
202
|
-
@property
|
203
|
-
def onboarding_state(self) -> Dict[str, Any]:
|
204
|
-
"""The `onboarding_state` property.
|
205
|
-
|
206
|
-
Returns:
|
207
|
-
the value of the property.
|
208
|
-
"""
|
209
|
-
return self.get_metadata().onboarding_state
|
210
|
-
|
211
191
|
|
212
192
|
# ------------------ Filter Model ------------------
|
213
193
|
|
@@ -40,6 +40,11 @@ class StackDeploymentInfo(BaseModel):
|
|
40
40
|
title="The instructions for post-deployment.",
|
41
41
|
description="The instructions for post-deployment.",
|
42
42
|
)
|
43
|
+
integrations: List[str] = Field(
|
44
|
+
title="ZenML integrations required for the stack.",
|
45
|
+
description="The list of ZenML integrations that need to be installed "
|
46
|
+
"for the stack to be usable.",
|
47
|
+
)
|
43
48
|
permissions: Dict[str, List[str]] = Field(
|
44
49
|
title="The permissions granted to ZenML to access the cloud resources.",
|
45
50
|
description="The permissions granted to ZenML to access the cloud "
|
@@ -52,6 +57,21 @@ class StackDeploymentInfo(BaseModel):
|
|
52
57
|
)
|
53
58
|
|
54
59
|
|
60
|
+
class StackDeploymentConfig(BaseModel):
|
61
|
+
"""Configuration about a stack deployment."""
|
62
|
+
|
63
|
+
deployment_url: str = Field(
|
64
|
+
title="The cloud provider console URL where the stack will be deployed.",
|
65
|
+
)
|
66
|
+
deployment_url_text: str = Field(
|
67
|
+
title="A textual description for the cloud provider console URL.",
|
68
|
+
)
|
69
|
+
configuration: Optional[str] = Field(
|
70
|
+
title="Configuration for the stack deployment that the user must "
|
71
|
+
"manually configure into the cloud provider console.",
|
72
|
+
)
|
73
|
+
|
74
|
+
|
55
75
|
class DeployedStack(BaseModel):
|
56
76
|
"""Information about a deployed stack."""
|
57
77
|
|
@@ -11,16 +11,12 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
12
12
|
# or implied. See the License for the specific language governing
|
13
13
|
# permissions and limitations under the License.
|
14
|
-
"""Functionality to deploy a ZenML stack to
|
14
|
+
"""Functionality to deploy a ZenML stack to AWS."""
|
15
15
|
|
16
|
-
import
|
17
|
-
from typing import ClassVar, Dict, List, Optional, Tuple
|
16
|
+
from typing import ClassVar, Dict, List
|
18
17
|
|
19
|
-
from zenml.
|
20
|
-
from zenml.
|
21
|
-
from zenml.models import (
|
22
|
-
DeployedStack,
|
23
|
-
)
|
18
|
+
from zenml.enums import StackDeploymentProvider
|
19
|
+
from zenml.models import StackDeploymentConfig
|
24
20
|
from zenml.stack_deployments.stack_deployment import ZenMLCloudStackDeployment
|
25
21
|
from zenml.utils.string_utils import random_str
|
26
22
|
|
@@ -31,6 +27,7 @@ class AWSZenMLCloudStackDeployment(ZenMLCloudStackDeployment):
|
|
31
27
|
"""AWS ZenML Cloud Stack Deployment."""
|
32
28
|
|
33
29
|
provider: ClassVar[StackDeploymentProvider] = StackDeploymentProvider.AWS
|
30
|
+
deployment: ClassVar[str] = AWS_DEPLOYMENT_TYPE
|
34
31
|
|
35
32
|
@classmethod
|
36
33
|
def description(cls) -> str:
|
@@ -65,9 +62,6 @@ to log into your AWS account and create a CloudFormation ZenML stack. The stack
|
|
65
62
|
parameters will be pre-filled with the necessary information to connect ZenML to
|
66
63
|
your AWS account, so you should only need to review and confirm the stack.
|
67
64
|
|
68
|
-
After the CloudFormation stack is deployed, you can return to the CLI to view
|
69
|
-
details about the associated ZenML stack automatically registered with ZenML.
|
70
|
-
|
71
65
|
**NOTE**: The CloudFormation stack will create the following new resources in
|
72
66
|
your AWS account. Please ensure you have the necessary permissions and are aware
|
73
67
|
of any potential costs:
|
@@ -84,6 +78,18 @@ The CloudFormation stack will automatically create an AWS secret key and
|
|
84
78
|
will share it with ZenML to give it permission to access the resources created
|
85
79
|
by the stack. You can revoke these permissions at any time by deleting the
|
86
80
|
CloudFormation stack.
|
81
|
+
|
82
|
+
**Estimated costs**
|
83
|
+
|
84
|
+
A small training job would cost around: $0.60
|
85
|
+
|
86
|
+
These are rough estimates and actual costs may vary based on your usage and specific AWS pricing.
|
87
|
+
Some services may be eligible for the AWS Free Tier. Use [the AWS Pricing Calculator](https://calculator.aws)
|
88
|
+
for a detailed estimate based on your usage.
|
89
|
+
|
90
|
+
💡 **After the CloudFormation stack is deployed, you can return to the CLI to
|
91
|
+
view details about the associated ZenML stack automatically registered with
|
92
|
+
ZenML.**
|
87
93
|
"""
|
88
94
|
|
89
95
|
@classmethod
|
@@ -103,6 +109,19 @@ to clean up the resources created by the stack by using the AWS CloudFormation
|
|
103
109
|
console.
|
104
110
|
"""
|
105
111
|
|
112
|
+
@classmethod
|
113
|
+
def integrations(cls) -> List[str]:
|
114
|
+
"""Return the ZenML integrations required for the stack.
|
115
|
+
|
116
|
+
Returns:
|
117
|
+
The list of ZenML integrations that need to be installed for the
|
118
|
+
stack to be usable.
|
119
|
+
"""
|
120
|
+
return [
|
121
|
+
"aws",
|
122
|
+
"s3",
|
123
|
+
]
|
124
|
+
|
106
125
|
@classmethod
|
107
126
|
def permissions(cls) -> Dict[str, List[str]]:
|
108
127
|
"""Return the permissions granted to ZenML to access the cloud resources.
|
@@ -187,32 +206,34 @@ console.
|
|
187
206
|
"South America (São Paulo)": "sa-east-1",
|
188
207
|
}
|
189
208
|
|
190
|
-
def
|
209
|
+
def get_deployment_config(
|
191
210
|
self,
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
deploy the ZenML stack
|
199
|
-
as possible.
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
211
|
+
) -> StackDeploymentConfig:
|
212
|
+
"""Return the configuration to deploy the ZenML stack to the specified cloud provider.
|
213
|
+
|
214
|
+
The configuration should include:
|
215
|
+
|
216
|
+
* a cloud provider console URL where the user will be redirected to
|
217
|
+
deploy the ZenML stack. The URL should include as many pre-filled
|
218
|
+
URL query parameters as possible.
|
219
|
+
* a textual description of the URL
|
220
|
+
* some deployment providers may require additional configuration
|
221
|
+
parameters to be passed to the cloud provider in addition to the
|
222
|
+
deployment URL query parameters. Where that is the case, this method
|
223
|
+
should also return a string that the user can copy and paste into the
|
224
|
+
cloud provider console to deploy the ZenML stack (e.g. a set of
|
225
|
+
environment variables, or YAML configuration snippet etc.).
|
205
226
|
|
206
227
|
Returns:
|
207
|
-
The
|
208
|
-
|
228
|
+
The configuration to deploy the ZenML stack to the specified cloud
|
229
|
+
provider.
|
209
230
|
"""
|
210
231
|
params = dict(
|
211
232
|
stackName=self.stack_name,
|
212
233
|
templateURL="https://zenml-cf-templates.s3.eu-central-1.amazonaws.com/aws-ecr-s3-sagemaker.yaml",
|
213
234
|
param_ResourceName=f"zenml-{random_str(6).lower()}",
|
214
|
-
param_ZenMLServerURL=zenml_server_url,
|
215
|
-
param_ZenMLServerAPIToken=zenml_server_api_token,
|
235
|
+
param_ZenMLServerURL=self.zenml_server_url,
|
236
|
+
param_ZenMLServerAPIToken=self.zenml_server_api_token,
|
216
237
|
)
|
217
238
|
# Encode the parameters as URL query parameters
|
218
239
|
query_params = "&".join([f"{k}={v}" for k, v in params.items()])
|
@@ -221,69 +242,13 @@ console.
|
|
221
242
|
if self.location:
|
222
243
|
region = f"region={self.location}"
|
223
244
|
|
224
|
-
|
245
|
+
url = (
|
225
246
|
f"https://console.aws.amazon.com/cloudformation/home?"
|
226
|
-
f"{region}#/stacks/create/review?{query_params}"
|
227
|
-
"AWS CloudFormation Console",
|
247
|
+
f"{region}#/stacks/create/review?{query_params}"
|
228
248
|
)
|
229
249
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
This method is called to retrieve a ZenML stack matching the deployment
|
236
|
-
provider.
|
237
|
-
|
238
|
-
Args:
|
239
|
-
date_start: The date when the deployment started.
|
240
|
-
|
241
|
-
Returns:
|
242
|
-
The ZenML stack that was deployed and registered or None if a
|
243
|
-
matching stack was not found.
|
244
|
-
"""
|
245
|
-
client = Client()
|
246
|
-
|
247
|
-
# It's difficult to find a stack that matches the CloudFormation
|
248
|
-
# deployment 100% because the user can change the stack name before they
|
249
|
-
# deploy the stack in AWS.
|
250
|
-
#
|
251
|
-
# We try to find a full AWS stack that matches the deployment provider
|
252
|
-
# that was registered after this deployment was created.
|
253
|
-
|
254
|
-
# Get all stacks created after the start date
|
255
|
-
stacks = client.list_stacks(
|
256
|
-
created=f"gt:{str(date_start.replace(microsecond=0))}"
|
257
|
-
if date_start
|
258
|
-
else None,
|
259
|
-
sort_by="desc:created",
|
260
|
-
size=50,
|
250
|
+
return StackDeploymentConfig(
|
251
|
+
deployment_url=url,
|
252
|
+
deployment_url_text="AWS CloudFormation Console",
|
253
|
+
configuration=None,
|
261
254
|
)
|
262
|
-
|
263
|
-
if not stacks.items:
|
264
|
-
return None
|
265
|
-
|
266
|
-
# Find a stack that best matches the deployment provider
|
267
|
-
for stack in stacks.items:
|
268
|
-
if not stack.labels:
|
269
|
-
continue
|
270
|
-
|
271
|
-
if stack.labels.get("zenml:provider") != self.provider.value:
|
272
|
-
continue
|
273
|
-
|
274
|
-
if stack.labels.get("zenml:deployment") != AWS_DEPLOYMENT_TYPE:
|
275
|
-
continue
|
276
|
-
|
277
|
-
artifact_store = stack.components[
|
278
|
-
StackComponentType.ARTIFACT_STORE
|
279
|
-
][0]
|
280
|
-
|
281
|
-
if not artifact_store.connector:
|
282
|
-
continue
|
283
|
-
|
284
|
-
return DeployedStack(
|
285
|
-
stack=stack,
|
286
|
-
service_connector=artifact_store.connector,
|
287
|
-
)
|
288
|
-
|
289
|
-
return None
|