zenml-nightly 0.61.0.dev20240711__py3-none-any.whl → 0.61.0.dev20240713__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. zenml/VERSION +1 -1
  2. zenml/cli/stack.py +220 -71
  3. zenml/constants.py +6 -0
  4. zenml/enums.py +16 -0
  5. zenml/integrations/gcp/service_connectors/gcp_service_connector.py +54 -6
  6. zenml/logging/step_logging.py +34 -35
  7. zenml/models/__init__.py +2 -0
  8. zenml/models/v2/core/server_settings.py +0 -20
  9. zenml/models/v2/misc/stack_deployment.py +20 -0
  10. zenml/orchestrators/step_launcher.py +1 -0
  11. zenml/stack_deployments/aws_stack_deployment.py +56 -91
  12. zenml/stack_deployments/gcp_stack_deployment.py +260 -0
  13. zenml/stack_deployments/stack_deployment.py +103 -25
  14. zenml/stack_deployments/utils.py +4 -0
  15. zenml/zen_server/routers/devices_endpoints.py +4 -1
  16. zenml/zen_server/routers/server_endpoints.py +29 -2
  17. zenml/zen_server/routers/stack_deployment_endpoints.py +34 -20
  18. zenml/zen_stores/migrations/versions/b4fca5241eea_migrate_onboarding_state.py +167 -0
  19. zenml/zen_stores/rest_zen_store.py +45 -21
  20. zenml/zen_stores/schemas/server_settings_schemas.py +23 -11
  21. zenml/zen_stores/sql_zen_store.py +117 -19
  22. zenml/zen_stores/zen_store_interface.py +6 -5
  23. {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/METADATA +1 -1
  24. {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/RECORD +27 -25
  25. {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/LICENSE +0 -0
  26. {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/WHEEL +0 -0
  27. {zenml_nightly-0.61.0.dev20240711.dist-info → zenml_nightly-0.61.0.dev20240713.dist-info}/entry_points.txt +0 -0
zenml/logging/step_logging.py CHANGED
@@ -240,8 +240,6 @@ class StepLogsStorage:
240
240
 
241
241
  # Immutable filesystems state
242
242
  self.last_merge_time = time.time()
243
- self.log_files_not_merged: List[str] = []
244
- self.next_merged_file_name: str = self._get_timestamped_filename()
245
243
 
246
244
  @property
247
245
  def artifact_store(self) -> "BaseArtifactStore":
@@ -279,13 +277,16 @@ class StepLogsStorage:
279
277
  or time.time() - self.last_save_time >= self.time_interval
280
278
  )
281
279
 
282
- def _get_timestamped_filename(self) -> str:
280
+ def _get_timestamped_filename(self, suffix: str = "") -> str:
283
281
  """Returns a timestamped filename.
284
282
 
283
+ Args:
284
+ suffix: optional suffix for the file name
285
+
285
286
  Returns:
286
287
  The timestamped filename.
287
288
  """
288
- return f"{time.time()}{LOGS_EXTENSION}"
289
+ return f"{time.time()}{suffix}{LOGS_EXTENSION}"
289
290
 
290
291
  def save_to_file(self, force: bool = False) -> None:
291
292
  """Method to save the buffer to the given URI.
@@ -302,12 +303,7 @@ class StepLogsStorage:
302
303
  try:
303
304
  if self.buffer:
304
305
  if self.artifact_store.config.IS_IMMUTABLE_FILESYSTEM:
305
- if not self.log_files_not_merged:
306
- self.next_merged_file_name = (
307
- self._get_timestamped_filename()
308
- )
309
306
  _logs_uri = self._get_timestamped_filename()
310
- self.log_files_not_merged.append(_logs_uri)
311
307
  with self.artifact_store.open(
312
308
  os.path.join(
313
309
  self.logs_uri,
@@ -346,42 +342,40 @@ class StepLogsStorage:
346
342
  and time.time() - self.last_merge_time > self.merge_files_interval
347
343
  ):
348
344
  try:
349
- self.merge_log_files(
350
- self.next_merged_file_name, self.log_files_not_merged
351
- )
345
+ self.merge_log_files()
352
346
  except (OSError, IOError) as e:
353
347
  logger.error(f"Error while trying to roll up logs: {e}")
354
- else:
355
- self.log_files_not_merged = []
356
348
  finally:
357
349
  self.last_merge_time = time.time()
358
350
 
359
- def merge_log_files(
360
- self,
361
- file_name: Optional[str] = None,
362
- files: Optional[List[str]] = None,
363
- ) -> None:
351
+ def merge_log_files(self, merge_all_files: bool = False) -> None:
364
352
  """Merges all log files into one in the given URI.
365
353
 
366
354
  Called on the logging context exit.
367
355
 
368
356
  Args:
369
- file_name: The name of the merged log file.
370
- files: The list of log files to merge.
357
+ merge_all_files: whether to merge all files or only raw files
371
358
  """
372
359
  if self.artifact_store.config.IS_IMMUTABLE_FILESYSTEM:
373
- files_ = files or self.artifact_store.listdir(self.logs_uri)
374
- file_name_ = file_name or self._get_timestamped_filename()
360
+ merged_file_suffix = "_merged"
361
+ files_ = self.artifact_store.listdir(self.logs_uri)
362
+ if not merge_all_files:
363
+ # already merged files will not be merged again
364
+ files_ = [f for f in files_ if merged_file_suffix not in f]
365
+ file_name_ = self._get_timestamped_filename(
366
+ suffix=merged_file_suffix
367
+ )
375
368
  if len(files_) > 1:
376
369
  files_.sort()
377
370
  logger.debug("Log files count: %s", len(files_))
378
371
 
379
- try:
380
- # dump all logs to a local file first
381
- with self.artifact_store.open(
382
- os.path.join(self.logs_uri, file_name_), "w"
383
- ) as merged_file:
384
- for file in files_:
372
+ missing_files = set()
373
+ # dump all logs to a local file first
374
+ with self.artifact_store.open(
375
+ os.path.join(self.logs_uri, file_name_), "w"
376
+ ) as merged_file:
377
+ for file in files_:
378
+ try:
385
379
  merged_file.write(
386
380
  str(
387
381
  _load_file_from_artifact_store(
@@ -391,11 +385,12 @@ class StepLogsStorage:
391
385
  )
392
386
  )
393
387
  )
394
- except Exception as e:
395
- logger.warning(f"Failed to merge log files. {e}")
396
- else:
397
- # clean up left over files
398
- for file in files_:
388
+ except DoesNotExistException:
389
+ missing_files.add(file)
390
+
391
+ # clean up left over files
392
+ for file in files_:
393
+ if file not in missing_files:
399
394
  self.artifact_store.remove(
400
395
  os.path.join(self.logs_uri, str(file))
401
396
  )
@@ -452,7 +447,6 @@ class StepLogsStorageContext:
452
447
  Restores the `write` method of both stderr and stdout.
453
448
  """
454
449
  self.storage.save_to_file(force=True)
455
- self.storage.merge_log_files()
456
450
 
457
451
  setattr(sys.stdout, "write", self.stdout_write)
458
452
  setattr(sys.stdout, "flush", self.stdout_flush)
@@ -462,6 +456,11 @@ class StepLogsStorageContext:
462
456
 
463
457
  redirected.set(False)
464
458
 
459
+ try:
460
+ self.storage.merge_log_files(merge_all_files=True)
461
+ except (OSError, IOError) as e:
462
+ logger.warning(f"Step logs roll-up failed: {e}")
463
+
465
464
  def _wrap_write(self, method: Callable[..., Any]) -> Callable[..., Any]:
466
465
  """Wrapper function that utilizes the storage object to store logs.
467
466
 
zenml/models/__init__.py CHANGED
@@ -389,6 +389,7 @@ from zenml.models.v2.core.server_settings import (
389
389
  )
390
390
  from zenml.models.v2.misc.stack_deployment import (
391
391
  DeployedStack,
392
+ StackDeploymentConfig,
392
393
  StackDeploymentInfo,
393
394
  )
394
395
 
@@ -726,6 +727,7 @@ __all__ = [
726
727
  "ServerModel",
727
728
  "ServerDatabaseType",
728
729
  "ServerDeploymentType",
730
+ "StackDeploymentConfig",
729
731
  "StackDeploymentInfo",
730
732
  "OAuthDeviceAuthorizationRequest",
731
733
  "OAuthDeviceAuthorizationResponse",
zenml/models/v2/core/server_settings.py CHANGED
@@ -15,8 +15,6 @@
15
15
 
16
16
  from datetime import datetime
17
17
  from typing import (
18
- Any,
19
- Dict,
20
18
  Optional,
21
19
  )
22
20
  from uuid import UUID
@@ -57,10 +55,6 @@ class ServerSettingsUpdate(BaseZenModel):
57
55
  default=None,
58
56
  title="Whether to display notifications about ZenML updates in the dashboard.",
59
57
  )
60
- onboarding_state: Optional[Dict[str, Any]] = Field(
61
- default=None,
62
- title="The server's onboarding state.",
63
- )
64
58
 
65
59
 
66
60
  # ------------------ Response Model ------------------
@@ -96,11 +90,6 @@ class ServerSettingsResponseBody(BaseResponseBody):
96
90
  class ServerSettingsResponseMetadata(BaseResponseMetadata):
97
91
  """Response metadata for server settings."""
98
92
 
99
- onboarding_state: Dict[str, Any] = Field(
100
- default={},
101
- title="The server's onboarding state.",
102
- )
103
-
104
93
 
105
94
  class ServerSettingsResponseResources(BaseResponseResources):
106
95
  """Response resources for server settings."""
@@ -199,15 +188,6 @@ class ServerSettingsResponse(
199
188
  """
200
189
  return self.get_body().updated
201
190
 
202
- @property
203
- def onboarding_state(self) -> Dict[str, Any]:
204
- """The `onboarding_state` property.
205
-
206
- Returns:
207
- the value of the property.
208
- """
209
- return self.get_metadata().onboarding_state
210
-
211
191
 
212
192
  # ------------------ Filter Model ------------------
213
193
 
zenml/models/v2/misc/stack_deployment.py CHANGED
@@ -40,6 +40,11 @@ class StackDeploymentInfo(BaseModel):
40
40
  title="The instructions for post-deployment.",
41
41
  description="The instructions for post-deployment.",
42
42
  )
43
+ integrations: List[str] = Field(
44
+ title="ZenML integrations required for the stack.",
45
+ description="The list of ZenML integrations that need to be installed "
46
+ "for the stack to be usable.",
47
+ )
43
48
  permissions: Dict[str, List[str]] = Field(
44
49
  title="The permissions granted to ZenML to access the cloud resources.",
45
50
  description="The permissions granted to ZenML to access the cloud "
@@ -52,6 +57,21 @@ class StackDeploymentInfo(BaseModel):
52
57
  )
53
58
 
54
59
 
60
+ class StackDeploymentConfig(BaseModel):
61
+ """Configuration about a stack deployment."""
62
+
63
+ deployment_url: str = Field(
64
+ title="The cloud provider console URL where the stack will be deployed.",
65
+ )
66
+ deployment_url_text: str = Field(
67
+ title="A textual description for the cloud provider console URL.",
68
+ )
69
+ configuration: Optional[str] = Field(
70
+ title="Configuration for the stack deployment that the user must "
71
+ "manually configure into the cloud provider console.",
72
+ )
73
+
74
+
55
75
  class DeployedStack(BaseModel):
56
76
  """Information about a deployed stack."""
57
77
 
zenml/orchestrators/step_launcher.py CHANGED
@@ -291,6 +291,7 @@ class StepLauncher:
291
291
  logger.error(
292
292
  f"Failed to run step `{self._step_name}` after {max_retries} retries. Exiting."
293
293
  )
294
+ logger.exception(e)
294
295
  publish_utils.publish_failed_step_run(
295
296
  step_run_response.id
296
297
  )
zenml/stack_deployments/aws_stack_deployment.py CHANGED
@@ -11,16 +11,12 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
12
  # or implied. See the License for the specific language governing
13
13
  # permissions and limitations under the License.
14
- """Functionality to deploy a ZenML stack to a cloud provider."""
14
+ """Functionality to deploy a ZenML stack to AWS."""
15
15
 
16
- import datetime
17
- from typing import ClassVar, Dict, List, Optional, Tuple
16
+ from typing import ClassVar, Dict, List
18
17
 
19
- from zenml.client import Client
20
- from zenml.enums import StackComponentType, StackDeploymentProvider
21
- from zenml.models import (
22
- DeployedStack,
23
- )
18
+ from zenml.enums import StackDeploymentProvider
19
+ from zenml.models import StackDeploymentConfig
24
20
  from zenml.stack_deployments.stack_deployment import ZenMLCloudStackDeployment
25
21
  from zenml.utils.string_utils import random_str
26
22
 
@@ -31,6 +27,7 @@ class AWSZenMLCloudStackDeployment(ZenMLCloudStackDeployment):
31
27
  """AWS ZenML Cloud Stack Deployment."""
32
28
 
33
29
  provider: ClassVar[StackDeploymentProvider] = StackDeploymentProvider.AWS
30
+ deployment: ClassVar[str] = AWS_DEPLOYMENT_TYPE
34
31
 
35
32
  @classmethod
36
33
  def description(cls) -> str:
@@ -65,9 +62,6 @@ to log into your AWS account and create a CloudFormation ZenML stack. The stack
65
62
  parameters will be pre-filled with the necessary information to connect ZenML to
66
63
  your AWS account, so you should only need to review and confirm the stack.
67
64
 
68
- After the CloudFormation stack is deployed, you can return to the CLI to view
69
- details about the associated ZenML stack automatically registered with ZenML.
70
-
71
65
  **NOTE**: The CloudFormation stack will create the following new resources in
72
66
  your AWS account. Please ensure you have the necessary permissions and are aware
73
67
  of any potential costs:
@@ -84,6 +78,18 @@ The CloudFormation stack will automatically create an AWS secret key and
84
78
  will share it with ZenML to give it permission to access the resources created
85
79
  by the stack. You can revoke these permissions at any time by deleting the
86
80
  CloudFormation stack.
81
+
82
+ **Estimated costs**
83
+
84
+ A small training job would cost around: $0.60
85
+
86
+ These are rough estimates and actual costs may vary based on your usage and specific AWS pricing.
87
+ Some services may be eligible for the AWS Free Tier. Use [the AWS Pricing Calculator](https://calculator.aws)
88
+ for a detailed estimate based on your usage.
89
+
90
+ 💡 **After the CloudFormation stack is deployed, you can return to the CLI to
91
+ view details about the associated ZenML stack automatically registered with
92
+ ZenML.**
87
93
  """
88
94
 
89
95
  @classmethod
@@ -103,6 +109,19 @@ to clean up the resources created by the stack by using the AWS CloudFormation
103
109
  console.
104
110
  """
105
111
 
112
+ @classmethod
113
+ def integrations(cls) -> List[str]:
114
+ """Return the ZenML integrations required for the stack.
115
+
116
+ Returns:
117
+ The list of ZenML integrations that need to be installed for the
118
+ stack to be usable.
119
+ """
120
+ return [
121
+ "aws",
122
+ "s3",
123
+ ]
124
+
106
125
  @classmethod
107
126
  def permissions(cls) -> Dict[str, List[str]]:
108
127
  """Return the permissions granted to ZenML to access the cloud resources.
@@ -187,32 +206,34 @@ console.
187
206
  "South America (São Paulo)": "sa-east-1",
188
207
  }
189
208
 
190
- def deploy_url(
209
+ def get_deployment_config(
191
210
  self,
192
- zenml_server_url: str,
193
- zenml_server_api_token: str,
194
- ) -> Tuple[str, str]:
195
- """Return the URL to deploy the ZenML stack to the specified cloud provider.
196
-
197
- The URL should point to a cloud provider console where the user can
198
- deploy the ZenML stack and should include as many pre-filled parameters
199
- as possible.
200
-
201
- Args:
202
- zenml_server_url: The URL of the ZenML server.
203
- zenml_server_api_token: The API token to authenticate with the ZenML
204
- server.
211
+ ) -> StackDeploymentConfig:
212
+ """Return the configuration to deploy the ZenML stack to the specified cloud provider.
213
+
214
+ The configuration should include:
215
+
216
+ * a cloud provider console URL where the user will be redirected to
217
+ deploy the ZenML stack. The URL should include as many pre-filled
218
+ URL query parameters as possible.
219
+ * a textual description of the URL
220
+ * some deployment providers may require additional configuration
221
+ parameters to be passed to the cloud provider in addition to the
222
+ deployment URL query parameters. Where that is the case, this method
223
+ should also return a string that the user can copy and paste into the
224
+ cloud provider console to deploy the ZenML stack (e.g. a set of
225
+ environment variables, or YAML configuration snippet etc.).
205
226
 
206
227
  Returns:
207
- The URL to deploy the ZenML stack to the specified cloud provider
208
- and a text description of the URL.
228
+ The configuration to deploy the ZenML stack to the specified cloud
229
+ provider.
209
230
  """
210
231
  params = dict(
211
232
  stackName=self.stack_name,
212
233
  templateURL="https://zenml-cf-templates.s3.eu-central-1.amazonaws.com/aws-ecr-s3-sagemaker.yaml",
213
234
  param_ResourceName=f"zenml-{random_str(6).lower()}",
214
- param_ZenMLServerURL=zenml_server_url,
215
- param_ZenMLServerAPIToken=zenml_server_api_token,
235
+ param_ZenMLServerURL=self.zenml_server_url,
236
+ param_ZenMLServerAPIToken=self.zenml_server_api_token,
216
237
  )
217
238
  # Encode the parameters as URL query parameters
218
239
  query_params = "&".join([f"{k}={v}" for k, v in params.items()])
@@ -221,69 +242,13 @@ console.
221
242
  if self.location:
222
243
  region = f"region={self.location}"
223
244
 
224
- return (
245
+ url = (
225
246
  f"https://console.aws.amazon.com/cloudformation/home?"
226
- f"{region}#/stacks/create/review?{query_params}",
227
- "AWS CloudFormation Console",
247
+ f"{region}#/stacks/create/review?{query_params}"
228
248
  )
229
249
 
230
- def get_stack(
231
- self, date_start: Optional[datetime.datetime] = None
232
- ) -> Optional[DeployedStack]:
233
- """Return the ZenML stack that was deployed and registered.
234
-
235
- This method is called to retrieve a ZenML stack matching the deployment
236
- provider.
237
-
238
- Args:
239
- date_start: The date when the deployment started.
240
-
241
- Returns:
242
- The ZenML stack that was deployed and registered or None if a
243
- matching stack was not found.
244
- """
245
- client = Client()
246
-
247
- # It's difficult to find a stack that matches the CloudFormation
248
- # deployment 100% because the user can change the stack name before they
249
- # deploy the stack in AWS.
250
- #
251
- # We try to find a full AWS stack that matches the deployment provider
252
- # that was registered after this deployment was created.
253
-
254
- # Get all stacks created after the start date
255
- stacks = client.list_stacks(
256
- created=f"gt:{str(date_start.replace(microsecond=0))}"
257
- if date_start
258
- else None,
259
- sort_by="desc:created",
260
- size=50,
250
+ return StackDeploymentConfig(
251
+ deployment_url=url,
252
+ deployment_url_text="AWS CloudFormation Console",
253
+ configuration=None,
261
254
  )
262
-
263
- if not stacks.items:
264
- return None
265
-
266
- # Find a stack that best matches the deployment provider
267
- for stack in stacks.items:
268
- if not stack.labels:
269
- continue
270
-
271
- if stack.labels.get("zenml:provider") != self.provider.value:
272
- continue
273
-
274
- if stack.labels.get("zenml:deployment") != AWS_DEPLOYMENT_TYPE:
275
- continue
276
-
277
- artifact_store = stack.components[
278
- StackComponentType.ARTIFACT_STORE
279
- ][0]
280
-
281
- if not artifact_store.connector:
282
- continue
283
-
284
- return DeployedStack(
285
- stack=stack,
286
- service_connector=artifact_store.connector,
287
- )
288
-
289
- return None