zenml-nightly 0.66.0.dev20240922__py3-none-any.whl → 0.66.0.dev20240924__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. zenml/VERSION +1 -1
  2. zenml/cli/base.py +2 -2
  3. zenml/cli/utils.py +14 -11
  4. zenml/client.py +68 -3
  5. zenml/config/step_configurations.py +0 -5
  6. zenml/enums.py +2 -0
  7. zenml/integrations/aws/flavors/sagemaker_orchestrator_flavor.py +76 -7
  8. zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +81 -43
  9. zenml/integrations/tensorboard/visualizers/tensorboard_visualizer.py +1 -1
  10. zenml/models/v2/base/filter.py +315 -149
  11. zenml/models/v2/base/scoped.py +5 -2
  12. zenml/models/v2/core/artifact_version.py +69 -8
  13. zenml/models/v2/core/model.py +43 -6
  14. zenml/models/v2/core/model_version.py +49 -1
  15. zenml/models/v2/core/model_version_artifact.py +18 -3
  16. zenml/models/v2/core/model_version_pipeline_run.py +18 -4
  17. zenml/models/v2/core/pipeline.py +108 -1
  18. zenml/models/v2/core/pipeline_run.py +110 -20
  19. zenml/models/v2/core/run_template.py +53 -1
  20. zenml/models/v2/core/stack.py +33 -5
  21. zenml/models/v2/core/step_run.py +7 -0
  22. zenml/new/pipelines/pipeline.py +4 -0
  23. zenml/utils/env_utils.py +54 -1
  24. zenml/utils/string_utils.py +50 -0
  25. zenml/zen_stores/sql_zen_store.py +1 -0
  26. {zenml_nightly-0.66.0.dev20240922.dist-info → zenml_nightly-0.66.0.dev20240924.dist-info}/METADATA +1 -1
  27. {zenml_nightly-0.66.0.dev20240922.dist-info → zenml_nightly-0.66.0.dev20240924.dist-info}/RECORD +30 -30
  28. {zenml_nightly-0.66.0.dev20240922.dist-info → zenml_nightly-0.66.0.dev20240924.dist-info}/LICENSE +0 -0
  29. {zenml_nightly-0.66.0.dev20240922.dist-info → zenml_nightly-0.66.0.dev20240924.dist-info}/WHEEL +0 -0
  30. {zenml_nightly-0.66.0.dev20240922.dist-info → zenml_nightly-0.66.0.dev20240924.dist-info}/entry_points.txt +0 -0
@@ -25,7 +25,7 @@ from sagemaker.network import NetworkConfig
25
25
  from sagemaker.processing import ProcessingInput, ProcessingOutput
26
26
  from sagemaker.workflow.execution_variables import ExecutionVariables
27
27
  from sagemaker.workflow.pipeline import Pipeline
28
- from sagemaker.workflow.steps import ProcessingStep
28
+ from sagemaker.workflow.steps import ProcessingStep, TrainingStep
29
29
 
30
30
  from zenml.config.base_settings import BaseSettings
31
31
  from zenml.constants import (
@@ -238,47 +238,61 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
238
238
  ExecutionVariables.PIPELINE_EXECUTION_ARN
239
239
  )
240
240
 
241
- # Retrieve Processor arguments provided in the Step settings.
242
- processor_args_for_step = step_settings.processor_args or {}
241
+ use_training_step = (
242
+ step_settings.use_training_step
243
+ if step_settings.use_training_step is not None
244
+ else (
245
+ self.config.use_training_step
246
+ if self.config.use_training_step is not None
247
+ else True
248
+ )
249
+ )
250
+
251
+ # Retrieve Executor arguments provided in the Step settings.
252
+ if use_training_step:
253
+ args_for_step_executor = step_settings.estimator_args or {}
254
+ else:
255
+ args_for_step_executor = step_settings.processor_args or {}
243
256
 
244
257
  # Set default values from configured orchestrator Component to arguments
245
258
  # to be used when they are not present in processor_args.
246
- processor_args_for_step.setdefault(
247
- "instance_type", step_settings.instance_type
248
- )
249
- processor_args_for_step.setdefault(
259
+ args_for_step_executor.setdefault(
250
260
  "role",
251
- step_settings.processor_role or self.config.execution_role,
261
+ step_settings.execution_role or self.config.execution_role,
252
262
  )
253
- processor_args_for_step.setdefault(
263
+ args_for_step_executor.setdefault(
254
264
  "volume_size_in_gb", step_settings.volume_size_in_gb
255
265
  )
256
- processor_args_for_step.setdefault(
266
+ args_for_step_executor.setdefault(
257
267
  "max_runtime_in_seconds", step_settings.max_runtime_in_seconds
258
268
  )
259
- processor_args_for_step.setdefault(
269
+ tags = step_settings.tags
270
+ args_for_step_executor.setdefault(
260
271
  "tags",
261
- [
262
- {"Key": key, "Value": value}
263
- for key, value in step_settings.processor_tags.items()
264
- ]
265
- if step_settings.processor_tags
266
- else None,
272
+ (
273
+ [
274
+ {"Key": key, "Value": value}
275
+ for key, value in tags.items()
276
+ ]
277
+ if tags
278
+ else None
279
+ ),
280
+ )
281
+ args_for_step_executor.setdefault(
282
+ "instance_type", step_settings.instance_type
267
283
  )
268
284
 
269
285
  # Set values that cannot be overwritten
270
- processor_args_for_step["image_uri"] = image
271
- processor_args_for_step["instance_count"] = 1
272
- processor_args_for_step["sagemaker_session"] = session
273
- processor_args_for_step["entrypoint"] = entrypoint
274
- processor_args_for_step["base_job_name"] = orchestrator_run_name
275
- processor_args_for_step["env"] = environment
286
+ args_for_step_executor["image_uri"] = image
287
+ args_for_step_executor["instance_count"] = 1
288
+ args_for_step_executor["sagemaker_session"] = session
289
+ args_for_step_executor["base_job_name"] = orchestrator_run_name
276
290
 
277
291
  # Convert network_config to sagemaker.network.NetworkConfig if present
278
- network_config = processor_args_for_step.get("network_config")
292
+ network_config = args_for_step_executor.get("network_config")
279
293
  if network_config and isinstance(network_config, dict):
280
294
  try:
281
- processor_args_for_step["network_config"] = NetworkConfig(
295
+ args_for_step_executor["network_config"] = NetworkConfig(
282
296
  **network_config
283
297
  )
284
298
  except TypeError:
@@ -317,17 +331,21 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
317
331
 
318
332
  # Construct S3 outputs from container for step
319
333
  outputs = None
334
+ output_path = None
320
335
 
321
336
  if step_settings.output_data_s3_uri is None:
322
337
  pass
323
338
  elif isinstance(step_settings.output_data_s3_uri, str):
324
- outputs = [
325
- ProcessingOutput(
326
- source="/opt/ml/processing/output/data",
327
- destination=step_settings.output_data_s3_uri,
328
- s3_upload_mode=step_settings.output_data_s3_mode,
329
- )
330
- ]
339
+ if use_training_step:
340
+ output_path = step_settings.output_data_s3_uri
341
+ else:
342
+ outputs = [
343
+ ProcessingOutput(
344
+ source="/opt/ml/processing/output/data",
345
+ destination=step_settings.output_data_s3_uri,
346
+ s3_upload_mode=step_settings.output_data_s3_mode,
347
+ )
348
+ ]
331
349
  elif isinstance(step_settings.output_data_s3_uri, dict):
332
350
  outputs = []
333
351
  for (
@@ -342,17 +360,37 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
342
360
  )
343
361
  )
344
362
 
345
- # Create Processor and ProcessingStep
346
- processor = sagemaker.processing.Processor(
347
- **processor_args_for_step
348
- )
349
- sagemaker_step = ProcessingStep(
350
- name=step_name,
351
- processor=processor,
352
- depends_on=step.spec.upstream_steps,
353
- inputs=inputs,
354
- outputs=outputs,
355
- )
363
+ if use_training_step:
364
+ # Create Estimator and TrainingStep
365
+ estimator = sagemaker.estimator.Estimator(
366
+ keep_alive_period_in_seconds=step_settings.keep_alive_period_in_seconds,
367
+ output_path=output_path,
368
+ environment=environment,
369
+ container_entry_point=entrypoint,
370
+ **args_for_step_executor,
371
+ )
372
+ sagemaker_step = TrainingStep(
373
+ name=step_name,
374
+ depends_on=step.spec.upstream_steps,
375
+ inputs=inputs,
376
+ estimator=estimator,
377
+ )
378
+ else:
379
+ # Create Processor and ProcessingStep
380
+ processor = sagemaker.processing.Processor(
381
+ entrypoint=entrypoint,
382
+ env=environment,
383
+ **args_for_step_executor,
384
+ )
385
+
386
+ sagemaker_step = ProcessingStep(
387
+ name=step_name,
388
+ processor=processor,
389
+ depends_on=step.spec.upstream_steps,
390
+ inputs=inputs,
391
+ outputs=outputs,
392
+ )
393
+
356
394
  sagemaker_steps.append(sagemaker_step)
357
395
 
358
396
  # construct the pipeline from the sagemaker_steps
@@ -194,7 +194,7 @@ def get_step(pipeline_name: str, step_name: str) -> "StepRunResponse":
194
194
  Raises:
195
195
  RuntimeError: If the step is not found.
196
196
  """
197
- runs = Client().list_pipeline_runs(pipeline_name=pipeline_name)
197
+ runs = Client().list_pipeline_runs(pipeline=pipeline_name)
198
198
  if runs.total == 0:
199
199
  raise RuntimeError(
200
200
  f"No pipeline runs for pipeline `{pipeline_name}` were found"