zenml-nightly 0.83.1.dev20250702__py3-none-any.whl → 0.83.1.dev20250703__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. zenml/VERSION +1 -1
  2. zenml/cli/pipeline.py +54 -1
  3. zenml/cli/utils.py +2 -0
  4. zenml/constants.py +1 -0
  5. zenml/enums.py +6 -3
  6. zenml/exceptions.py +8 -0
  7. zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +8 -4
  8. zenml/integrations/azure/orchestrators/azureml_orchestrator.py +5 -3
  9. zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +7 -8
  10. zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py +3 -0
  11. zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py +88 -0
  12. zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py +36 -1
  13. zenml/integrations/kubernetes/orchestrators/manifest_utils.py +11 -3
  14. zenml/models/v2/core/pipeline_run.py +2 -2
  15. zenml/orchestrators/base_orchestrator.py +70 -0
  16. zenml/orchestrators/dag_runner.py +27 -8
  17. zenml/orchestrators/local_docker/local_docker_orchestrator.py +9 -0
  18. zenml/orchestrators/publish_utils.py +100 -13
  19. zenml/orchestrators/step_launcher.py +86 -4
  20. zenml/utils/run_utils.py +74 -0
  21. zenml/zen_server/routers/runs_endpoints.py +27 -23
  22. zenml/zen_stores/sql_zen_store.py +23 -3
  23. {zenml_nightly-0.83.1.dev20250702.dist-info → zenml_nightly-0.83.1.dev20250703.dist-info}/METADATA +1 -1
  24. {zenml_nightly-0.83.1.dev20250702.dist-info → zenml_nightly-0.83.1.dev20250703.dist-info}/RECORD +27 -26
  25. {zenml_nightly-0.83.1.dev20250702.dist-info → zenml_nightly-0.83.1.dev20250703.dist-info}/LICENSE +0 -0
  26. {zenml_nightly-0.83.1.dev20250702.dist-info → zenml_nightly-0.83.1.dev20250703.dist-info}/WHEEL +0 -0
  27. {zenml_nightly-0.83.1.dev20250702.dist-info → zenml_nightly-0.83.1.dev20250703.dist-info}/entry_points.txt +0 -0
zenml/VERSION CHANGED
@@ -1 +1 @@
1
- 0.83.1.dev20250702
1
+ 0.83.1.dev20250703
zenml/cli/pipeline.py CHANGED
@@ -34,7 +34,7 @@ from zenml.models import (
34
34
  ScheduleFilter,
35
35
  )
36
36
  from zenml.pipelines.pipeline_definition import Pipeline
37
- from zenml.utils import source_utils, uuid_utils
37
+ from zenml.utils import run_utils, source_utils, uuid_utils
38
38
  from zenml.utils.yaml_utils import write_yaml
39
39
 
40
40
  logger = get_logger(__name__)
@@ -511,6 +511,59 @@ def list_pipeline_runs(**kwargs: Any) -> None:
511
511
  cli_utils.print_page_info(pipeline_runs)
512
512
 
513
513
 
514
+ @runs.command("stop")
515
+ @click.argument("run_name_or_id", type=str, required=True)
516
+ @click.option(
517
+ "--graceful",
518
+ "-g",
519
+ is_flag=True,
520
+ default=False,
521
+ help="Use graceful shutdown (default is False).",
522
+ )
523
+ @click.option(
524
+ "--yes",
525
+ "-y",
526
+ is_flag=True,
527
+ default=False,
528
+ help="Don't ask for confirmation.",
529
+ )
530
+ def stop_pipeline_run(
531
+ run_name_or_id: str,
532
+ graceful: bool = False,
533
+ yes: bool = False,
534
+ ) -> None:
535
+ """Stop a running pipeline.
536
+
537
+ Args:
538
+ run_name_or_id: The name or ID of the pipeline run to stop.
539
+ graceful: If True, uses graceful shutdown. If False, forces immediate termination.
540
+ yes: If set, don't ask for confirmation.
541
+ """
542
+ # Ask for confirmation to stop run.
543
+ if not yes:
544
+ action = "gracefully stop" if graceful else "force stop"
545
+ confirmation = cli_utils.confirmation(
546
+ f"Are you sure you want to {action} pipeline run `{run_name_or_id}`?"
547
+ )
548
+ if not confirmation:
549
+ cli_utils.declare("Not stopping the pipeline run.")
550
+ return
551
+
552
+ # Stop run.
553
+ try:
554
+ run = Client().get_pipeline_run(name_id_or_prefix=run_name_or_id)
555
+ run_utils.stop_run(run=run, graceful=graceful)
556
+ action = "Gracefully stopped" if graceful else "Force stopped"
557
+ cli_utils.declare(f"{action} pipeline run '{run.name}'.")
558
+ except NotImplementedError:
559
+ cli_utils.error(
560
+ "The orchestrator used for this pipeline run does not support "
561
+ f"{'gracefully' if graceful else 'forcefully'} stopping runs."
562
+ )
563
+ except Exception as e:
564
+ cli_utils.error(f"Failed to stop pipeline run: {e}")
565
+
566
+
514
567
  @runs.command("delete")
515
568
  @click.argument("run_name_or_id", type=str, required=True)
516
569
  @click.option(
zenml/cli/utils.py CHANGED
@@ -2214,6 +2214,8 @@ def get_execution_status_emoji(status: "ExecutionStatus") -> str:
2214
2214
  return ":white_check_mark:"
2215
2215
  if status == ExecutionStatus.CACHED:
2216
2216
  return ":package:"
2217
+ if status == ExecutionStatus.STOPPED or status == ExecutionStatus.STOPPING:
2218
+ return ":stop_sign:"
2217
2219
  raise RuntimeError(f"Unknown status: {status}")
2218
2220
 
2219
2221
 
zenml/constants.py CHANGED
@@ -416,6 +416,7 @@ STATISTICS = "/statistics"
416
416
  STATUS = "/status"
417
417
  STEP_CONFIGURATION = "/step-configuration"
418
418
  STEPS = "/steps"
419
+ STOP = "/stop"
419
420
  TAGS = "/tags"
420
421
  TAG_RESOURCES = "/tag_resources"
421
422
  TRIGGERS = "/triggers"
zenml/enums.py CHANGED
@@ -71,25 +71,28 @@ class ZenMLServiceType(StrEnum):
71
71
 
72
72
 
73
73
  class ExecutionStatus(StrEnum):
74
- """Enum that represents the current status of a step or pipeline run."""
74
+ """Enum that represents the execution status of a step or pipeline run."""
75
75
 
76
76
  INITIALIZING = "initializing"
77
77
  FAILED = "failed"
78
78
  COMPLETED = "completed"
79
79
  RUNNING = "running"
80
80
  CACHED = "cached"
81
+ STOPPED = "stopped"
82
+ STOPPING = "stopping"
81
83
 
82
84
  @property
83
85
  def is_finished(self) -> bool:
84
- """Whether the execution status refers to a finished execution.
86
+ """Returns whether the execution status is in a finished state.
85
87
 
86
88
  Returns:
87
- Whether the execution status refers to a finished execution.
89
+ Whether the execution status is finished.
88
90
  """
89
91
  return self in {
90
92
  ExecutionStatus.FAILED,
91
93
  ExecutionStatus.COMPLETED,
92
94
  ExecutionStatus.CACHED,
95
+ ExecutionStatus.STOPPED,
93
96
  }
94
97
 
95
98
 
zenml/exceptions.py CHANGED
@@ -122,6 +122,14 @@ class IllegalOperationError(ZenMLBaseException):
122
122
  """Raised when an illegal operation is attempted."""
123
123
 
124
124
 
125
+ class RunStoppedException(ZenMLBaseException):
126
+ """Raised when a ZenML pipeline run gets stopped by the user."""
127
+
128
+
129
+ class RunInterruptedException(ZenMLBaseException):
130
+ """Raised when a ZenML step gets interrupted for an unknown reason."""
131
+
132
+
125
133
  class MethodNotAllowedError(ZenMLBaseException):
126
134
  """Raised when the server does not allow a request method."""
127
135
 
zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py CHANGED
@@ -853,12 +853,16 @@ class SagemakerOrchestrator(ContainerizedOrchestrator):
853
853
  )["PipelineExecutionStatus"]
854
854
 
855
855
  # Map the potential outputs to ZenML ExecutionStatus. Potential values:
856
- # https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/PipelineState
857
- if status in ["Executing", "Stopping"]:
856
+ # https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribePipelineExecution.html
857
+ if status == "Executing":
858
858
  return ExecutionStatus.RUNNING
859
- elif status in ["Stopped", "Failed"]:
859
+ elif status == "Stopping":
860
+ return ExecutionStatus.STOPPING
861
+ elif status == "Stopped":
862
+ return ExecutionStatus.STOPPED
863
+ elif status == "Failed":
860
864
  return ExecutionStatus.FAILED
861
- elif status in ["Succeeded"]:
865
+ elif status == "Succeeded":
862
866
  return ExecutionStatus.COMPLETED
863
867
  else:
864
868
  raise ValueError("Unknown status for the pipeline execution.")
zenml/integrations/azure/orchestrators/azureml_orchestrator.py CHANGED
@@ -515,14 +515,16 @@ class AzureMLOrchestrator(ContainerizedOrchestrator):
515
515
  return ExecutionStatus.INITIALIZING
516
516
  elif status in ["Running", "Finalizing"]:
517
517
  return ExecutionStatus.RUNNING
518
+ elif status == "CancelRequested":
519
+ return ExecutionStatus.STOPPING
520
+ elif status == "Canceled":
521
+ return ExecutionStatus.STOPPED
518
522
  elif status in [
519
- "CancelRequested",
520
523
  "Failed",
521
- "Canceled",
522
524
  "NotResponding",
523
525
  ]:
524
526
  return ExecutionStatus.FAILED
525
- elif status in ["Completed"]:
527
+ elif status == "Completed":
526
528
  return ExecutionStatus.COMPLETED
527
529
  else:
528
530
  raise ValueError("Unknown status for the pipeline job.")
zenml/integrations/gcp/orchestrators/vertex_orchestrator.py CHANGED
@@ -942,7 +942,7 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
942
942
 
943
943
  # Map the potential outputs to ZenML ExecutionStatus. Potential values:
944
944
  # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/describe_pipeline_execution.html#
945
- if status in [PipelineState.PIPELINE_STATE_UNSPECIFIED]:
945
+ if status == PipelineState.PIPELINE_STATE_UNSPECIFIED:
946
946
  return run.status
947
947
  elif status in [
948
948
  PipelineState.PIPELINE_STATE_QUEUED,
@@ -954,14 +954,13 @@ class VertexOrchestrator(ContainerizedOrchestrator, GoogleCredentialsMixin):
954
954
  PipelineState.PIPELINE_STATE_PAUSED,
955
955
  ]:
956
956
  return ExecutionStatus.RUNNING
957
- elif status in [PipelineState.PIPELINE_STATE_SUCCEEDED]:
957
+ elif status == PipelineState.PIPELINE_STATE_SUCCEEDED:
958
958
  return ExecutionStatus.COMPLETED
959
-
960
- elif status in [
961
- PipelineState.PIPELINE_STATE_FAILED,
962
- PipelineState.PIPELINE_STATE_CANCELLING,
963
- PipelineState.PIPELINE_STATE_CANCELLED,
964
- ]:
959
+ elif status == PipelineState.PIPELINE_STATE_CANCELLING:
960
+ return ExecutionStatus.STOPPING
961
+ elif status == PipelineState.PIPELINE_STATE_CANCELLED:
962
+ return ExecutionStatus.STOPPED
963
+ elif status == PipelineState.PIPELINE_STATE_FAILED:
965
964
  return ExecutionStatus.FAILED
966
965
  else:
967
966
  raise ValueError("Unknown status for the pipeline job.")
zenml/integrations/kubernetes/flavors/kubernetes_orchestrator_flavor.py CHANGED
@@ -69,6 +69,8 @@ class KubernetesOrchestratorSettings(BaseSettings):
69
69
  scheduling a pipeline.
70
70
  prevent_orchestrator_pod_caching: If `True`, the orchestrator pod will
71
71
  not try to compute cached steps before starting the step pods.
72
+ pod_stop_grace_period: When stopping a pipeline run, the amount of
73
+ seconds to wait for a step pod to shutdown gracefully.
72
74
  """
73
75
 
74
76
  synchronous: bool = True
@@ -88,6 +90,7 @@ class KubernetesOrchestratorSettings(BaseSettings):
88
90
  failed_jobs_history_limit: Optional[NonNegativeInt] = None
89
91
  ttl_seconds_after_finished: Optional[NonNegativeInt] = None
90
92
  prevent_orchestrator_pod_caching: bool = False
93
+ pod_stop_grace_period: PositiveInt = 30
91
94
 
92
95
 
93
96
  class KubernetesOrchestratorConfig(
zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator.py CHANGED
@@ -545,6 +545,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
545
545
  successful_jobs_history_limit=settings.successful_jobs_history_limit,
546
546
  failed_jobs_history_limit=settings.failed_jobs_history_limit,
547
547
  ttl_seconds_after_finished=settings.ttl_seconds_after_finished,
548
+ termination_grace_period_seconds=settings.pod_stop_grace_period,
548
549
  labels=orchestrator_pod_labels,
549
550
  )
550
551
 
@@ -570,6 +571,7 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
570
571
  env=environment,
571
572
  labels=orchestrator_pod_labels,
572
573
  mount_local_stores=self.config.is_local,
574
+ termination_grace_period_seconds=settings.pod_stop_grace_period,
573
575
  )
574
576
 
575
577
  kube_utils.create_and_wait_for_pod_to_start(
@@ -663,6 +665,92 @@ class KubernetesOrchestrator(ContainerizedOrchestrator):
663
665
  f"{ENV_ZENML_KUBERNETES_RUN_ID}."
664
666
  )
665
667
 
668
+ def _stop_run(
669
+ self, run: "PipelineRunResponse", graceful: bool = True
670
+ ) -> None:
671
+ """Stops a specific pipeline run by terminating step pods.
672
+
673
+ Args:
674
+ run: The run that was executed by this orchestrator.
675
+ graceful: If True, does nothing (lets the orchestrator and steps finish naturally).
676
+ If False, stops all running step pods.
677
+
678
+ Raises:
679
+ RuntimeError: If we fail to stop the run.
680
+ """
681
+ # If graceful, do nothing and let the orchestrator handle the stop naturally
682
+ if graceful:
683
+ logger.info(
684
+ "Graceful stop requested - the orchestrator pod will handle "
685
+ "stopping naturally"
686
+ )
687
+ return
688
+
689
+ pods_stopped = []
690
+ errors = []
691
+
692
+ # Find all pods with the orchestrator run ID label
693
+ label_selector = f"run_id={kube_utils.sanitize_label(str(run.id))}"
694
+ try:
695
+ pods = self._k8s_core_api.list_namespaced_pod(
696
+ namespace=self.config.kubernetes_namespace,
697
+ label_selector=label_selector,
698
+ )
699
+ except Exception as e:
700
+ raise RuntimeError(
701
+ f"Failed to list step pods with run ID {run.id}: {e}"
702
+ )
703
+
704
+ # Filter to only include running or pending pods
705
+ for pod in pods.items:
706
+ if pod.status.phase not in ["Running", "Pending"]:
707
+ logger.debug(
708
+ f"Skipping pod {pod.metadata.name} with status {pod.status.phase}"
709
+ )
710
+ continue
711
+
712
+ try:
713
+ self._k8s_core_api.delete_namespaced_pod(
714
+ name=pod.metadata.name,
715
+ namespace=self.config.kubernetes_namespace,
716
+ )
717
+ pods_stopped.append(f"step pod: {pod.metadata.name}")
718
+ logger.debug(
719
+ f"Successfully initiated graceful stop of step pod: {pod.metadata.name}"
720
+ )
721
+ except Exception as e:
722
+ error_msg = f"Failed to stop step pod {pod.metadata.name}: {e}"
723
+ logger.warning(error_msg)
724
+ errors.append(error_msg)
725
+
726
+ # Summary logging
727
+ settings = cast(KubernetesOrchestratorSettings, self.get_settings(run))
728
+ grace_period_seconds = settings.pod_stop_grace_period
729
+ if pods_stopped:
730
+ logger.debug(
731
+ f"Successfully initiated graceful termination of: {', '.join(pods_stopped)}. "
732
+ f"Pods will terminate within {grace_period_seconds} seconds."
733
+ )
734
+
735
+ if errors:
736
+ error_summary = "; ".join(errors)
737
+ if not pods_stopped:
738
+ # If nothing was stopped successfully, raise an error
739
+ raise RuntimeError(
740
+ f"Failed to stop pipeline run: {error_summary}"
741
+ )
742
+ else:
743
+ # If some things were stopped but others failed, raise an error
744
+ raise RuntimeError(
745
+ f"Partial stop operation completed with errors: {error_summary}"
746
+ )
747
+
748
+ # If no step pods were found and no errors occurred
749
+ if not pods_stopped and not errors:
750
+ logger.info(
751
+ f"No running step pods found for pipeline run with ID: {run.id}"
752
+ )
753
+
666
754
  def get_pipeline_run_metadata(
667
755
  self, run_id: UUID
668
756
  ) -> Dict[str, "MetadataType"]:
zenml/integrations/kubernetes/orchestrators/kubernetes_orchestrator_entrypoint.py CHANGED
@@ -18,6 +18,7 @@ import socket
18
18
  from typing import Callable, Dict, Optional, cast
19
19
 
20
20
  from kubernetes import client as k8s_client
21
+ from kubernetes.client.rest import ApiException
21
22
 
22
23
  from zenml.client import Client
23
24
  from zenml.entrypoints.step_entrypoint_configuration import (
@@ -248,6 +249,7 @@ def main() -> None:
248
249
  or settings.service_account_name,
249
250
  mount_local_stores=mount_local_stores,
250
251
  owner_references=owner_references,
252
+ termination_grace_period_seconds=settings.pod_stop_grace_period,
251
253
  labels=step_pod_labels,
252
254
  )
253
255
 
@@ -330,6 +332,38 @@ def main() -> None:
330
332
  # as the pipeline run status will already have been published.
331
333
  pass
332
334
 
335
+ def check_pipeline_cancellation() -> bool:
336
+ """Check if the pipeline should continue execution.
337
+
338
+ Returns:
339
+ True if execution should continue, False if it should stop.
340
+ """
341
+ try:
342
+ run = client.get_pipeline_run(
343
+ name_id_or_prefix=pipeline_run.id,
344
+ project=pipeline_run.project_id,
345
+ hydrate=False, # We only need status, not full hydration
346
+ )
347
+
348
+ # If the run is STOPPING or STOPPED, we should stop the execution
349
+ if run.status in [
350
+ ExecutionStatus.STOPPING,
351
+ ExecutionStatus.STOPPED,
352
+ ]:
353
+ logger.info(
354
+ f"Pipeline run is in {run.status} state, stopping execution"
355
+ )
356
+ return False
357
+
358
+ return True
359
+
360
+ except Exception as e:
361
+ # If we can't check the status, assume we should continue
362
+ logger.warning(
363
+ f"Failed to check pipeline cancellation status: {e}"
364
+ )
365
+ return True
366
+
333
367
  parallel_node_startup_waiting_period = (
334
368
  orchestrator.config.parallel_step_startup_waiting_period or 0.0
335
369
  )
@@ -344,6 +378,7 @@ def main() -> None:
344
378
  run_fn=run_step_on_kubernetes,
345
379
  preparation_fn=pre_step_run,
346
380
  finalize_fn=finalize_run,
381
+ continue_fn=check_pipeline_cancellation,
347
382
  parallel_node_startup_waiting_period=parallel_node_startup_waiting_period,
348
383
  max_parallelism=pipeline_settings.max_parallelism,
349
384
  ).run()
@@ -360,7 +395,7 @@ def main() -> None:
360
395
  namespace=namespace,
361
396
  secret_name=secret_name,
362
397
  )
363
- except k8s_client.rest.ApiException as e:
398
+ except ApiException as e:
364
399
  logger.error(f"Error cleaning up secret {secret_name}: {e}")
365
400
 
366
401
 
zenml/integrations/kubernetes/orchestrators/manifest_utils.py CHANGED
@@ -106,6 +106,7 @@ def build_pod_manifest(
106
106
  labels: Optional[Dict[str, str]] = None,
107
107
  mount_local_stores: bool = False,
108
108
  owner_references: Optional[List[k8s_client.V1OwnerReference]] = None,
109
+ termination_grace_period_seconds: Optional[int] = 30,
109
110
  ) -> k8s_client.V1Pod:
110
111
  """Build a Kubernetes pod manifest for a ZenML run or step.
111
112
 
@@ -124,6 +125,8 @@ def build_pod_manifest(
124
125
  mount_local_stores: Whether to mount the local stores path inside the
125
126
  pod.
126
127
  owner_references: List of owner references for the pod.
128
+ termination_grace_period_seconds: The amount of seconds to wait for a
129
+ pod to shutdown gracefully.
127
130
 
128
131
  Returns:
129
132
  Pod manifest.
@@ -154,19 +157,20 @@ def build_pod_manifest(
154
157
  containers=[container_spec],
155
158
  restart_policy="Never",
156
159
  image_pull_secrets=image_pull_secrets,
160
+ termination_grace_period_seconds=termination_grace_period_seconds,
157
161
  )
158
162
 
159
163
  if service_account_name is not None:
160
164
  pod_spec.service_account_name = service_account_name
161
165
 
166
+ # Apply pod settings if provided
162
167
  labels = labels or {}
163
168
 
164
169
  if pod_settings:
165
170
  add_pod_settings(pod_spec, pod_settings)
166
171
 
167
- # Add pod_settings.labels to the labels
168
- if pod_settings.labels:
169
- labels.update(pod_settings.labels)
172
+ if pod_settings and pod_settings.labels:
173
+ labels.update(pod_settings.labels)
170
174
 
171
175
  pod_metadata = k8s_client.V1ObjectMeta(
172
176
  name=pod_name,
@@ -273,6 +277,7 @@ def build_cron_job_manifest(
273
277
  successful_jobs_history_limit: Optional[int] = None,
274
278
  failed_jobs_history_limit: Optional[int] = None,
275
279
  ttl_seconds_after_finished: Optional[int] = None,
280
+ termination_grace_period_seconds: Optional[int] = 30,
276
281
  ) -> k8s_client.V1CronJob:
277
282
  """Create a manifest for launching a pod as scheduled CRON job.
278
283
 
@@ -295,6 +300,8 @@ def build_cron_job_manifest(
295
300
  failed_jobs_history_limit: The number of failed jobs to retain.
296
301
  ttl_seconds_after_finished: The amount of seconds to keep finished jobs
297
302
  before deleting them.
303
+ termination_grace_period_seconds: The amount of seconds to wait for a
304
+ pod to shutdown gracefully.
298
305
 
299
306
  Returns:
300
307
  CRON job manifest.
@@ -310,6 +317,7 @@ def build_cron_job_manifest(
310
317
  env=env,
311
318
  labels=labels,
312
319
  mount_local_stores=mount_local_stores,
320
+ termination_grace_period_seconds=termination_grace_period_seconds,
313
321
  )
314
322
 
315
323
  job_spec = k8s_client.V1CronJobSpec(
zenml/models/v2/core/pipeline_run.py CHANGED
@@ -343,7 +343,7 @@ class PipelineRunResponse(
343
343
  if self.stack is None:
344
344
  raise ValueError(
345
345
  "The stack that this pipeline run response was executed on"
346
- "has been deleted."
346
+ "is either not accessible or has been deleted."
347
347
  )
348
348
 
349
349
  # Create the orchestrator instance
@@ -358,7 +358,7 @@ class PipelineRunResponse(
358
358
  if len(orchestrator_list) == 0:
359
359
  raise ValueError(
360
360
  "The orchestrator that this pipeline run response was "
361
- "executed with has been deleted."
361
+ "executed with is either not accessible or has been deleted."
362
362
  )
363
363
 
364
364
  orchestrator = cast(
zenml/orchestrators/base_orchestrator.py CHANGED
@@ -38,6 +38,7 @@ from zenml.logger import get_logger
38
38
  from zenml.metadata.metadata_types import MetadataType
39
39
  from zenml.orchestrators.publish_utils import (
40
40
  publish_pipeline_run_metadata,
41
+ publish_pipeline_run_status_update,
41
42
  publish_schedule_metadata,
42
43
  )
43
44
  from zenml.orchestrators.step_launcher import StepLauncher
@@ -210,6 +211,8 @@ class BaseOrchestrator(StackComponent, ABC):
210
211
  This will be deleted in case the pipeline deployment failed.
211
212
 
212
213
  Raises:
214
+ KeyboardInterrupt: If the orchestrator is synchronous and the
215
+ pipeline run is keyboard interrupted.
213
216
  RunMonitoringError: If a failure happened while monitoring the
214
217
  pipeline run.
215
218
  """
@@ -324,8 +327,17 @@ class BaseOrchestrator(StackComponent, ABC):
324
327
  if submission_result.wait_for_completion:
325
328
  try:
326
329
  submission_result.wait_for_completion()
330
+ except KeyboardInterrupt:
331
+ error_message = "Received KeyboardInterrupt. Note that the run is still executing. "
332
+ if placeholder_run:
333
+ error_message += (
334
+ "If you want to stop the pipeline run, please use: "
335
+ f"`zenml pipeline runs stop {placeholder_run.id}`"
336
+ )
337
+ raise KeyboardInterrupt(error_message)
327
338
  except BaseException as e:
328
339
  raise RunMonitoringError(original_exception=e)
340
+
329
341
  finally:
330
342
  self._cleanup_run()
331
343
 
@@ -391,6 +403,64 @@ class BaseOrchestrator(StackComponent, ABC):
391
403
  f"'{self.__class__.__name__}' orchestrator."
392
404
  )
393
405
 
406
+ def stop_run(
407
+ self, run: "PipelineRunResponse", graceful: bool = False
408
+ ) -> None:
409
+ """Stops a specific pipeline run.
410
+
411
+ This method should only be called if the orchestrator's
412
+ supports_cancellation property is True.
413
+
414
+ Args:
415
+ run: A pipeline run response to stop.
416
+ graceful: If True, allows for graceful shutdown where possible.
417
+ If False, forces immediate termination. Default is False.
418
+
419
+ Raises:
420
+ NotImplementedError: If any orchestrator inheriting from the base
421
+ class does not implement this logic.
422
+ """
423
+ # Check if the orchestrator supports cancellation
424
+ if (
425
+ getattr(self._stop_run, "__func__", None)
426
+ is BaseOrchestrator._stop_run
427
+ ):
428
+ raise NotImplementedError(
429
+ f"The '{self.__class__.__name__}' orchestrator does not "
430
+ "support stopping pipeline runs."
431
+ )
432
+
433
+ # Update pipeline status to STOPPING before calling concrete implementation
434
+ publish_pipeline_run_status_update(
435
+ pipeline_run_id=run.id,
436
+ status=ExecutionStatus.STOPPING,
437
+ )
438
+
439
+ # Now call the concrete implementation
440
+ self._stop_run(run=run, graceful=graceful)
441
+
442
+ def _stop_run(
443
+ self, run: "PipelineRunResponse", graceful: bool = False
444
+ ) -> None:
445
+ """Concrete implementation of pipeline stopping logic.
446
+
447
+ This method should be implemented by concrete orchestrator classes
448
+ instead of stop_run to ensure proper status management.
449
+
450
+ Args:
451
+ run: A pipeline run response to stop (already updated to STOPPING status).
452
+ graceful: If True, allows for graceful shutdown where possible.
453
+ If False, forces immediate termination. Default is True.
454
+
455
+ Raises:
456
+ NotImplementedError: If any orchestrator inheriting from the base
457
+ class does not implement this logic.
458
+ """
459
+ raise NotImplementedError(
460
+ "The stop run functionality is not implemented for the "
461
+ f"'{self.__class__.__name__}' orchestrator."
462
+ )
463
+
394
464
 
395
465
  class BaseOrchestratorFlavor(Flavor):
396
466
  """Base orchestrator flavor class."""