vellum-workflow-server 0.14.78__tar.gz → 0.14.79__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vellum-workflow-server might be problematic; see the registry's advisory page for more details.

Files changed (33)
  1. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/PKG-INFO +2 -2
  2. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/pyproject.toml +2 -2
  3. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/test_workflow_view_stream_workflow_route.py +20 -1
  4. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/workflow_view.py +93 -41
  5. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/config.py +1 -0
  6. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/executor.py +11 -3
  7. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/start.py +4 -2
  8. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/README.md +0 -0
  9. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/__init__.py +0 -0
  10. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/__init__.py +0 -0
  11. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/auth_middleware.py +0 -0
  12. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/healthz_view.py +0 -0
  13. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/__init__.py +0 -0
  14. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/test_input_display_mapping.py +0 -0
  15. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/test_workflow_view.py +0 -0
  16. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/code_exec_runner.py +0 -0
  17. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/__init__.py +0 -0
  18. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/cancel_workflow.py +0 -0
  19. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/events.py +0 -0
  20. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/utils.py +0 -0
  21. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/workflow_executor_context.py +0 -0
  22. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/server.py +0 -0
  23. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/__init__.py +0 -0
  24. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/exit_handler.py +0 -0
  25. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/log_proxy.py +0 -0
  26. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/oom_killer.py +0 -0
  27. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/sentry.py +0 -0
  28. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/system_utils.py +0 -0
  29. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/__init__.py +0 -0
  30. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/test_sentry_integration.py +0 -0
  31. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/test_system_utils.py +0 -0
  32. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/test_utils.py +0 -0
  33. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vellum-workflow-server
3
- Version: 0.14.78
3
+ Version: 0.14.79
4
4
  Summary:
5
5
  License: AGPL
6
6
  Requires-Python: >=3.9.0,<4
@@ -29,7 +29,7 @@ Requires-Dist: pyjwt (==2.10.0)
29
29
  Requires-Dist: python-dotenv (==1.0.1)
30
30
  Requires-Dist: retrying (==1.3.4)
31
31
  Requires-Dist: sentry-sdk[flask] (==2.20.0)
32
- Requires-Dist: vellum-ai (==0.14.78)
32
+ Requires-Dist: vellum-ai (==0.14.79)
33
33
  Description-Content-Type: text/markdown
34
34
 
35
35
  # Vellum Workflow Runner Server
@@ -3,7 +3,7 @@ name = "vellum-workflow-server"
3
3
 
4
4
  [tool.poetry]
5
5
  name = "vellum-workflow-server"
6
- version = "0.14.78"
6
+ version = "0.14.79"
7
7
  description = ""
8
8
  readme = "README.md"
9
9
  authors = []
@@ -45,7 +45,7 @@ flask = "2.3.3"
45
45
  orderly-set = "5.2.2"
46
46
  pebble = "5.0.7"
47
47
  gunicorn = "23.0.0"
48
- vellum-ai = "0.14.78"
48
+ vellum-ai = "0.14.79"
49
49
  python-dotenv = "1.0.1"
50
50
  retrying = "1.3.4"
51
51
  sentry-sdk = {extras = ["flask"], version = "2.20.0"}
@@ -32,6 +32,25 @@ def flask_stream(request_body: dict) -> tuple[int, list]:
32
32
  ]
33
33
 
34
34
 
35
+ @mock.patch("workflow_server.api.workflow_view.ENABLE_PROCESS_WRAPPER", False)
36
+ def flask_stream_disable_process_wrapper(request_body: dict) -> tuple[int, list]:
37
+ flask_app = create_app()
38
+ with flask_app.test_client() as test_client:
39
+ response = test_client.post("/workflow/stream", json=request_body)
40
+ status_code = response.status_code
41
+
42
+ return status_code, [
43
+ json.loads(line)
44
+ for line in response.data.decode().split("\n")
45
+ if line
46
+ and line
47
+ not in [
48
+ "WAITING",
49
+ "END",
50
+ ]
51
+ ]
52
+
53
+
35
54
  def code_exec_stream(request_body: dict) -> tuple[int, list]:
36
55
  output = io.StringIO()
37
56
 
@@ -48,7 +67,7 @@ def code_exec_stream(request_body: dict) -> tuple[int, list]:
48
67
  return 200, events
49
68
 
50
69
 
51
- @pytest.fixture(params=[flask_stream, code_exec_stream])
70
+ @pytest.fixture(params=[flask_stream, code_exec_stream, flask_stream_disable_process_wrapper])
52
71
  def both_stream_types(request):
53
72
  return request.param
54
73
 
@@ -8,6 +8,7 @@ import os
8
8
  import pkgutil
9
9
  from queue import Empty
10
10
  import sys
11
+ from threading import Event as ThreadingEvent
11
12
  import time
12
13
  import traceback
13
14
  from uuid import uuid4
@@ -18,9 +19,10 @@ from pydantic import ValidationError
18
19
  from vellum_ee.workflows.display.nodes.get_node_display_class import get_node_display_class
19
20
  from vellum_ee.workflows.display.types import WorkflowDisplayContext
20
21
 
22
+ from vellum.workflows.exceptions import WorkflowInitializationException
21
23
  from vellum.workflows.nodes import BaseNode
22
24
  from vellum.workflows.utils.names import pascal_to_title_case
23
- from workflow_server.config import MEMORY_LIMIT_MB
25
+ from workflow_server.config import ENABLE_PROCESS_WRAPPER, MEMORY_LIMIT_MB
24
26
  from workflow_server.core.events import (
25
27
  SPAN_ID_EVENT,
26
28
  STREAM_FINISHED_EVENT,
@@ -30,7 +32,7 @@ from workflow_server.core.events import (
30
32
  VembdaExecutionInitiatedBody,
31
33
  VembdaExecutionInitiatedEvent,
32
34
  )
33
- from workflow_server.core.executor import stream_node_pebble_timeout, stream_workflow_process_timeout
35
+ from workflow_server.core.executor import stream_node_pebble_timeout, stream_workflow, stream_workflow_process_timeout
34
36
  from workflow_server.core.utils import create_vembda_rejected_event, serialize_vembda_rejected_event
35
37
  from workflow_server.core.workflow_executor_context import (
36
38
  DEFAULT_TIMEOUT_SECONDS,
@@ -73,7 +75,7 @@ def stream_workflow_route() -> Response:
73
75
 
74
76
  logger.info(
75
77
  f"Starting workflow stream, execution ID: {context.execution_id}, "
76
- f"process count: {get_active_process_count()}"
78
+ f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
77
79
  )
78
80
 
79
81
  # Create this event up here so timestamps are fully from the start to account for any unknown overhead
@@ -108,33 +110,66 @@ def stream_workflow_route() -> Response:
108
110
  headers=headers,
109
111
  )
110
112
 
113
+ cancel_signal = ThreadingEvent()
114
+
111
115
  process: Optional[Process] = None
112
- try:
113
- process = stream_workflow_process_timeout(
114
- executor_context=context,
115
- queue=process_output_queue,
116
- )
117
- increment_process_count(1)
118
- except Exception as e:
119
- logger.exception(e)
116
+ if ENABLE_PROCESS_WRAPPER:
117
+ try:
118
+ process = stream_workflow_process_timeout(
119
+ executor_context=context,
120
+ queue=process_output_queue,
121
+ cancel_signal=cancel_signal,
122
+ )
123
+ increment_process_count(1)
124
+ except Exception as e:
125
+ logger.exception(e)
120
126
 
121
- process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
127
+ process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
122
128
 
123
- try:
124
- first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
125
- except Empty:
126
- logger.error("Request timed out trying to initiate the Workflow")
129
+ try:
130
+ first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
131
+ except Empty:
132
+ logger.error("Request timed out trying to initiate the Workflow")
133
+
134
+ if process and process.is_alive():
135
+ process.kill()
136
+ increment_process_count(-1)
137
+
138
+ return Response(
139
+ json.dumps({"detail": "Request timed out trying to initiate the Workflow"}),
140
+ status=408,
141
+ content_type="application/json",
142
+ headers=headers,
143
+ )
144
+ else:
127
145
 
128
- if process and process.is_alive():
129
- process.kill()
130
- increment_process_count(-1)
146
+ def workflow_stream_processor() -> Iterator[Union[dict, str]]:
147
+ span_id_emitted = False
148
+ try:
149
+ workflow_iterator, span_id = stream_workflow(
150
+ context,
151
+ disable_redirect=True,
152
+ cancel_signal=cancel_signal,
153
+ )
154
+ yield f"{SPAN_ID_EVENT}:{span_id}"
155
+ span_id_emitted = True
156
+ for event in workflow_iterator:
157
+ yield event
158
+ except WorkflowInitializationException as e:
159
+ if not span_id_emitted:
160
+ yield f"{SPAN_ID_EVENT}:{uuid4()}"
161
+
162
+ yield serialize_vembda_rejected_event(context, str(e))
163
+ except Exception as e:
164
+ if not span_id_emitted:
165
+ yield f"{SPAN_ID_EVENT}:{uuid4()}"
131
166
 
132
- return Response(
133
- json.dumps({"detail": "Request timed out trying to initiate the Workflow"}),
134
- status=408,
135
- content_type="application/json",
136
- headers=headers,
137
- )
167
+ logger.exception(e)
168
+ yield serialize_vembda_rejected_event(context, "Internal Server Error")
169
+
170
+ stream_iterator = workflow_stream_processor()
171
+ first_item = next(stream_iterator)
172
+ increment_process_count(1)
138
173
 
139
174
  if isinstance(first_item, str) and first_item.startswith(SPAN_ID_EVENT):
140
175
  span_id = first_item.split(":")[1]
@@ -151,21 +186,30 @@ def stream_workflow_route() -> Response:
151
186
  def process_events(queue: Queue) -> Iterator[Union[str, dict]]:
152
187
  event: Union[str, dict]
153
188
  loops = 0
189
+ timed_out_time: Optional[float] = None
154
190
 
155
191
  while True:
156
192
  loops += 1
157
193
  # Check if we timed out and kill the process if so. Set the timeout a little under what
158
194
  # the default is (30m) since the connection limit is 30m and otherwise we may not receive
159
- # the timeout event.
160
- if min(context.timeout, DEFAULT_TIMEOUT_SECONDS - 90) < (
161
- (time.time_ns() - context.request_start_time) / 1_000_000_000
195
+ # the timeout event. After cancelling the workflow wait 5 seconds for the workflow to emit
196
+ # any cancel events before ending the stream.
197
+ if (
198
+ min(context.timeout, DEFAULT_TIMEOUT_SECONDS - 90)
199
+ < ((time.time_ns() - context.request_start_time) / 1_000_000_000)
200
+ and not timed_out_time
162
201
  ):
163
- logger.error("Workflow timed out")
202
+ logger.error("Workflow timed out, waiting 5 seconds before ending request...")
203
+ cancel_signal.set()
204
+ timed_out_time = time.time()
164
205
 
165
- if process and process.is_alive():
206
+ if timed_out_time is not None and timed_out_time + 5 < time.time():
207
+ logger.warning("Killing request after workflow timeout")
208
+
209
+ if ENABLE_PROCESS_WRAPPER and process and process.is_alive():
166
210
  process.kill()
167
211
 
168
- if process:
212
+ if not ENABLE_PROCESS_WRAPPER or process:
169
213
  increment_process_count(-1)
170
214
 
171
215
  yield VembdaExecutionFulfilledEvent(
@@ -198,15 +242,18 @@ def stream_workflow_route() -> Response:
198
242
  break
199
243
 
200
244
  try:
201
- item = queue.get(timeout=0.1)
202
- event = item
245
+ if ENABLE_PROCESS_WRAPPER:
246
+ item = queue.get(timeout=0.1)
247
+ event = item
248
+ else:
249
+ event = next(stream_iterator)
203
250
  except Empty:
204
251
  # Emit waiting event if were just sitting around to attempt to keep the line
205
252
  # open to trick knative
206
253
  if loops % 20 == 0:
207
254
  yield "WAITING"
208
255
 
209
- if process and not process.is_alive():
256
+ if ENABLE_PROCESS_WRAPPER and process and not process.is_alive():
210
257
  logger.error("Workflow process exited abnormally")
211
258
 
212
259
  yield create_vembda_rejected_event(
@@ -216,6 +263,8 @@ def stream_workflow_route() -> Response:
216
263
  break
217
264
 
218
265
  continue
266
+ except StopIteration:
267
+ break
219
268
  except Exception as e:
220
269
  logger.exception(e)
221
270
  break
@@ -263,13 +312,16 @@ def stream_workflow_route() -> Response:
263
312
  yield "\n"
264
313
  return
265
314
  finally:
266
- try:
267
- if process and process.is_alive():
268
- process.kill()
269
- if process:
270
- increment_process_count(-1)
271
- except Exception as e:
272
- logger.error("Failed to kill process", e)
315
+ if ENABLE_PROCESS_WRAPPER:
316
+ try:
317
+ if process and process.is_alive():
318
+ process.kill()
319
+ if process:
320
+ increment_process_count(-1)
321
+ except Exception as e:
322
+ logger.error("Failed to kill process", e)
323
+ else:
324
+ increment_process_count(-1)
273
325
 
274
326
  resp = Response(
275
327
  stream_with_context(generator()),
@@ -28,6 +28,7 @@ PORT = os.getenv("PORT", "8000")
28
28
  VELLUM_API_URL_HOST = os.getenv("VELLUM_API_URL_HOST", "localhost")
29
29
  VELLUM_API_URL_PORT = os.getenv("VELLUM_API_URL_PORT", 8000)
30
30
  CONCURRENCY = int(os.getenv("CONCURRENCY", "8"))
31
+ ENABLE_PROCESS_WRAPPER = os.getenv("ENABLE_PROCESS_WRAPPER", "true").lower() == "true"
31
32
 
32
33
 
33
34
  def is_development() -> bool:
@@ -84,10 +84,14 @@ def _stream_node_wrapper(executor_context: NodeExecutorContext, queue: Queue) ->
84
84
  )
85
85
 
86
86
 
87
- def _stream_workflow_wrapper(executor_context: WorkflowExecutorContext, queue: Queue) -> None:
87
+ def _stream_workflow_wrapper(
88
+ executor_context: WorkflowExecutorContext,
89
+ queue: Queue,
90
+ cancel_signal: Optional[ThreadingEvent],
91
+ ) -> None:
88
92
  span_id_emitted = False
89
93
  try:
90
- stream_iterator, span_id = stream_workflow(executor_context=executor_context)
94
+ stream_iterator, span_id = stream_workflow(executor_context=executor_context, cancel_signal=cancel_signal)
91
95
 
92
96
  queue.put(f"{SPAN_ID_EVENT}:{span_id}")
93
97
  span_id_emitted = True
@@ -114,12 +118,14 @@ def _stream_workflow_wrapper(executor_context: WorkflowExecutorContext, queue: Q
114
118
  def stream_workflow_process_timeout(
115
119
  executor_context: WorkflowExecutorContext,
116
120
  queue: Queue,
121
+ cancel_signal: Optional[ThreadingEvent],
117
122
  ) -> Process:
118
123
  workflow_process = Process(
119
124
  target=_stream_workflow_wrapper,
120
125
  args=(
121
126
  executor_context,
122
127
  queue,
128
+ cancel_signal,
123
129
  ),
124
130
  )
125
131
  workflow_process.start()
@@ -145,6 +151,7 @@ def stream_workflow_process_timeout(
145
151
  def stream_workflow(
146
152
  executor_context: WorkflowExecutorContext,
147
153
  disable_redirect: bool = True,
154
+ cancel_signal: Optional[ThreadingEvent] = None,
148
155
  ) -> tuple[Iterator[dict], UUID]:
149
156
  workflow, namespace = _gather_workflow(executor_context)
150
157
  workflow_inputs = _get_workflow_inputs(executor_context)
@@ -164,7 +171,7 @@ def stream_workflow(
164
171
  )
165
172
 
166
173
  cancel_watcher_kill_switch = ThreadingEvent()
167
- cancel_signal = ThreadingEvent()
174
+ cancel_signal = cancel_signal or ThreadingEvent()
168
175
  cancel_watcher = CancelWorkflowWatcherThread(
169
176
  kill_switch=cancel_watcher_kill_switch,
170
177
  execution_id=executor_context.execution_id,
@@ -188,6 +195,7 @@ def stream_workflow(
188
195
  cancel_signal=cancel_signal,
189
196
  entrypoint_nodes=[run_from_node] if run_from_node else None,
190
197
  )
198
+
191
199
  except Exception:
192
200
  cancel_watcher_kill_switch.set()
193
201
  logger.exception("Failed to generate Workflow Stream")
@@ -5,7 +5,7 @@ from typing import Any, Optional
5
5
  from gunicorn import glogging
6
6
  import gunicorn.app.base
7
7
 
8
- from workflow_server.config import PORT, is_development
8
+ from workflow_server.config import ENABLE_PROCESS_WRAPPER, PORT, is_development
9
9
  from workflow_server.server import app
10
10
  from workflow_server.utils.exit_handler import gunicorn_exit_handler, init_signal_handlers
11
11
  from workflow_server.utils.oom_killer import start_oom_killer_worker
@@ -48,7 +48,9 @@ def start() -> None:
48
48
  options = {
49
49
  "bind": f"0.0.0.0:{PORT}",
50
50
  "workers": int(os.getenv("GUNICORN_WORKERS", 2)),
51
- "threads": int(os.getenv("GUNICORN_THREADS", 6)),
51
+ "threads": int(os.getenv("GUNICORN_THREADS", 9 if ENABLE_PROCESS_WRAPPER else 6)),
52
+ # Try to avoid memory leaks when using non process mode
53
+ "max_requests": 0 if ENABLE_PROCESS_WRAPPER else 20,
52
54
  "worker_class": "gthread",
53
55
  "timeout": int(os.getenv("GUNICORN_TIMEOUT", 1800)),
54
56
  "logger_class": CustomGunicornLogger,