PyPI - vellum-workflow-server - Versions diffs - 1.9.0.post2__tar.gz → 1.9.2__tar.gz - Mend

vellum-workflow-server 1.9.0.post2 (tar.gz) → 1.9.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{vellum_workflow_server-1.9.0.post2 → vellum_workflow_server-1.9.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vellum-workflow-server
-Version: 1.9.0.post2
+Version: 1.9.2
 Summary:
 License: AGPL
 Requires-Python: >=3.9.0,<4
@@ -29,7 +29,7 @@ Requires-Dist: pyjwt (==2.10.0)
 Requires-Dist: python-dotenv (==1.0.1)
 Requires-Dist: retrying (==1.3.4)
 Requires-Dist: sentry-sdk[flask] (==2.20.0)
-Requires-Dist: vellum-ai (==1.9.0)
+Requires-Dist: vellum-ai (==1.9.2)
 Description-Content-Type: text/markdown
 # Vellum Workflow Runner Server

{vellum_workflow_server-1.9.0.post2 → vellum_workflow_server-1.9.2}/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "vellum-workflow-server"
 [tool.poetry]
 name = "vellum-workflow-server"
-version = "1.9.0.post2"
+version = "1.9.2"
 description = ""
 readme = "README.md"
 authors = []
@@ -45,7 +45,7 @@ flask = "2.3.3"
 orderly-set = "5.2.2"
 pebble = "5.0.7"
 gunicorn = "23.0.0"
-vellum-ai = "1.9.0"
+vellum-ai = "1.9.2"
 python-dotenv = "1.0.1"
 retrying = "1.3.4"
 sentry-sdk = {extras = ["flask"], version = "2.20.0"}

{vellum_workflow_server-1.9.0.post2 → vellum_workflow_server-1.9.2}/src/workflow_server/api/workflow_view.py RENAMED Viewed

@@ -8,6 +8,7 @@ import os
 import pkgutil
 from queue import Empty
 import sys
+import threading
 import time
 import traceback
 from uuid import uuid4
@@ -71,19 +72,195 @@ WORKFLOW_INITIATION_TIMEOUT_SECONDS = 60
 @bp.route("/stream", methods=["POST"])
 def stream_workflow_route() -> Response:
     data = request.get_json()
+    try:
+        context = WorkflowExecutorContext.model_validate(data)
+    except ValidationError as e:
+        error_message = e.errors()[0]["msg"]
+        error_location = e.errors()[0]["loc"]
+        return Response(
+            json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
+            status=400,
+            content_type="application/json",
+        )
+    headers = _get_headers(context)
+    # We can exceed the concurrency count currently with long running workflows due to a knative issue. So here
+    # if we detect a memory problem just exit us early
+    if not wait_for_available_process():
+        return Response(
+            json.dumps(
+                {
+                    "detail": f"Workflow server concurrent request rate exceeded. "
+                    f"Process count: {get_active_process_count()}"
+                }
+            ),
+            status=429,
+            content_type="application/json",
+            headers=headers,
+        )
+    start_workflow_state = _start_workflow(context)
+    if isinstance(start_workflow_state, Response):
+        return start_workflow_state
+    workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_state
+    def generator() -> Generator[str, None, None]:
+        try:
+            yield "\n"
+            yield vembda_initiated_event.model_dump_json()
+            yield "\n"
+            for row in workflow_events:
+                yield "\n"
+                if isinstance(row, dict):
+                    dump = json.dumps(row)
+                    yield dump
+                else:
+                    yield row
+                yield "\n"
+            # Sometimes the connections get hung after they finish with the vembda fulfilled event
+            # if it happens during a knative scale down event. So we emit an END string so that
+            # we don't have to do string compares on all the events for performance.
+            yield "\n"
+            yield "END"
+            yield "\n"
+            logger.info(
+                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except GeneratorExit:
+            # These can happen either from Vembda disconnects (possibily from predict disconnects) or
+            # from knative activator gateway timeouts which are caused by idleTimeout or responseStartSeconds
+            # being exceeded.
+            app.logger.error(
+                "Client disconnected in the middle of the Workflow Stream",
+                extra={
+                    "sentry_tags": {
+                        "server_version": vembda_initiated_event.body.server_version,
+                        "sdk_version": vembda_initiated_event.body.sdk_version,
+                    }
+                },
+            )
+            return
+        except Exception as e:
+            logger.exception("Error during workflow response stream generator", extra={"error": e})
+            yield "\n"
+            yield "END"
+            yield "\n"
+            return
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                        remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                remove_active_span_id(span_id)
+    resp = Response(
+        stream_with_context(generator()),
+        status=200,
+        content_type="application/x-ndjson",
+        headers=headers,
+    )
+    return resp
+@bp.route("/async-exec", methods=["POST"])
+def async_exec_workflow() -> Response:
+    data = request.get_json()
     try:
         context = WorkflowExecutorContext.model_validate(data)
     except ValidationError as e:
         error_message = e.errors()[0]["msg"]
         error_location = e.errors()[0]["loc"]
+        # TODO need to convert this to a vembda event so that trigger'd execs can me notified
+        #  can either do it here in the workflow server or
         return Response(
             json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
             status=400,
             content_type="application/json",
         )
+    # Reject back to the queue handler if were low on memory here, though maybe we should update the is_available
+    # route to look at memory too. Don't send this response as an event. Though we might want some logic to catch
+    # if they have a workflow server that can never start a workflow because the base image uses so much memory.
+    if not wait_for_available_process():
+        return Response(
+            json.dumps({"detail": f"Server resources low." f"Process count: {get_active_process_count()}"}),
+            status=429,
+            content_type="application/json",
+        )
+    def run_workflow_background() -> None:
+        process: Optional[Process] = None
+        span_id: Optional[str] = None
+        try:
+            start_workflow_result = _start_workflow(context)
+            if isinstance(start_workflow_result, Response):
+                # TODO same here, should return this response as en event or it will get yeeted to the nether
+                # return start_workflow_result
+                return
+            workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_result
+            for _ in workflow_events:
+                # This is way inefficient in process mode since were just having the main proc stream the events
+                # to nowhere wasting memory I/O and cpu.
+                continue
+            logger.info(
+                f"Workflow async exec completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except Exception as e:
+            logger.exception("Error during workflow async background worker", e)
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                        if span_id:
+                            remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                if span_id:
+                    remove_active_span_id(span_id)
+    thread = threading.Thread(target=run_workflow_background)
+    thread.start()
+    return Response(
+        json.dumps({"success": True}),
+        status=200,
+        content_type="application/json",
+    )
+def _start_workflow(
+    context: WorkflowExecutorContext,
+) -> Union[
+    Response,
+    tuple[
+        Iterator[Union[str, dict]],
+        VembdaExecutionInitiatedEvent,
+        Optional[Process],
+        str,
+        dict[str, str],
+    ],
+]:
+    headers = _get_headers(context)
     logger.info(
         f"Starting Workflow Server Request, trace ID: {context.trace_id}, "
         f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
@@ -100,29 +277,7 @@ def stream_workflow_route() -> Response:
         parent=None,
     )
-    process_output_queue: Queue[Union[str, dict]] = Queue()
-    headers = {
-        "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
-        "X-Vellum-Server-Version": vembda_initiated_event.body.server_version,
-        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
-    }
-    # We can exceed the concurrency count currently with long running workflows due to a knative issue. So here
-    # if we detect a memory problem just exit us early
-    if not wait_for_available_process():
-        return Response(
-            json.dumps(
-                {
-                    "detail": f"Workflow server concurrent request rate exceeded. "
-                    f"Process count: {get_active_process_count()}"
-                }
-            ),
-            status=429,
-            content_type="application/json",
-            headers=headers,
-        )
+    output_queue: Queue[Union[str, dict]] = Queue()
     cancel_signal = MultiprocessingEvent()
     timeout_signal = MultiprocessingEvent()
@@ -131,7 +286,7 @@ def stream_workflow_route() -> Response:
         try:
             process = stream_workflow_process_timeout(
                 executor_context=context,
-                queue=process_output_queue,
+                queue=output_queue,
                 cancel_signal=cancel_signal,
                 timeout_signal=timeout_signal,
             )
@@ -139,10 +294,10 @@ def stream_workflow_route() -> Response:
         except Exception as e:
             logger.exception(e)
-            process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
+            output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
         try:
-            first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
+            first_item = output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
         except Empty:
             logger.error("Request timed out trying to initiate the Workflow")
@@ -291,72 +446,9 @@ def stream_workflow_route() -> Response:
                 break
             yield event
-    workflow_events = process_events(process_output_queue)
+    workflow_events = process_events(output_queue)
-    def generator() -> Generator[str, None, None]:
-        try:
-            yield "\n"
-            yield vembda_initiated_event.model_dump_json()
-            yield "\n"
-            for row in workflow_events:
-                yield "\n"
-                if isinstance(row, dict):
-                    dump = json.dumps(row)
-                    yield dump
-                else:
-                    yield row
-                yield "\n"
-            # Sometimes the connections get hung after they finish with the vembda fulfilled event
-            # if it happens during a knative scale down event. So we emit an END string so that
-            # we don't have to do string compares on all the events for performance.
-            yield "\n"
-            yield "END"
-            yield "\n"
-            logger.info(
-                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
-            )
-        except GeneratorExit:
-            # These can happen either from Vembda disconnects (possibily from predict disconnects) or
-            # from knative activator gateway timeouts which are caused by idleTimeout or responseStartSeconds
-            # being exceeded.
-            app.logger.error(
-                "Client disconnected in the middle of the Workflow Stream",
-                extra={
-                    "sentry_tags": {
-                        "server_version": vembda_initiated_event.body.server_version,
-                        "sdk_version": vembda_initiated_event.body.sdk_version,
-                    }
-                },
-            )
-            return
-        except Exception as e:
-            logger.exception("Error during workflow response stream generator", extra={"error": e})
-            yield "\n"
-            yield "END"
-            yield "\n"
-            return
-        finally:
-            if ENABLE_PROCESS_WRAPPER:
-                try:
-                    if process and process.is_alive():
-                        process.kill()
-                    if process:
-                        increment_process_count(-1)
-                        remove_active_span_id(span_id)
-                except Exception as e:
-                    logger.error("Failed to kill process", e)
-            else:
-                increment_process_count(-1)
-                remove_active_span_id(span_id)
-    resp = Response(
-        stream_with_context(generator()),
-        status=200,
-        content_type="application/x-ndjson",
-        headers=headers,
-    )
-    return resp
+    return workflow_events, vembda_initiated_event, process, span_id, headers
 @bp.route("/stream-node", methods=["POST"])
@@ -564,3 +656,12 @@ def startup_error_generator(
             },
         )
         return
+def _get_headers(context: WorkflowExecutorContext) -> dict[str, Union[str, Any]]:
+    headers = {
+        "X-Vellum-SDK-Version": get_version()["sdk_version"],
+        "X-Vellum-Server-Version": get_version()["server_version"],
+        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
+    }
+    return headers

{vellum_workflow_server-1.9.0.post2 → vellum_workflow_server-1.9.2}/src/workflow_server/core/workflow_executor_context.py RENAMED Viewed

@@ -37,6 +37,9 @@ class BaseExecutorContext(UniversalBaseModel):
     feature_flags: Optional[dict[str, bool]] = None
     is_new_server: bool = False
     trigger_id: Optional[UUID] = None
+    # The actual 'execution id' of the workflow that we pass into the workflow
+    # when running in async mode.
+    workflow_span_id: Optional[UUID] = None
     @field_validator("inputs", mode="before")
     @classmethod

{vellum_workflow_server-1.9.0.post2 → vellum_workflow_server-1.9.2}/src/workflow_server/start.py RENAMED Viewed

@@ -33,6 +33,7 @@ class CustomGunicornLogger(glogging.Logger):
         logger = logging.getLogger("gunicorn.access")
         logger.addFilter(HealthCheckFilter())
         logger.addFilter(SignalFilter())
+        logger.addFilter(StatusIsAvailableFilter())
 class HealthCheckFilter(logging.Filter):
@@ -45,6 +46,11 @@ class SignalFilter(logging.Filter):
         return "SIGTERM" not in record.getMessage()
+class StatusIsAvailableFilter(logging.Filter):
+    def filter(self, record: Any) -> bool:
+        return "/status/is_available" not in record.getMessage()
 def start() -> None:
     if not is_development():
         start_oom_killer_worker()