vellum-workflow-server 1.9.7.post1.tar.gz → 1.12.0.post1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/PKG-INFO +7 -4
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/README.md +3 -1
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/pyproject.toml +4 -3
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/tests/test_workflow_view.py +9 -1
- vellum_workflow_server-1.12.0.post1/src/workflow_server/api/tests/test_workflow_view_async_exec.py +410 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/tests/test_workflow_view_stream_workflow_route.py +146 -1
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/workflow_view.py +98 -11
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/code_exec_runner.py +4 -3
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/core/cancel_workflow.py +11 -7
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/core/executor.py +7 -25
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/core/utils.py +4 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/core/workflow_executor_context.py +14 -1
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/start.py +2 -2
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/utils.py +9 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/__init__.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/__init__.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/auth_middleware.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/healthz_view.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/status_view.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/tests/__init__.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/api/tests/test_input_display_mapping.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/config.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/core/__init__.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/core/events.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/logging_config.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/server.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/__init__.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/exit_handler.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/log_proxy.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/oom_killer.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/sentry.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/system_utils.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/tests/__init__.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/tests/test_sentry_integration.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/tests/test_system_utils.py +0 -0
- {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.12.0.post1}/src/workflow_server/utils/tests/test_utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vellum-workflow-server
-Version: 1.9.7.post1
+Version: 1.12.0.post1
 Summary: 
 License: AGPL
 Requires-Python: >=3.9.0,<4
@@ -24,18 +24,21 @@ Requires-Dist: cryptography (==43.0.3)
 Requires-Dist: flask (==2.3.3)
 Requires-Dist: gunicorn (==23.0.0)
 Requires-Dist: orderly-set (==5.2.2)
+Requires-Dist: orjson (==3.11.4)
 Requires-Dist: pebble (==5.0.7)
 Requires-Dist: pyjwt (==2.10.0)
-Requires-Dist: python-dotenv (==1.
+Requires-Dist: python-dotenv (==1.2.1)
 Requires-Dist: retrying (==1.3.4)
 Requires-Dist: sentry-sdk[flask] (==2.20.0)
-Requires-Dist: vellum-ai (==1.
+Requires-Dist: vellum-ai (==1.12.0)
 Description-Content-Type: text/markdown
 
 # Vellum Workflow Runner Server
+
 This package is meant for installing on container images in order to use custom docker images when using Vellum Workflows.
 
 ## Example Dockerfile Usage:
+
 ```
 FROM python:3.11.6-slim-bookworm
 
@@ -48,7 +51,6 @@ RUN pip install --upgrade pip
 RUN pip --no-cache-dir install vellum-workflow-server==0.13.2
 
 ENV PYTHONUNBUFFERED 1
-ENV PYTHONDONTWRITEBYTECODE 1
 COPY ./base-image/code_exec_entrypoint.sh .
 RUN chmod +x /code_exec_entrypoint.sh
 
@@ -56,5 +58,6 @@ CMD ["vellum_start_server"]
 ```
 
 ## Skipping Publishes
+
 If you wish to automatically skip publishing a new version when merging to main you can add a [skip-publish] to your commit message. This is useful if your changes are not time sensitive and can just go out with the next release. This avoids causing new services being created causing extra cold starts for our customers and also keeps our public versioning more tidy.
 
README.md
@@ -1,7 +1,9 @@
 # Vellum Workflow Runner Server
+
 This package is meant for installing on container images in order to use custom docker images when using Vellum Workflows.
 
 ## Example Dockerfile Usage:
+
 ```
 FROM python:3.11.6-slim-bookworm
 
@@ -14,7 +16,6 @@ RUN pip install --upgrade pip
 RUN pip --no-cache-dir install vellum-workflow-server==0.13.2
 
 ENV PYTHONUNBUFFERED 1
-ENV PYTHONDONTWRITEBYTECODE 1
 COPY ./base-image/code_exec_entrypoint.sh .
 RUN chmod +x /code_exec_entrypoint.sh
 
@@ -22,4 +23,5 @@ CMD ["vellum_start_server"]
 ```
 
 ## Skipping Publishes
+
 If you wish to automatically skip publishing a new version when merging to main you can add a [skip-publish] to your commit message. This is useful if your changes are not time sensitive and can just go out with the next release. This avoids causing new services being created causing extra cold starts for our customers and also keeps our public versioning more tidy.
pyproject.toml
@@ -3,7 +3,7 @@ name = "vellum-workflow-server"
 
 [tool.poetry]
 name = "vellum-workflow-server"
-version = "1.9.7.post1"
+version = "1.12.0.post1"
 description = ""
 readme = "README.md"
 authors = []
@@ -45,8 +45,9 @@ flask = "2.3.3"
 orderly-set = "5.2.2"
 pebble = "5.0.7"
 gunicorn = "23.0.0"
-
-
+orjson = "3.11.4"
+vellum-ai = "1.12.0"
+python-dotenv = "1.2.1"
 retrying = "1.3.4"
 sentry-sdk = {extras = ["flask"], version = "2.20.0"}
 
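The dependency table adds orjson and re-pins vellum-ai and python-dotenv (the two removed lines held the previous pins, whose exact versions were not preserved above). A quick way to confirm the new pins inside a built image is to query installed distribution metadata with the standard library; a minimal sketch:

```python
from importlib.metadata import version

# Pins taken from the [tool.poetry] dependency table above; these
# assertions should hold in an environment built from 1.12.0.post1.
assert version("orjson") == "3.11.4"
assert version("vellum-ai") == "1.12.0"
assert version("python-dotenv") == "1.2.1"
```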
src/workflow_server/api/tests/test_workflow_view.py
@@ -389,7 +389,15 @@ class MyAdditionNode(BaseNode):
             },
             "id": "2464b610-fb6d-495b-b17c-933ee147f19f",
             "label": "My Addition Node",
-            "outputs": [
+            "outputs": [
+                {
+                    "id": "f39d85c9-e7bf-45e1-bb67-f16225db0118",
+                    "name": "result",
+                    "type": "NUMBER",
+                    "value": None,
+                    "schema": {"type": "integer"},
+                }
+            ],
             "ports": [{"id": "bc489295-cd8a-4aa2-88bb-34446374100d", "name": "default", "type": "DEFAULT"}],
             "trigger": {"id": "ff580cad-73d6-44fe-8f2c-4b8dc990ee70", "merge_behavior": "AWAIT_ATTRIBUTES"},
             "type": "GENERIC",
vellum_workflow_server-1.12.0.post1/src/workflow_server/api/tests/test_workflow_view_async_exec.py (ADDED)
@@ -0,0 +1,410 @@
+import pytest
+import logging
+import time
+from uuid import uuid4
+
+from workflow_server.server import create_app
+from workflow_server.utils.system_utils import get_active_process_count
+
+
+@pytest.fixture(autouse=True)
+def drain_background_threads():
+    """
+    Ensures background threads from previous tests complete before starting the next test.
+    This prevents cross-test interference in process count assertions.
+    """
+    baseline = get_active_process_count()
+    yield
+
+    deadline = time.time() + 15
+    while time.time() < deadline:
+        current_count = get_active_process_count()
+        if current_count == baseline:
+            break
+        time.sleep(0.1)
+
+
+def test_async_exec_route__happy_path():
+    """
+    Tests that the async-exec route successfully accepts a valid workflow and returns immediately.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    # AND a valid workflow request
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+
+class Workflow(BaseWorkflow):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+        },
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 200 response
+    assert response.status_code == 200
+
+    # AND the response should indicate success
+    assert response.json == {"success": True}
+
+
+def test_async_exec_route__with_inputs():
+    """
+    Tests that the async-exec route handles workflows with inputs correctly.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    # AND a valid workflow request with inputs
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [
+            {"name": "foo", "type": "STRING", "value": "hello"},
+        ],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+from vellum.workflows.state import BaseState
+from .inputs import Inputs
+
+class Workflow(BaseWorkflow[Inputs, BaseState]):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+            "inputs.py": """\
+from vellum.workflows.inputs import BaseInputs
+
+class Inputs(BaseInputs):
+    foo: str
+""",
+        },
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 200 response
+    assert response.status_code == 200
+
+    # AND the response should indicate success
+    assert response.json == {"success": True}
+
+
+def test_async_exec_route__with_state():
+    """
+    Tests that the async-exec route handles workflows with state correctly.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    # AND a valid workflow request with state
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "state": {"foo": "bar"},
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+from vellum.workflows.inputs import BaseInputs
+from .state import State
+
+class Workflow(BaseWorkflow[BaseInputs, State]):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = State.foo
+""",
+            "state.py": """\
+from vellum.workflows.state import BaseState
+
+class State(BaseState):
+    foo: str
+""",
+        },
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 200 response
+    assert response.status_code == 200
+
+    # AND the response should indicate success
+    assert response.json == {"success": True}
+
+
+def test_async_exec_route__invalid_context():
+    """
+    Tests that the async-exec route returns 400 for invalid request context.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    # AND an invalid request missing required fields
+    request_body = {
+        "inputs": [],
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 400 response
+    assert response.status_code == 400
+
+    # AND the response should contain error details
+    assert "detail" in response.json
+    assert "Invalid context" in response.json["detail"]
+
+
+def test_async_exec_route__missing_files():
+    """
+    Tests that the async-exec route returns 400 when files are missing.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 400 response
+    assert response.status_code == 400
+
+    # AND the response should contain error details
+    assert "detail" in response.json
+    assert "Invalid context" in response.json["detail"]
+
+
+def test_async_exec_route__with_syntax_error_in_workflow():
+    """
+    Tests that the async-exec route handles workflows with syntax errors gracefully.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+
+class Workflow(BaseWorkflow)
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+        },
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 200 response (async execution is accepted)
+    assert response.status_code == 200
+
+    # AND the response should indicate success
+    assert response.json == {"success": True}
+
+
+def test_async_exec_route__with_invalid_inputs():
+    """
+    Tests that the async-exec route handles workflows with invalid inputs gracefully.
+    """
+    # GIVEN a Flask application
+    flask_app = create_app()
+
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+from vellum.workflows.state import BaseState
+from .inputs import Inputs
+
+class Workflow(BaseWorkflow[Inputs, BaseState]):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+            "inputs.py": """\
+from vellum.workflows.inputs import BaseInputs
+
+class Inputs(BaseInputs):
+    foo: str
+""",
+        },
+    }
+
+    # WHEN we make a request to the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we should get a 200 response (async execution is accepted)
+    assert response.status_code == 200
+
+    # AND the response should indicate success
+    assert response.json == {"success": True}
+
+
+def test_async_exec_route__background_thread_completes(caplog):
+    """
+    Verifies that the async background worker thread runs to completion.
+    """
+    # GIVEN a Flask application with log capture enabled
+    caplog.set_level(logging.INFO, logger="workflow_server.api.workflow_view")
+    flask_app = create_app()
+
+    baseline = get_active_process_count()
+
+    # AND a valid workflow request
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+
+class Workflow(BaseWorkflow):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+        },
+    }
+
+    # WHEN we call the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we get immediate acceptance
+    assert response.status_code == 200
+    assert response.json == {"success": True}
+
+    # AND the background thread should complete
+    completion_deadline = time.time() + 15
+    saw_completion_log = False
+    while time.time() < completion_deadline:
+        if any("Workflow async exec completed" in rec.message for rec in caplog.records):
+            saw_completion_log = True
+            break
+        time.sleep(0.1)
+
+    # THEN we should observe the completion log
+    assert saw_completion_log, "Did not observe background completion log within 15 seconds"
+
+    cleanup_deadline = time.time() + 15
+    process_count_returned = False
+    while time.time() < cleanup_deadline:
+        current_count = get_active_process_count()
+        if current_count == baseline:
+            process_count_returned = True
+            break
+        time.sleep(0.1)
+
+    current_count = get_active_process_count()
+    assert process_count_returned, (
+        f"Process count did not return to baseline within 15 seconds after completion log. "
+        f"Expected: {baseline}, Current: {current_count}"
+    )
+
+
+def test_async_exec_route__background_thread_completes_on_error(caplog):
+    """
+    Verifies that the background worker completes even when the workflow fails early.
+    """
+    # GIVEN a Flask application with log capture enabled
+    caplog.set_level(logging.INFO, logger="workflow_server.api.workflow_view")
+    flask_app = create_app()
+
+    baseline = get_active_process_count()
+
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+
+class Workflow(BaseWorkflow)
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+        },
+    }
+
+    # WHEN we call the async-exec route
+    with flask_app.test_client() as test_client:
+        response = test_client.post("/workflow/async-exec", json=request_body)
+
+    # THEN we get immediate acceptance
+    assert response.status_code == 200
+    assert response.json == {"success": True}
+
+    # AND the background thread should complete and clean up resources
+    deadline = time.time() + 15
+    process_count_returned = False
+    while time.time() < deadline:
+        current_count = get_active_process_count()
+        if current_count == baseline:
+            process_count_returned = True
+            break
+        time.sleep(0.1)
+
+    current_count = get_active_process_count()
+    assert process_count_returned, (
+        f"Process count did not return to baseline on error within 15 seconds. "
+        f"Expected: {baseline}, Current: {current_count}"
+    )
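Outside the test suite, exercising the new route by hand looks roughly like the sketch below, reusing the payload shape from these tests. The host and port are assumptions for a locally running server; the route returns its acceptance response immediately and runs the workflow in a background thread.

```python
from uuid import uuid4

import requests  # assumed available in the calling environment

# Payload mirrors the happy-path test above.
payload = {
    "execution_id": str(uuid4()),
    "inputs": [],
    "environment_api_key": "test",
    "module": "workflow",
    "timeout": 360,
    "files": {
        "__init__.py": "",
        "workflow.py": (
            "from vellum.workflows import BaseWorkflow\n"
            "\n"
            "class Workflow(BaseWorkflow):\n"
            "    class Outputs(BaseWorkflow.Outputs):\n"
            '        foo = "hello"\n'
        ),
    },
}

# http://localhost:8000 is illustrative; point this at wherever the
# workflow server is actually listening.
response = requests.post("http://localhost:8000/workflow/async-exec", json=payload, timeout=10)
print(response.json())  # {"success": True} on acceptance
```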
src/workflow_server/api/tests/test_workflow_view_stream_workflow_route.py
@@ -549,7 +549,10 @@ class Inputs(BaseInputs):
     # AND the third event should be workflow execution rejected
     assert events[2]["name"] == "workflow.execution.rejected"
     assert events[1]["span_id"] == events[2]["span_id"]
-
+    actual_error_message = events[2]["body"]["error"]["message"]
+    assert "Required input variables" in actual_error_message
+    assert "foo" in actual_error_message
+    assert "should have defined value" in actual_error_message
 
     # AND the fourth event should be vembda execution fulfilled
     assert events[3]["name"] == "vembda.execution.fulfilled"
@@ -1241,3 +1244,145 @@ class InvalidWorkflow(BaseWorkflow):
     assert events[3]["name"] == "vembda.execution.fulfilled"
     assert events[3]["span_id"] == str(span_id)
     assert events[3]["body"]["exit_code"] == 0
+
+
+@mock.patch("workflow_server.api.workflow_view.get_is_oom_killed")
+def test_stream_workflow_route__oom_does_not_set_timed_out_flag(mock_get_is_oom_killed):
+    """
+    Tests that when an OOM error occurs, we don't set the timed_out flag in the vembda fulfilled event.
+    """
+    # GIVEN a workflow that takes some time to execute
+    span_id = uuid4()
+    request_body = {
+        "timeout": 10,
+        "execution_id": str(span_id),
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+import time
+
+from vellum.workflows.nodes.bases.base import BaseNode
+from vellum.workflows.workflows.base import BaseWorkflow
+
+
+class SlowNode(BaseNode):
+    class Outputs(BaseNode.Outputs):
+        value: str
+
+    def run(self) -> Outputs:
+        time.sleep(2)
+        return self.Outputs(value="hello world")
+
+
+class OOMWorkflow(BaseWorkflow):
+    graph = SlowNode
+    class Outputs(BaseWorkflow.Outputs):
+        final_value = SlowNode.Outputs.value
+
+""",
+        },
+    }
+
+    # WHEN we mock the OOM killer to trigger after a few checks
+    call_count = [0]
+
+    def mock_oom_side_effect():
+        call_count[0] += 1
+        if call_count[0] > 3:
+            return True
+        return False
+
+    mock_get_is_oom_killed.side_effect = mock_oom_side_effect
+
+    # AND we call the stream route
+    status_code, events = flask_stream(request_body)
+
+    # THEN we get a 200 response
+    assert status_code == 200
+
+    # AND we get the expected events
+    event_names = [e["name"] for e in events]
+
+    assert "vembda.execution.initiated" in event_names
+
+    # THEN the key assertion: if there's a vembda.execution.fulfilled event, it should NOT have timed_out=True
+    vembda_fulfilled_event = next(e for e in events if e["name"] == "vembda.execution.fulfilled")
+    assert (
+        vembda_fulfilled_event["body"].get("timed_out") is not True
+    ), "timed_out flag should not be set when OOM occurs"
+
+
+@mock.patch("workflow_server.api.workflow_view.ENABLE_PROCESS_WRAPPER", False)
+def test_stream_workflow_route__client_disconnect_emits_rejected_event():
+    """
+    Tests that when a client disconnects mid-stream (GeneratorExit), we emit a workflow execution
+    rejected event to the events.create API.
+    """
+    # GIVEN a valid request body for a workflow that yields multiple events
+    span_id = uuid4()
+    trace_id = uuid4()
+    request_body = {
+        "timeout": 360,
+        "execution_id": str(span_id),
+        "execution_context": {
+            "trace_id": str(trace_id),
+        },
+        "inputs": [],
+        "environment_api_key": "test",
+        "module": "workflow",
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+
+class Workflow(BaseWorkflow):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+        },
+    }
+
+    # AND a mock to capture events.create calls
+    events_create_calls = []
+
+    def mock_events_create(request):
+        events_create_calls.append(request)
+
+    # WHEN we call the stream route and simulate a client disconnect
+    flask_app = create_app()
+    with flask_app.test_client() as test_client:
+        with mock.patch("workflow_server.core.workflow_executor_context.create_vellum_client") as mock_create_client:
+            mock_client = mock.MagicMock()
+            mock_client.events.create = mock_events_create
+            mock_create_client.return_value = mock_client
+
+            response = test_client.post("/workflow/stream", json=request_body)
+
+            # Get the response iterator and consume a few chunks to start the stream
+            response_iter = response.response
+            next(response_iter)
+
+            # Close the response to trigger GeneratorExit
+            response_iter.close()
+
+    # THEN the events.create API should have been called with rejected event
+    assert len(events_create_calls) > 0, "events.create should have been called on client disconnect"
+
+    # AND the call should include a workflow.execution.rejected event (sent as SDK event model)
+    last_call = events_create_calls[-1]
+    assert isinstance(last_call, list), "events.create should be called with a list"
+    assert len(last_call) == 1, "Should have exactly one rejected event"
+
+    rejected_event = last_call[0]
+    assert rejected_event.name == "workflow.execution.rejected", "Should be a rejected event"
+
+    # AND the rejected event should have the correct error message
+    assert "client disconnected" in rejected_event.body.error.message.lower()
+
+    # AND the rejected event should have a workflow_definition
+    # TODO: In the future, we should capture the real workflow_definition from the initiated event.
+    # For now, we use BaseWorkflow as a placeholder.
+    assert rejected_event.body.workflow_definition is not None, "Should have a workflow_definition"
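The disconnect test above relies on a core Python behavior: closing a generator raises GeneratorExit inside it, which is the hook the stream route uses to detect that the client went away. A standalone illustration of that mechanism:

```python
def stream():
    try:
        while True:
            yield "chunk"
    except GeneratorExit:
        # This is the same signal the stream route catches before
        # emitting its workflow.execution.rejected event.
        print("client went away")
        raise  # let the generator finish closing normally

g = stream()
next(g)    # prime the generator so it is suspended at a yield
g.close()  # triggers GeneratorExit; prints "client went away"
```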
src/workflow_server/api/workflow_view.py
@@ -15,12 +15,21 @@ from uuid import uuid4
 from typing import Any, Dict, Generator, Iterator, Optional, Union, cast
 
 from flask import Blueprint, Response, current_app as app, request, stream_with_context
+import orjson
 from pydantic import ValidationError
 from vellum_ee.workflows.display.nodes.get_node_display_class import get_node_display_class
 from vellum_ee.workflows.display.types import WorkflowDisplayContext
 from vellum_ee.workflows.display.workflows import BaseWorkflowDisplay
 from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder
 
+from vellum.workflows import BaseWorkflow
+from vellum.workflows.errors import WorkflowError, WorkflowErrorCode
+from vellum.workflows.events.workflow import (
+    WorkflowExecutionInitiatedBody,
+    WorkflowExecutionInitiatedEvent,
+    WorkflowExecutionRejectedBody,
+    WorkflowExecutionRejectedEvent,
+)
 from vellum.workflows.exceptions import WorkflowInitializationException
 from vellum.workflows.nodes import BaseNode
 from vellum.workflows.vellum_client import create_vellum_client
@@ -115,7 +124,7 @@ def stream_workflow_route() -> Response:
             for row in workflow_events:
                 yield "\n"
                 if isinstance(row, dict):
-                    dump =
+                    dump = orjson.dumps(row).decode("utf-8")
                     yield dump
                 else:
                     yield row
@@ -134,7 +143,7 @@ def stream_workflow_route() -> Response:
             # These can happen either from Vembda disconnects (possibily from predict disconnects) or
             # from knative activator gateway timeouts which are caused by idleTimeout or responseStartSeconds
             # being exceeded.
-            app.logger.
+            app.logger.warning(
                 "Client disconnected in the middle of the Workflow Stream",
                 extra={
                     "sentry_tags": {
@@ -143,6 +152,11 @@ def stream_workflow_route() -> Response:
                     }
                 },
             )
+            _emit_client_disconnect_events(
+                context,
+                span_id,
+                "Client disconnected in the middle of the Workflow Stream",
+            )
             return
         except Exception as e:
             logger.exception("Error during workflow response stream generator", extra={"error": e})
@@ -173,6 +187,75 @@ def stream_workflow_route() -> Response:
     return resp
 
 
+def _emit_async_error_events(
+    context: WorkflowExecutorContext, error_message: str, stacktrace: Optional[str] = None
+) -> None:
+    """
+    Emit workflow execution error events when async execution fails before or during workflow startup.
+
+    This ensures that errors in async mode are properly reported to Vellum's events API,
+    making them visible in the executions UI.
+    """
+    try:
+        workflow_span_id = context.workflow_span_id or str(uuid4())
+
+        initiated_event = WorkflowExecutionInitiatedEvent[Any, Any](
+            trace_id=context.trace_id,
+            span_id=workflow_span_id,
+            body=WorkflowExecutionInitiatedBody(inputs=context.inputs),
+            parent=context.execution_context.parent_context if context.execution_context else None,
+        )
+
+        rejected_event = WorkflowExecutionRejectedEvent(
+            trace_id=context.trace_id,
+            span_id=workflow_span_id,
+            body=WorkflowExecutionRejectedBody(
+                error=WorkflowError(
+                    message=error_message,
+                    code=WorkflowErrorCode.INTERNAL_ERROR,
+                ),
+                stacktrace=stacktrace,
+            ),
+            parent=context.execution_context.parent_context if context.execution_context else None,
+        )
+
+        context.vellum_client.events.create(request=[initiated_event, rejected_event])  # type: ignore[list-item]
+    except Exception as e:
+        logger.exception(f"Failed to emit async error events: {e}")
+
+
+def _emit_client_disconnect_events(
+    context: WorkflowExecutorContext,
+    workflow_span_id: str,
+    error_message: str,
+) -> None:
+    """
+    Emit workflow execution rejected event when a client disconnects mid-stream.
+
+    Since the workflow has already started streaming (the initiated event was already emitted),
+    we only need to emit the rejected event to properly close out the execution.
+    """
+    try:
+        # TODO: In the future, we should capture the real workflow_definition from the initiated event
+        # For now, we use BaseWorkflow as a placeholder
+        rejected_event = WorkflowExecutionRejectedEvent(
+            trace_id=context.trace_id,
+            span_id=workflow_span_id,
+            body=WorkflowExecutionRejectedBody(
+                workflow_definition=BaseWorkflow,
+                error=WorkflowError(
+                    message=error_message,
+                    code=WorkflowErrorCode.WORKFLOW_CANCELLED,
+                ),
+            ),
+            parent=context.execution_context.parent_context if context.execution_context else None,
+        )
+
+        context.vellum_client.events.create(request=[rejected_event])  # type: ignore[list-item]
+    except Exception as e:
+        logger.exception(f"Failed to emit client disconnect events: {e}")
+
+
 @bp.route("/async-exec", methods=["POST"])
 def async_exec_workflow() -> Response:
     data = request.get_json()
@@ -207,8 +290,8 @@ def async_exec_workflow() -> Response:
     try:
         start_workflow_result = _start_workflow(context)
         if isinstance(start_workflow_result, Response):
-
-
+            error_detail = start_workflow_result.get_json().get("detail", "Unknown error during workflow startup")
+            _emit_async_error_events(context, error_detail)
             return
 
         workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_result
@@ -222,6 +305,7 @@ def async_exec_workflow() -> Response:
         )
     except Exception as e:
         logger.exception("Error during workflow async background worker", e)
+        _emit_async_error_events(context, str(e), traceback.format_exc())
     finally:
         if ENABLE_PROCESS_WRAPPER:
             try:
@@ -500,11 +584,11 @@ def stream_node_route() -> Response:
             break
 
     def generator() -> Generator[str, None, None]:
-        yield
+        yield orjson.dumps(vembda_initiated_event.model_dump(mode="json")).decode("utf-8")
 
        for row in node_events():
            yield "\n"
-            yield
+            yield orjson.dumps(row).decode("utf-8")
 
     headers = {
         "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
@@ -530,11 +614,18 @@ def serialize_route() -> Response:
     is_new_server = data.get("is_new_server", False)
     module = data.get("module")
 
+    headers = {
+        "X-Vellum-Is-New-Server": str(is_new_server).lower(),
+    }
+
     if not files:
+        error_message = "No files received"
+        logger.warning(error_message)
         return Response(
-            json.dumps({"detail":
+            json.dumps({"detail": error_message}),
             status=400,
             content_type="application/json",
+            headers=headers,
         )
 
     client = create_vellum_client(api_key=workspace_api_key)
@@ -543,10 +634,6 @@ def serialize_route() -> Response:
     namespace = get_random_namespace()
     virtual_finder = VirtualFileFinder(files, namespace, source_module=module)
 
-    headers = {
-        "X-Vellum-Is-New-Server": str(is_new_server).lower(),
-    }
-
     try:
         sys.meta_path.append(virtual_finder)
         result = BaseWorkflowDisplay.serialize_module(namespace, client=client, dry_run=True)
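A recurring pattern in this file's changes is swapping json.dumps for orjson.dumps. One subtlety worth noting: orjson.dumps returns bytes rather than str, which is why every call site above decodes before yielding into the text stream. A minimal illustration:

```python
import orjson

event = {"name": "workflow.execution.fulfilled", "span_id": "example"}

# orjson.dumps returns bytes, unlike json.dumps which returns str,
# so stream generators must decode before yielding text chunks.
chunk = orjson.dumps(event).decode("utf-8")
assert isinstance(chunk, str)
```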
src/workflow_server/code_exec_runner.py
@@ -1,11 +1,12 @@
 from datetime import datetime
-import json
 import logging
 import os
 from threading import Event as ThreadingEvent
 from uuid import uuid4
 from typing import Optional
 
+import orjson
+
 from workflow_server.core.events import VembdaExecutionInitiatedBody, VembdaExecutionInitiatedEvent
 from workflow_server.core.executor import stream_workflow
 from workflow_server.core.utils import serialize_vembda_rejected_event
@@ -29,7 +30,7 @@ def run_code_exec_stream() -> None:
     split_input = input_raw.split("\n--vellum-input-stop--\n")
     input_json = split_input[0]
 
-    input_data =
+    input_data = orjson.loads(input_json)
     context = WorkflowExecutorContext.model_validate(input_data)
 
     print("--vellum-output-start--")  # noqa: T201
@@ -53,7 +54,7 @@ def run_code_exec_stream() -> None:
             cancel_signal=ThreadingEvent(),
         )
         for line in stream_iterator:
-            print(f"{_EVENT_LINE}{
+            print(f"{_EVENT_LINE}{orjson.dumps(line).decode('utf-8')}")  # noqa: T201
     except Exception as e:
         logger.exception(e)
 
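The runner reads its executor context from stdin, delimited by a sentinel line, and now parses it with orjson (which accepts both str and bytes input). A small sketch of the framing it expects; the payload fields here are illustrative:

```python
import orjson

# Everything before the sentinel is the JSON-encoded executor context;
# the sentinel matches the split() call in run_code_exec_stream.
raw = '{"execution_id": "abc", "module": "workflow"}\n--vellum-input-stop--\nignored trailer'
input_json = raw.split("\n--vellum-input-stop--\n")[0]
input_data = orjson.loads(input_json)
assert input_data["module"] == "workflow"
```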
src/workflow_server/core/cancel_workflow.py
@@ -14,14 +14,18 @@ logger = logging.getLogger(__name__)
 
 
 def get_is_workflow_cancelled(execution_id: UUID, vembda_public_url: Optional[str]) -> bool:
-
-
-
-
-
-
+    try:
+        response = requests.get(
+            f"{vembda_public_url}/vembda-public/cancel-workflow-execution-status/{execution_id}",
+            headers={"Accept": "application/json"},
+            timeout=5,
+        )
+        response.raise_for_status()
 
-
+        return response.json().get("cancelled", False)
+    except Exception:
+        logger.exception("Error checking workflow cancellation status")
+        return False
 
 
 class CancelWorkflowWatcherThread(Thread):
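As reconstructed above, the helper now fails closed: any request or HTTP error is logged and reported as "not cancelled" rather than propagating into the watcher thread. Illustrative usage, assuming the package is installed; the base URL is made up, and an unreachable host simply yields False:

```python
from uuid import uuid4

from workflow_server.core.cancel_workflow import get_is_workflow_cancelled

# With an unreachable or erroring endpoint the helper logs the failure
# and returns False instead of raising, so the workflow keeps running.
print(get_is_workflow_cancelled(uuid4(), "https://vembda.invalid"))
```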
src/workflow_server/core/executor.py
@@ -1,6 +1,5 @@
 from datetime import datetime, timezone
 from io import StringIO
-import json
 import logging
 from multiprocessing import Process, Queue
 import os
@@ -11,9 +10,11 @@ from threading import Event as ThreadingEvent
 import time
 from traceback import format_exc
 from uuid import UUID, uuid4
-from typing import Any, Callable, Generator, Iterator, Optional, Tuple, Type
+from typing import Any, Callable, Generator, Iterator, Optional, Tuple
 
+import orjson
 from vellum_ee.workflows.display.utils.events import event_enricher
+from vellum_ee.workflows.display.utils.expressions import base_descriptor_validator
 from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder
 
 from vellum.workflows import BaseWorkflow
@@ -104,7 +105,7 @@ def _stream_workflow_wrapper(
             span_id_emitted = True
 
         for event in stream_iterator:
-            queue.put(
+            queue.put(orjson.dumps(event).decode("utf-8"))
 
     except Exception as e:
         if not span_id_emitted:
@@ -177,6 +178,7 @@ def stream_workflow(
     node_output_mocks = MockNodeExecution.validate_all(
         executor_context.node_output_mocks,
         workflow.__class__,
+        descriptor_validator=base_descriptor_validator,
     )
 
     cancel_signal = cancel_signal or ThreadingEvent()
@@ -192,6 +194,7 @@ def stream_workflow(
         timeout=executor_context.timeout,
         trigger=trigger,
         execution_id=executor_context.workflow_span_id,
+        event_max_size=executor_context.event_max_size,
     )
 except WorkflowInitializationException as e:
     cancel_watcher_kill_switch.set()
@@ -273,32 +276,11 @@ def stream_node(
     disable_redirect: bool = True,
 ) -> Iterator[dict]:
     workflow, namespace = _create_workflow(executor_context)
-    Node: Optional[Type[BaseNode]] = None
-
-    for workflow_node in workflow.get_nodes():
-        if executor_context.node_id and workflow_node.__id__ == executor_context.node_id:
-            Node = workflow_node
-            break
-        elif (
-            executor_context.node_module
-            and executor_context.node_name
-            and workflow_node.__name__ == executor_context.node_name
-            and workflow_node.__module__ == f"{namespace}.{executor_context.node_module}"
-        ):
-            Node = workflow_node
-            break
-
-    if not Node:
-        identifier = executor_context.node_id or f"{executor_context.node_module}.{executor_context.node_name}"
-        raise WorkflowInitializationException(
-            message=f"Node '{identifier}' not found in workflow",
-            workflow_definition=workflow.__class__,
-        )
 
     def call_node() -> Generator[dict[str, Any], Any, None]:
         executor_context.stream_start_time = time.time_ns()
 
-        for event in workflow.run_node(
+        for event in workflow.run_node(executor_context.node_ref, inputs=executor_context.inputs):
            yield event.model_dump(mode="json")
 
     return _call_stream(
src/workflow_server/core/utils.py
@@ -2,6 +2,7 @@ from datetime import datetime
 from uuid import uuid4
 from typing import Optional
 
+from workflow_server.config import IS_ASYNC_MODE
 from workflow_server.core.events import VembdaExecutionFulfilledBody, VembdaExecutionFulfilledEvent
 from workflow_server.core.workflow_executor_context import BaseExecutorContext
 
@@ -46,6 +47,9 @@ def serialize_vembda_rejected_event(
 
 
 def is_events_emitting_enabled(executor_context: Optional[BaseExecutorContext]) -> bool:
+    if IS_ASYNC_MODE:
+        return True
+
     if not executor_context:
         return False
 
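The effect of the IS_ASYNC_MODE short-circuit is that event emission is unconditionally on in async deployments, before any executor-context inspection. A standalone sketch of the decision order, with the unchanged context checks collapsed to a placeholder:

```python
from typing import Optional


def events_emitting_enabled_sketch(is_async_mode: bool, executor_context: Optional[object]) -> bool:
    # Async mode wins before any context inspection.
    if is_async_mode:
        return True
    if not executor_context:
        return False
    return True  # placeholder for the unchanged context-based checks
```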
src/workflow_server/core/workflow_executor_context.py
@@ -3,7 +3,7 @@ from functools import cached_property
 import os
 import time
 from uuid import UUID
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from typing_extensions import Self
 
 from flask import has_request_context, request
@@ -41,6 +41,7 @@ class BaseExecutorContext(UniversalBaseModel):
     # when running in async mode.
     workflow_span_id: Optional[UUID] = None
     vembda_service_initiated_timestamp: Optional[int] = None
+    event_max_size: Optional[int] = None
 
     @field_validator("inputs", mode="before")
     @classmethod
@@ -91,6 +92,18 @@ class NodeExecutorContext(BaseExecutorContext):
     node_module: Optional[str] = None
     node_name: Optional[str] = None
 
+    @property
+    def node_ref(self) -> Union[UUID, str]:
+        """
+        Returns the node reference for use with workflow.run_node().
+
+        Returns node_id if it exists, otherwise returns the combination
+        of node_module and node_name as a fully qualified string.
+        """
+        if self.node_id:
+            return self.node_id
+        return f"{self.node_module}.{self.node_name}"
+
     @model_validator(mode="after")
     def validate_node_identification(self) -> Self:
         if not self.node_id and not (self.node_module and self.node_name):
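node_ref feeds directly into workflow.run_node(...) in executor.py above, replacing the server-side node scan that was removed. The resolution order is simple enough to state standalone; this is a sketch of the property's logic, not the class itself:

```python
from typing import Optional, Union
from uuid import UUID, uuid4


def resolve_node_ref(node_id: Optional[UUID], node_module: Optional[str], node_name: Optional[str]) -> Union[UUID, str]:
    # An explicit node_id wins; otherwise module and class name are
    # joined into a fully qualified string, mirroring node_ref above.
    if node_id:
        return node_id
    return f"{node_module}.{node_name}"


assert resolve_node_ref(None, "nodes.my_node", "MyNode") == "nodes.my_node.MyNode"
assert isinstance(resolve_node_ref(uuid4(), None, None), UUID)
```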
src/workflow_server/start.py
@@ -64,8 +64,8 @@ def start() -> None:
         "workers": int(os.getenv("GUNICORN_WORKERS", 2)),
         "threads": int(os.getenv("GUNICORN_THREADS", 9 if ENABLE_PROCESS_WRAPPER else 6)),
         # Aggressively try to avoid memory leaks when using non process mode
-        "max_requests": 120 if ENABLE_PROCESS_WRAPPER else 20,
-        "max_requests_jitter": 30 if ENABLE_PROCESS_WRAPPER else 10,
+        "max_requests": int(os.getenv("GUNICORN_MAX_REQUESTS", 120 if ENABLE_PROCESS_WRAPPER else 20)),
+        "max_requests_jitter": int(os.getenv("GUNICORN_MAX_REQUESTS_JITTER", 30 if ENABLE_PROCESS_WRAPPER else 10)),
         "worker_class": "gthread",
         "timeout": max_workflow_runtime_seconds,
         "logger_class": CustomGunicornLogger,
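With this change, worker recycling becomes tunable per deployment through environment variables instead of a code change; the names match the os.getenv calls exactly. For example, set before vellum_start_server boots gunicorn (values must parse as integers):

```python
import os

# Overrides the defaults of 120/30 (process-wrapper mode) or 20/10.
os.environ["GUNICORN_MAX_REQUESTS"] = "200"
os.environ["GUNICORN_MAX_REQUESTS_JITTER"] = "50"
```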
src/workflow_server/utils/utils.py
@@ -59,10 +59,19 @@ def convert_json_inputs_to_vellum(inputs: List[dict]) -> dict:
 
 
 def get_version() -> dict:
+    # Return hotswappable lock file so we can save it and reuse it
+    lock_file = None
+    try:
+        with open("/app/uv.lock", "r") as f:
+            lock_file = f.read()
+    except Exception:
+        pass
+
     return {
         "sdk_version": version("vellum-ai"),
         "server_version": "local" if is_development() else version("vellum-workflow-server"),
         "container_image": CONTAINER_IMAGE,
+        "lock_file": lock_file,
     }
 
 
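After this change, the version payload carries the raw contents of /app/uv.lock when that file exists and None otherwise (e.g. in local development, where the open() call fails and is swallowed). Roughly, the returned shape looks like this; all values are illustrative:

```python
# Illustrative get_version() result; lock_file is the raw text of
# /app/uv.lock, or None when the file is absent.
{
    "sdk_version": "1.12.0",
    "server_version": "1.12.0.post1",
    "container_image": "<image ref>",
    "lock_file": "version = 1\n...",
}
```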