vellum-workflow-server 1.9.7.post1__tar.gz → 1.11.0.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/PKG-INFO +4 -3
  2. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/pyproject.toml +4 -3
  3. vellum_workflow_server-1.11.0.post1/src/workflow_server/api/tests/test_workflow_view_async_exec.py +410 -0
  4. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/tests/test_workflow_view_stream_workflow_route.py +69 -0
  5. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/workflow_view.py +4 -3
  6. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/code_exec_runner.py +4 -3
  7. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/core/executor.py +4 -25
  8. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/core/utils.py +4 -0
  9. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/core/workflow_executor_context.py +13 -1
  10. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/start.py +2 -2
  11. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/README.md +0 -0
  12. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/__init__.py +0 -0
  13. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/__init__.py +0 -0
  14. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/auth_middleware.py +0 -0
  15. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/healthz_view.py +0 -0
  16. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/status_view.py +0 -0
  17. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/tests/__init__.py +0 -0
  18. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/tests/test_input_display_mapping.py +0 -0
  19. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/api/tests/test_workflow_view.py +0 -0
  20. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/config.py +0 -0
  21. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/core/__init__.py +0 -0
  22. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/core/cancel_workflow.py +0 -0
  23. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/core/events.py +0 -0
  24. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/logging_config.py +0 -0
  25. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/server.py +0 -0
  26. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/__init__.py +0 -0
  27. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/exit_handler.py +0 -0
  28. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/log_proxy.py +0 -0
  29. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/oom_killer.py +0 -0
  30. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/sentry.py +0 -0
  31. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/system_utils.py +0 -0
  32. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/tests/__init__.py +0 -0
  33. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/tests/test_sentry_integration.py +0 -0
  34. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/tests/test_system_utils.py +0 -0
  35. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/tests/test_utils.py +0 -0
  36. {vellum_workflow_server-1.9.7.post1 → vellum_workflow_server-1.11.0.post1}/src/workflow_server/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vellum-workflow-server
3
- Version: 1.9.7.post1
3
+ Version: 1.11.0.post1
4
4
  Summary:
5
5
  License: AGPL
6
6
  Requires-Python: >=3.9.0,<4
@@ -24,12 +24,13 @@ Requires-Dist: cryptography (==43.0.3)
24
24
  Requires-Dist: flask (==2.3.3)
25
25
  Requires-Dist: gunicorn (==23.0.0)
26
26
  Requires-Dist: orderly-set (==5.2.2)
27
+ Requires-Dist: orjson (==3.11.4)
27
28
  Requires-Dist: pebble (==5.0.7)
28
29
  Requires-Dist: pyjwt (==2.10.0)
29
- Requires-Dist: python-dotenv (==1.0.1)
30
+ Requires-Dist: python-dotenv (==1.2.1)
30
31
  Requires-Dist: retrying (==1.3.4)
31
32
  Requires-Dist: sentry-sdk[flask] (==2.20.0)
32
- Requires-Dist: vellum-ai (==1.9.7)
33
+ Requires-Dist: vellum-ai (==1.11.0)
33
34
  Description-Content-Type: text/markdown
34
35
 
35
36
  # Vellum Workflow Runner Server
@@ -3,7 +3,7 @@ name = "vellum-workflow-server"
3
3
 
4
4
  [tool.poetry]
5
5
  name = "vellum-workflow-server"
6
- version = "1.9.7.post1"
6
+ version = "1.11.0.post1"
7
7
  description = ""
8
8
  readme = "README.md"
9
9
  authors = []
@@ -45,8 +45,9 @@ flask = "2.3.3"
45
45
  orderly-set = "5.2.2"
46
46
  pebble = "5.0.7"
47
47
  gunicorn = "23.0.0"
48
- vellum-ai = "1.9.7"
49
- python-dotenv = "1.0.1"
48
+ orjson = "3.11.4"
49
+ vellum-ai = "1.11.0"
50
+ python-dotenv = "1.2.1"
50
51
  retrying = "1.3.4"
51
52
  sentry-sdk = {extras = ["flask"], version = "2.20.0"}
52
53
 
@@ -0,0 +1,410 @@
1
+ import pytest
2
+ import logging
3
+ import time
4
+ from uuid import uuid4
5
+
6
+ from workflow_server.server import create_app
7
+ from workflow_server.utils.system_utils import get_active_process_count
8
+
9
+
10
+ @pytest.fixture(autouse=True)
11
+ def drain_background_threads():
12
+ """
13
+ Ensures background threads from previous tests complete before starting the next test.
14
+ This prevents cross-test interference in process count assertions.
15
+ """
16
+ baseline = get_active_process_count()
17
+ yield
18
+
19
+ deadline = time.time() + 15
20
+ while time.time() < deadline:
21
+ current_count = get_active_process_count()
22
+ if current_count == baseline:
23
+ break
24
+ time.sleep(0.1)
25
+
26
+
27
+ def test_async_exec_route__happy_path():
28
+ """
29
+ Tests that the async-exec route successfully accepts a valid workflow and returns immediately.
30
+ """
31
+ # GIVEN a Flask application
32
+ flask_app = create_app()
33
+
34
+ # AND a valid workflow request
35
+ span_id = uuid4()
36
+ request_body = {
37
+ "execution_id": str(span_id),
38
+ "inputs": [],
39
+ "environment_api_key": "test",
40
+ "module": "workflow",
41
+ "timeout": 360,
42
+ "files": {
43
+ "__init__.py": "",
44
+ "workflow.py": """\
45
+ from vellum.workflows import BaseWorkflow
46
+
47
+ class Workflow(BaseWorkflow):
48
+ class Outputs(BaseWorkflow.Outputs):
49
+ foo = "hello"
50
+ """,
51
+ },
52
+ }
53
+
54
+ # WHEN we make a request to the async-exec route
55
+ with flask_app.test_client() as test_client:
56
+ response = test_client.post("/workflow/async-exec", json=request_body)
57
+
58
+ # THEN we should get a 200 response
59
+ assert response.status_code == 200
60
+
61
+ # AND the response should indicate success
62
+ assert response.json == {"success": True}
63
+
64
+
65
+ def test_async_exec_route__with_inputs():
66
+ """
67
+ Tests that the async-exec route handles workflows with inputs correctly.
68
+ """
69
+ # GIVEN a Flask application
70
+ flask_app = create_app()
71
+
72
+ # AND a valid workflow request with inputs
73
+ span_id = uuid4()
74
+ request_body = {
75
+ "execution_id": str(span_id),
76
+ "inputs": [
77
+ {"name": "foo", "type": "STRING", "value": "hello"},
78
+ ],
79
+ "environment_api_key": "test",
80
+ "module": "workflow",
81
+ "timeout": 360,
82
+ "files": {
83
+ "__init__.py": "",
84
+ "workflow.py": """\
85
+ from vellum.workflows import BaseWorkflow
86
+ from vellum.workflows.state import BaseState
87
+ from .inputs import Inputs
88
+
89
+ class Workflow(BaseWorkflow[Inputs, BaseState]):
90
+ class Outputs(BaseWorkflow.Outputs):
91
+ foo = "hello"
92
+ """,
93
+ "inputs.py": """\
94
+ from vellum.workflows.inputs import BaseInputs
95
+
96
+ class Inputs(BaseInputs):
97
+ foo: str
98
+ """,
99
+ },
100
+ }
101
+
102
+ # WHEN we make a request to the async-exec route
103
+ with flask_app.test_client() as test_client:
104
+ response = test_client.post("/workflow/async-exec", json=request_body)
105
+
106
+ # THEN we should get a 200 response
107
+ assert response.status_code == 200
108
+
109
+ # AND the response should indicate success
110
+ assert response.json == {"success": True}
111
+
112
+
113
+ def test_async_exec_route__with_state():
114
+ """
115
+ Tests that the async-exec route handles workflows with state correctly.
116
+ """
117
+ # GIVEN a Flask application
118
+ flask_app = create_app()
119
+
120
+ # AND a valid workflow request with state
121
+ span_id = uuid4()
122
+ request_body = {
123
+ "execution_id": str(span_id),
124
+ "state": {"foo": "bar"},
125
+ "environment_api_key": "test",
126
+ "module": "workflow",
127
+ "timeout": 360,
128
+ "files": {
129
+ "__init__.py": "",
130
+ "workflow.py": """\
131
+ from vellum.workflows import BaseWorkflow
132
+ from vellum.workflows.inputs import BaseInputs
133
+ from .state import State
134
+
135
+ class Workflow(BaseWorkflow[BaseInputs, State]):
136
+ class Outputs(BaseWorkflow.Outputs):
137
+ foo = State.foo
138
+ """,
139
+ "state.py": """\
140
+ from vellum.workflows.state import BaseState
141
+
142
+ class State(BaseState):
143
+ foo: str
144
+ """,
145
+ },
146
+ }
147
+
148
+ # WHEN we make a request to the async-exec route
149
+ with flask_app.test_client() as test_client:
150
+ response = test_client.post("/workflow/async-exec", json=request_body)
151
+
152
+ # THEN we should get a 200 response
153
+ assert response.status_code == 200
154
+
155
+ # AND the response should indicate success
156
+ assert response.json == {"success": True}
157
+
158
+
159
+ def test_async_exec_route__invalid_context():
160
+ """
161
+ Tests that the async-exec route returns 400 for invalid request context.
162
+ """
163
+ # GIVEN a Flask application
164
+ flask_app = create_app()
165
+
166
+ # AND an invalid request missing required fields
167
+ request_body = {
168
+ "inputs": [],
169
+ }
170
+
171
+ # WHEN we make a request to the async-exec route
172
+ with flask_app.test_client() as test_client:
173
+ response = test_client.post("/workflow/async-exec", json=request_body)
174
+
175
+ # THEN we should get a 400 response
176
+ assert response.status_code == 400
177
+
178
+ # AND the response should contain error details
179
+ assert "detail" in response.json
180
+ assert "Invalid context" in response.json["detail"]
181
+
182
+
183
+ def test_async_exec_route__missing_files():
184
+ """
185
+ Tests that the async-exec route returns 400 when files are missing.
186
+ """
187
+ # GIVEN a Flask application
188
+ flask_app = create_app()
189
+
190
+ span_id = uuid4()
191
+ request_body = {
192
+ "execution_id": str(span_id),
193
+ "inputs": [],
194
+ "environment_api_key": "test",
195
+ "module": "workflow",
196
+ "timeout": 360,
197
+ }
198
+
199
+ # WHEN we make a request to the async-exec route
200
+ with flask_app.test_client() as test_client:
201
+ response = test_client.post("/workflow/async-exec", json=request_body)
202
+
203
+ # THEN we should get a 400 response
204
+ assert response.status_code == 400
205
+
206
+ # AND the response should contain error details
207
+ assert "detail" in response.json
208
+ assert "Invalid context" in response.json["detail"]
209
+
210
+
211
+ def test_async_exec_route__with_syntax_error_in_workflow():
212
+ """
213
+ Tests that the async-exec route handles workflows with syntax errors gracefully.
214
+ """
215
+ # GIVEN a Flask application
216
+ flask_app = create_app()
217
+
218
+ span_id = uuid4()
219
+ request_body = {
220
+ "execution_id": str(span_id),
221
+ "inputs": [],
222
+ "environment_api_key": "test",
223
+ "module": "workflow",
224
+ "timeout": 360,
225
+ "files": {
226
+ "__init__.py": "",
227
+ "workflow.py": """\
228
+ from vellum.workflows import BaseWorkflow
229
+
230
+ class Workflow(BaseWorkflow)
231
+ class Outputs(BaseWorkflow.Outputs):
232
+ foo = "hello"
233
+ """,
234
+ },
235
+ }
236
+
237
+ # WHEN we make a request to the async-exec route
238
+ with flask_app.test_client() as test_client:
239
+ response = test_client.post("/workflow/async-exec", json=request_body)
240
+
241
+ # THEN we should get a 200 response (async execution is accepted)
242
+ assert response.status_code == 200
243
+
244
+ # AND the response should indicate success
245
+ assert response.json == {"success": True}
246
+
247
+
248
+ def test_async_exec_route__with_invalid_inputs():
249
+ """
250
+ Tests that the async-exec route handles workflows with invalid inputs gracefully.
251
+ """
252
+ # GIVEN a Flask application
253
+ flask_app = create_app()
254
+
255
+ span_id = uuid4()
256
+ request_body = {
257
+ "execution_id": str(span_id),
258
+ "inputs": [],
259
+ "environment_api_key": "test",
260
+ "module": "workflow",
261
+ "timeout": 360,
262
+ "files": {
263
+ "__init__.py": "",
264
+ "workflow.py": """\
265
+ from vellum.workflows import BaseWorkflow
266
+ from vellum.workflows.state import BaseState
267
+ from .inputs import Inputs
268
+
269
+ class Workflow(BaseWorkflow[Inputs, BaseState]):
270
+ class Outputs(BaseWorkflow.Outputs):
271
+ foo = "hello"
272
+ """,
273
+ "inputs.py": """\
274
+ from vellum.workflows.inputs import BaseInputs
275
+
276
+ class Inputs(BaseInputs):
277
+ foo: str
278
+ """,
279
+ },
280
+ }
281
+
282
+ # WHEN we make a request to the async-exec route
283
+ with flask_app.test_client() as test_client:
284
+ response = test_client.post("/workflow/async-exec", json=request_body)
285
+
286
+ # THEN we should get a 200 response (async execution is accepted)
287
+ assert response.status_code == 200
288
+
289
+ # AND the response should indicate success
290
+ assert response.json == {"success": True}
291
+
292
+
293
+ def test_async_exec_route__background_thread_completes(caplog):
294
+ """
295
+ Verifies that the async background worker thread runs to completion.
296
+ """
297
+ # GIVEN a Flask application with log capture enabled
298
+ caplog.set_level(logging.INFO, logger="workflow_server.api.workflow_view")
299
+ flask_app = create_app()
300
+
301
+ baseline = get_active_process_count()
302
+
303
+ # AND a valid workflow request
304
+ span_id = uuid4()
305
+ request_body = {
306
+ "execution_id": str(span_id),
307
+ "inputs": [],
308
+ "environment_api_key": "test",
309
+ "module": "workflow",
310
+ "timeout": 360,
311
+ "files": {
312
+ "__init__.py": "",
313
+ "workflow.py": """\
314
+ from vellum.workflows import BaseWorkflow
315
+
316
+ class Workflow(BaseWorkflow):
317
+ class Outputs(BaseWorkflow.Outputs):
318
+ foo = "hello"
319
+ """,
320
+ },
321
+ }
322
+
323
+ # WHEN we call the async-exec route
324
+ with flask_app.test_client() as test_client:
325
+ response = test_client.post("/workflow/async-exec", json=request_body)
326
+
327
+ # THEN we get immediate acceptance
328
+ assert response.status_code == 200
329
+ assert response.json == {"success": True}
330
+
331
+ # AND the background thread should complete
332
+ completion_deadline = time.time() + 15
333
+ saw_completion_log = False
334
+ while time.time() < completion_deadline:
335
+ if any("Workflow async exec completed" in rec.message for rec in caplog.records):
336
+ saw_completion_log = True
337
+ break
338
+ time.sleep(0.1)
339
+
340
+ # THEN we should observe the completion log
341
+ assert saw_completion_log, "Did not observe background completion log within 15 seconds"
342
+
343
+ cleanup_deadline = time.time() + 15
344
+ process_count_returned = False
345
+ while time.time() < cleanup_deadline:
346
+ current_count = get_active_process_count()
347
+ if current_count == baseline:
348
+ process_count_returned = True
349
+ break
350
+ time.sleep(0.1)
351
+
352
+ current_count = get_active_process_count()
353
+ assert process_count_returned, (
354
+ f"Process count did not return to baseline within 15 seconds after completion log. "
355
+ f"Expected: {baseline}, Current: {current_count}"
356
+ )
357
+
358
+
359
+ def test_async_exec_route__background_thread_completes_on_error(caplog):
360
+ """
361
+ Verifies that the background worker completes even when the workflow fails early.
362
+ """
363
+ # GIVEN a Flask application with log capture enabled
364
+ caplog.set_level(logging.INFO, logger="workflow_server.api.workflow_view")
365
+ flask_app = create_app()
366
+
367
+ baseline = get_active_process_count()
368
+
369
+ span_id = uuid4()
370
+ request_body = {
371
+ "execution_id": str(span_id),
372
+ "inputs": [],
373
+ "environment_api_key": "test",
374
+ "module": "workflow",
375
+ "timeout": 360,
376
+ "files": {
377
+ "__init__.py": "",
378
+ "workflow.py": """\
379
+ from vellum.workflows import BaseWorkflow
380
+
381
+ class Workflow(BaseWorkflow)
382
+ class Outputs(BaseWorkflow.Outputs):
383
+ foo = "hello"
384
+ """,
385
+ },
386
+ }
387
+
388
+ # WHEN we call the async-exec route
389
+ with flask_app.test_client() as test_client:
390
+ response = test_client.post("/workflow/async-exec", json=request_body)
391
+
392
+ # THEN we get immediate acceptance
393
+ assert response.status_code == 200
394
+ assert response.json == {"success": True}
395
+
396
+ # AND the background thread should complete and clean up resources
397
+ deadline = time.time() + 15
398
+ process_count_returned = False
399
+ while time.time() < deadline:
400
+ current_count = get_active_process_count()
401
+ if current_count == baseline:
402
+ process_count_returned = True
403
+ break
404
+ time.sleep(0.1)
405
+
406
+ current_count = get_active_process_count()
407
+ assert process_count_returned, (
408
+ f"Process count did not return to baseline on error within 15 seconds. "
409
+ f"Expected: {baseline}, Current: {current_count}"
410
+ )
@@ -1241,3 +1241,72 @@ class InvalidWorkflow(BaseWorkflow):
1241
1241
  assert events[3]["name"] == "vembda.execution.fulfilled"
1242
1242
  assert events[3]["span_id"] == str(span_id)
1243
1243
  assert events[3]["body"]["exit_code"] == 0
1244
+
1245
+
1246
+ @mock.patch("workflow_server.api.workflow_view.get_is_oom_killed")
1247
+ def test_stream_workflow_route__oom_does_not_set_timed_out_flag(mock_get_is_oom_killed):
1248
+ """
1249
+ Tests that when an OOM error occurs, we don't set the timed_out flag in the vembda fulfilled event.
1250
+ """
1251
+ # GIVEN a workflow that takes some time to execute
1252
+ span_id = uuid4()
1253
+ request_body = {
1254
+ "timeout": 10,
1255
+ "execution_id": str(span_id),
1256
+ "inputs": [],
1257
+ "environment_api_key": "test",
1258
+ "module": "workflow",
1259
+ "files": {
1260
+ "__init__.py": "",
1261
+ "workflow.py": """\
1262
+ import time
1263
+
1264
+ from vellum.workflows.nodes.bases.base import BaseNode
1265
+ from vellum.workflows.workflows.base import BaseWorkflow
1266
+
1267
+
1268
+ class SlowNode(BaseNode):
1269
+ class Outputs(BaseNode.Outputs):
1270
+ value: str
1271
+
1272
+ def run(self) -> Outputs:
1273
+ time.sleep(2)
1274
+ return self.Outputs(value="hello world")
1275
+
1276
+
1277
+ class OOMWorkflow(BaseWorkflow):
1278
+ graph = SlowNode
1279
+ class Outputs(BaseWorkflow.Outputs):
1280
+ final_value = SlowNode.Outputs.value
1281
+
1282
+ """,
1283
+ },
1284
+ }
1285
+
1286
+ # WHEN we mock the OOM killer to trigger after a few checks
1287
+ call_count = [0]
1288
+
1289
+ def mock_oom_side_effect():
1290
+ call_count[0] += 1
1291
+ if call_count[0] > 3:
1292
+ return True
1293
+ return False
1294
+
1295
+ mock_get_is_oom_killed.side_effect = mock_oom_side_effect
1296
+
1297
+ # AND we call the stream route
1298
+ status_code, events = flask_stream(request_body)
1299
+
1300
+ # THEN we get a 200 response
1301
+ assert status_code == 200
1302
+
1303
+ # AND we get the expected events
1304
+ event_names = [e["name"] for e in events]
1305
+
1306
+ assert "vembda.execution.initiated" in event_names
1307
+
1308
+ # THEN the key assertion: if there's a vembda.execution.fulfilled event, it should NOT have timed_out=True
1309
+ vembda_fulfilled_event = next(e for e in events if e["name"] == "vembda.execution.fulfilled")
1310
+ assert (
1311
+ vembda_fulfilled_event["body"].get("timed_out") is not True
1312
+ ), "timed_out flag should not be set when OOM occurs"
@@ -15,6 +15,7 @@ from uuid import uuid4
15
15
  from typing import Any, Dict, Generator, Iterator, Optional, Union, cast
16
16
 
17
17
  from flask import Blueprint, Response, current_app as app, request, stream_with_context
18
+ import orjson
18
19
  from pydantic import ValidationError
19
20
  from vellum_ee.workflows.display.nodes.get_node_display_class import get_node_display_class
20
21
  from vellum_ee.workflows.display.types import WorkflowDisplayContext
@@ -115,7 +116,7 @@ def stream_workflow_route() -> Response:
115
116
  for row in workflow_events:
116
117
  yield "\n"
117
118
  if isinstance(row, dict):
118
- dump = json.dumps(row)
119
+ dump = orjson.dumps(row).decode("utf-8")
119
120
  yield dump
120
121
  else:
121
122
  yield row
@@ -500,11 +501,11 @@ def stream_node_route() -> Response:
500
501
  break
501
502
 
502
503
  def generator() -> Generator[str, None, None]:
503
- yield json.dumps(vembda_initiated_event.model_dump(mode="json"))
504
+ yield orjson.dumps(vembda_initiated_event.model_dump(mode="json")).decode("utf-8")
504
505
 
505
506
  for row in node_events():
506
507
  yield "\n"
507
- yield json.dumps(row)
508
+ yield orjson.dumps(row).decode("utf-8")
508
509
 
509
510
  headers = {
510
511
  "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
@@ -1,11 +1,12 @@
1
1
  from datetime import datetime
2
- import json
3
2
  import logging
4
3
  import os
5
4
  from threading import Event as ThreadingEvent
6
5
  from uuid import uuid4
7
6
  from typing import Optional
8
7
 
8
+ import orjson
9
+
9
10
  from workflow_server.core.events import VembdaExecutionInitiatedBody, VembdaExecutionInitiatedEvent
10
11
  from workflow_server.core.executor import stream_workflow
11
12
  from workflow_server.core.utils import serialize_vembda_rejected_event
@@ -29,7 +30,7 @@ def run_code_exec_stream() -> None:
29
30
  split_input = input_raw.split("\n--vellum-input-stop--\n")
30
31
  input_json = split_input[0]
31
32
 
32
- input_data = json.loads(input_json)
33
+ input_data = orjson.loads(input_json)
33
34
  context = WorkflowExecutorContext.model_validate(input_data)
34
35
 
35
36
  print("--vellum-output-start--") # noqa: T201
@@ -53,7 +54,7 @@ def run_code_exec_stream() -> None:
53
54
  cancel_signal=ThreadingEvent(),
54
55
  )
55
56
  for line in stream_iterator:
56
- print(f"{_EVENT_LINE}{json.dumps(line)}") # noqa: T201
57
+ print(f"{_EVENT_LINE}{orjson.dumps(line).decode('utf-8')}") # noqa: T201
57
58
  except Exception as e:
58
59
  logger.exception(e)
59
60
 
@@ -1,6 +1,5 @@
1
1
  from datetime import datetime, timezone
2
2
  from io import StringIO
3
- import json
4
3
  import logging
5
4
  from multiprocessing import Process, Queue
6
5
  import os
@@ -11,8 +10,9 @@ from threading import Event as ThreadingEvent
11
10
  import time
12
11
  from traceback import format_exc
13
12
  from uuid import UUID, uuid4
14
- from typing import Any, Callable, Generator, Iterator, Optional, Tuple, Type
13
+ from typing import Any, Callable, Generator, Iterator, Optional, Tuple
15
14
 
15
+ import orjson
16
16
  from vellum_ee.workflows.display.utils.events import event_enricher
17
17
  from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder
18
18
 
@@ -104,7 +104,7 @@ def _stream_workflow_wrapper(
104
104
  span_id_emitted = True
105
105
 
106
106
  for event in stream_iterator:
107
- queue.put(json.dumps(event))
107
+ queue.put(orjson.dumps(event).decode("utf-8"))
108
108
 
109
109
  except Exception as e:
110
110
  if not span_id_emitted:
@@ -273,32 +273,11 @@ def stream_node(
273
273
  disable_redirect: bool = True,
274
274
  ) -> Iterator[dict]:
275
275
  workflow, namespace = _create_workflow(executor_context)
276
- Node: Optional[Type[BaseNode]] = None
277
-
278
- for workflow_node in workflow.get_nodes():
279
- if executor_context.node_id and workflow_node.__id__ == executor_context.node_id:
280
- Node = workflow_node
281
- break
282
- elif (
283
- executor_context.node_module
284
- and executor_context.node_name
285
- and workflow_node.__name__ == executor_context.node_name
286
- and workflow_node.__module__ == f"{namespace}.{executor_context.node_module}"
287
- ):
288
- Node = workflow_node
289
- break
290
-
291
- if not Node:
292
- identifier = executor_context.node_id or f"{executor_context.node_module}.{executor_context.node_name}"
293
- raise WorkflowInitializationException(
294
- message=f"Node '{identifier}' not found in workflow",
295
- workflow_definition=workflow.__class__,
296
- )
297
276
 
298
277
  def call_node() -> Generator[dict[str, Any], Any, None]:
299
278
  executor_context.stream_start_time = time.time_ns()
300
279
 
301
- for event in workflow.run_node(Node, inputs=executor_context.inputs): # type: ignore[arg-type]
280
+ for event in workflow.run_node(executor_context.node_ref, inputs=executor_context.inputs):
302
281
  yield event.model_dump(mode="json")
303
282
 
304
283
  return _call_stream(
@@ -2,6 +2,7 @@ from datetime import datetime
2
2
  from uuid import uuid4
3
3
  from typing import Optional
4
4
 
5
+ from workflow_server.config import IS_ASYNC_MODE
5
6
  from workflow_server.core.events import VembdaExecutionFulfilledBody, VembdaExecutionFulfilledEvent
6
7
  from workflow_server.core.workflow_executor_context import BaseExecutorContext
7
8
 
@@ -46,6 +47,9 @@ def serialize_vembda_rejected_event(
46
47
 
47
48
 
48
49
  def is_events_emitting_enabled(executor_context: Optional[BaseExecutorContext]) -> bool:
50
+ if IS_ASYNC_MODE:
51
+ return True
52
+
49
53
  if not executor_context:
50
54
  return False
51
55
 
@@ -3,7 +3,7 @@ from functools import cached_property
3
3
  import os
4
4
  import time
5
5
  from uuid import UUID
6
- from typing import Any, Optional
6
+ from typing import Any, Optional, Union
7
7
  from typing_extensions import Self
8
8
 
9
9
  from flask import has_request_context, request
@@ -91,6 +91,18 @@ class NodeExecutorContext(BaseExecutorContext):
91
91
  node_module: Optional[str] = None
92
92
  node_name: Optional[str] = None
93
93
 
94
+ @property
95
+ def node_ref(self) -> Union[UUID, str]:
96
+ """
97
+ Returns the node reference for use with workflow.run_node().
98
+
99
+ Returns node_id if it exists, otherwise returns the combination
100
+ of node_module and node_name as a fully qualified string.
101
+ """
102
+ if self.node_id:
103
+ return self.node_id
104
+ return f"{self.node_module}.{self.node_name}"
105
+
94
106
  @model_validator(mode="after")
95
107
  def validate_node_identification(self) -> Self:
96
108
  if not self.node_id and not (self.node_module and self.node_name):
@@ -64,8 +64,8 @@ def start() -> None:
64
64
  "workers": int(os.getenv("GUNICORN_WORKERS", 2)),
65
65
  "threads": int(os.getenv("GUNICORN_THREADS", 9 if ENABLE_PROCESS_WRAPPER else 6)),
66
66
  # Aggressively try to avoid memory leaks when using non process mode
67
- "max_requests": 120 if ENABLE_PROCESS_WRAPPER else 20,
68
- "max_requests_jitter": 30 if ENABLE_PROCESS_WRAPPER else 10,
67
+ "max_requests": int(os.getenv("GUNICORN_MAX_REQUESTS", 120 if ENABLE_PROCESS_WRAPPER else 20)),
68
+ "max_requests_jitter": int(os.getenv("GUNICORN_MAX_REQUESTS_JITTER", 30 if ENABLE_PROCESS_WRAPPER else 10)),
69
69
  "worker_class": "gthread",
70
70
  "timeout": max_workflow_runtime_seconds,
71
71
  "logger_class": CustomGunicornLogger,