vellum-workflow-server 0.14.70.post130__py3-none-any.whl → 0.14.70.post131__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vellum-workflow-server might be problematic.
- {vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/METADATA +1 -1
- {vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/RECORD +10 -8
- workflow_server/api/tests/test_workflow_view_stream_workflow_route.py +68 -0
- workflow_server/api/workflow_view.py +57 -2
- workflow_server/config.py +1 -0
- workflow_server/utils/oom_killer.py +10 -33
- workflow_server/utils/system_utils.py +74 -0
- workflow_server/utils/tests/test_system_utils.py +114 -0
- {vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/WHEEL +0 -0
- {vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/entry_points.txt +0 -0
{vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/RECORD
CHANGED

@@ -5,10 +5,10 @@ workflow_server/api/healthz_view.py,sha256=itiRvBDBXncrw8Kbbc73UZLwqMAhgHOR3uSre
 workflow_server/api/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/api/tests/test_input_display_mapping.py,sha256=drBZqMudFyB5wgiUOcMgRXz7E7ge-Qgxbstw4E4f0zE,2211
 workflow_server/api/tests/test_workflow_view.py,sha256=wlVFBmKcoI-RdzfGPioeW46k6zaXyUeIerPc6m4aQls,7150
-workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=
-workflow_server/api/workflow_view.py,sha256=
+workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=2gro4GD3FBuaA8T2-0oQxOOXh6zTf6hwxKb9CGU3x8g,24813
+workflow_server/api/workflow_view.py,sha256=zuc2wm3y_F3zIcyP2HXsJKiaGpE2YvQZNm-ea6rZSeE,16205
 workflow_server/code_exec_runner.py,sha256=tfijklTVkX4y45jeFTfrY2hVhdwo0VrLFc3SMeIiVYs,3096
-workflow_server/config.py,sha256=
+workflow_server/config.py,sha256=K5Tavm7wiqCZt0RWWue7zzb8N6e8aWnFOTNlBqEJPcI,1330
 workflow_server/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/core/cancel_workflow.py,sha256=Ffkc3mzmrdMEUcD-sHfEhX4IwVrka-E--SxKA1dUfIU,2185
 workflow_server/core/events.py,sha256=iscGJv8bS7WGEYR-ODnALIANuHpwOs2TdKzqDPrCOh0,1370
@@ -19,12 +19,14 @@ workflow_server/start.py,sha256=DgtQhuCLc07BIWyJPLPZKZsQ8jwEFsvvfIo7MdwVrpw,1998
 workflow_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/utils/exit_handler.py,sha256=_FacDVi4zc3bfTA3D2mJsISePlJ8jpLrnGVo5-xZQFs,743
 workflow_server/utils/log_proxy.py,sha256=nugi6fOgAYKX2X9DIc39TG366rsmmDUPoEtG3gzma_Y,3088
-workflow_server/utils/oom_killer.py,sha256=
+workflow_server/utils/oom_killer.py,sha256=4Sag_iRQWqbp62iIBn6nKP-pxUHguOF93DdVXZTtJDk,2809
 workflow_server/utils/sentry.py,sha256=Pr3xKvHdk0XFSpXgy-55bWI4J3bbf_36gjDyLOs7oVU,855
+workflow_server/utils/system_utils.py,sha256=fTzbdpmZ-0bXiNBLYYQdNJWtFAItZgIH8cLJdoXDuQQ,2114
 workflow_server/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+workflow_server/utils/tests/test_system_utils.py,sha256=MdBxI9gxUOpR_JBAHpEz6dGFY6JjxhMSM2oExpqFvNA,4314
 workflow_server/utils/tests/test_utils.py,sha256=qwK5Rmy3RQyjtlUrYAuGuDlBeRzZKsf1yS-y2IpUizQ,6452
 workflow_server/utils/utils.py,sha256=Wqqn-1l2ugkGgy5paWWdt0AVxAyPMQCYcnRSSOMjXlA,4355
-vellum_workflow_server-0.14.70.
-vellum_workflow_server-0.14.70.
-vellum_workflow_server-0.14.70.
-vellum_workflow_server-0.14.70.
+vellum_workflow_server-0.14.70.post131.dist-info/METADATA,sha256=X61wAnql7REkFKkXFcOA24ElsigAS9MQE5JMAA3TgVY,2245
+vellum_workflow_server-0.14.70.post131.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+vellum_workflow_server-0.14.70.post131.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
+vellum_workflow_server-0.14.70.post131.dist-info/RECORD,,
workflow_server/api/tests/test_workflow_view_stream_workflow_route.py
CHANGED

@@ -745,6 +745,74 @@ class EndNodeDisplay(BaseNodeDisplay[EndNode]):
     assert events[2]["body"]["inputs"] == {"fruit": "cherry"}


+@mock.patch("workflow_server.api.workflow_view.wait_for_available_process")
+def test_stream_workflow_route__concurrent_request_rate_exceeded(mock_wait_for_available_process):
+    # GIVEN a valid request body
+    span_id = uuid4()
+    request_body = {
+        "execution_id": str(span_id),
+        "inputs": [],
+        "workspace_api_key": "test",
+        "environment_api_key": "test",
+        "module": "workflow",
+        "timeout": 360,
+        "files": {
+            "__init__.py": "",
+            "workflow.py": """\
+from vellum.workflows import BaseWorkflow
+
+class Workflow(BaseWorkflow):
+    class Outputs(BaseWorkflow.Outputs):
+        foo = "hello"
+""",
+        },
+    }
+
+    # AND wait_for_available_process returns False
+    mock_wait_for_available_process.return_value = False
+
+    # WHEN we call the stream route
+    status_code, events = flask_stream(request_body)
+
+    # THEN we get a 200 response
+    assert status_code == 200, events
+
+    # THEN we get the expected events
+    assert events[0] == {
+        "id": mock.ANY,
+        "trace_id": mock.ANY,
+        "span_id": str(span_id),
+        "timestamp": mock.ANY,
+        "api_version": "2024-10-25",
+        "parent": None,
+        "name": "vembda.execution.initiated",
+        "body": {
+            "sdk_version": version("vellum-ai"),
+            "server_version": "local",
+        },
+    }
+
+    # AND we get a vembda.execution.fulfilled event with error
+    assert events[1] == {
+        "id": mock.ANY,
+        "trace_id": events[0]["trace_id"],
+        "span_id": str(span_id),
+        "timestamp": mock.ANY,
+        "api_version": "2024-10-25",
+        "parent": None,
+        "name": "vembda.execution.fulfilled",
+        "body": {
+            "log": "",
+            "exit_code": -1,
+            "stderr": "Workflow server concurrent request rate exceeded. Process count: 0",
+            "container_overhead_latency": mock.ANY,
+            "timed_out": False,
+        },
+    }
+
+    assert len(events) == 2
+
+
 def test_stream_workflow_route__with_environment_variables(both_stream_types):
     # GIVEN a valid request body with environment variables
     span_id = uuid4()
workflow_server/api/workflow_view.py
CHANGED

@@ -32,7 +32,12 @@ from workflow_server.core.workflow_executor_context import (
     NodeExecutorContext,
     WorkflowExecutorContext,
 )
-from workflow_server.utils.oom_killer import
+from workflow_server.utils.oom_killer import get_is_oom_killed
+from workflow_server.utils.system_utils import (
+    get_active_process_count,
+    increment_process_count,
+    wait_for_available_process,
+)
 from workflow_server.utils.utils import convert_json_inputs_to_vellum, get_version

 bp = Blueprint("exec", __name__)
@@ -61,7 +66,8 @@ def stream_workflow_route() -> Response:
     )

     logger.info(
-        f"Starting workflow stream, execution ID: {context.execution_id},
+        f"Starting workflow stream, execution ID: {context.execution_id}, "
+        f"process count: {get_active_process_count()}"
     )

     # Create this event up here so timestamps are fully from the start to account for any unknown overhead
@@ -76,6 +82,26 @@ def stream_workflow_route() -> Response:

     process_output_queue: Queue[dict] = Queue()

+    # We can exceed the concurrency count currently with long running workflows due to a knative issue. So here
+    # if we detect a memory problem just exit us early
+    if not wait_for_available_process():
+        return Response(
+            stream_with_context(
+                startup_error_generator(
+                    context=context,
+                    message=f"Workflow server concurrent request rate exceeded. "
+                    f"Process count: {get_active_process_count()}",
+                    vembda_initiated_event=vembda_initiated_event,
+                )
+            ),
+            status=200,
+            content_type='application/x-ndjson"',
+            headers={
+                "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
+                "X-Vellum-Server-Version": vembda_initiated_event.body.server_version,
+            },
+        )
+
     try:
         process = stream_workflow_process_timeout(
             executor_context=context,
@@ -388,3 +414,32 @@ def get_node_request_context(data: dict) -> NodeExecutorContext:
     }

     return NodeExecutorContext.model_validate(context_data)
+
+
+def startup_error_generator(
+    vembda_initiated_event: VembdaExecutionInitiatedEvent, message: str, context: WorkflowExecutorContext
+) -> Generator[str, None, None]:
+    try:
+        yield "\n"
+        yield vembda_initiated_event.model_dump_json()
+        yield "\n"
+        yield VembdaExecutionFulfilledEvent(
+            id=uuid4(),
+            timestamp=datetime.now(),
+            trace_id=context.trace_id,
+            span_id=context.execution_id,
+            body=VembdaExecutionFulfilledBody(
+                exit_code=-1,
+                container_overhead_latency=context.container_overhead_latency,
+                stderr=message,
+            ),
+            parent=None,
+        ).model_dump_json()
+        yield "\n"
+        yield "END"
+        yield "\n"
+
+        logger.error("Workflow stream could not start from resource constraints")
+    except GeneratorExit:
+        app.logger.error("Client disconnected in the middle of the stream")
+        return
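Note that the gated path above still returns HTTP 200 and streams newline-delimited JSON events terminated by an `END` sentinel, so a caller learns about the rejection from the `vembda.execution.fulfilled` event body rather than from the status code. A minimal client-side sketch of reading such a stream follows; the URL, port, and request body are placeholders, not values from this package:

```python
import json

import requests  # any HTTP client with streaming support works; requests is assumed here

# Hypothetical endpoint and payload; the real route and body shape live in workflow_view.py
response = requests.post("http://localhost:8000/stream", json={"execution_id": "..."}, stream=True)

for line in response.iter_lines(decode_unicode=True):
    if not line:
        continue  # the generator pads events with bare newlines
    if line == "END":
        break  # sentinel emitted by startup_error_generator
    event = json.loads(line)
    if event["name"] == "vembda.execution.fulfilled" and event["body"]["exit_code"] == -1:
        print("rejected:", event["body"]["stderr"])
```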
workflow_server/config.py
CHANGED

@@ -27,6 +27,7 @@ MEMORY_LIMIT_MB = int(os.getenv("MEMORY_LIMIT_MB", "2048"))
 PORT = os.getenv("PORT", "8000")
 VELLUM_API_URL_HOST = os.getenv("VELLUM_API_URL_HOST", "localhost")
 VELLUM_API_URL_PORT = os.getenv("VELLUM_API_URL_PORT", 8000)
+CONCURRENCY = int(os.getenv("CONCURRENCY", "8"))


 def is_development() -> bool:
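For a rough sense of scale, with the default `CONCURRENCY` of 8 the process cap that `system_utils.py` (further below) derives from this setting works out to 14:

```python
import math

CONCURRENCY = 8  # default from config.py above
_MAX_PROCESS_COUNT = math.ceil(CONCURRENCY * 1.7)  # formula used in system_utils.py
print(_MAX_PROCESS_COUNT)  # 14
```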
workflow_server/utils/oom_killer.py
CHANGED

@@ -11,37 +11,22 @@ from time import sleep

 from workflow_server.config import MEMORY_LIMIT_MB
 from workflow_server.utils.exit_handler import process_killed_switch
+from workflow_server.utils.system_utils import (
+    FORCE_GC_MEMORY_PERCENT,
+    WARN_MEMORY_PERCENT,
+    get_active_process_count,
+    get_memory_in_use_mb,
+)

 logger = logging.getLogger(__name__)

 _oom_killed_switch = multiprocessing.Event()

 _MAX_MEMORY_PERCENT = 0.97
-_WARN_MEMORY_PERCENT = 0.90
-_FORCE_GC_MEMORY_PERCENT = 0.75
 _FORCE_COLLECT_MEMORY_PERCENT = 0.90
-_ACTIVE_PROCESS_COUNT = multiprocessing.Value("i", 0)
-_ACTIVE_PROCESS_LOCK = multiprocessing.Lock()
 _KILL_GRACE_PERIOD = 5


-def increment_process_count(change: int) -> None:
-    result = _ACTIVE_PROCESS_LOCK.acquire(timeout=5)
-    try:
-        if result:
-            global _ACTIVE_PROCESS_COUNT
-            _ACTIVE_PROCESS_COUNT.value += change  # type: ignore
-        else:
-            logger.error("Failed to lock workflow server process count global.")
-    finally:
-        if result:
-            _ACTIVE_PROCESS_LOCK.release()
-
-
-def get_active_process_count() -> int:
-    return _ACTIVE_PROCESS_COUNT.value  # type: ignore
-
-
 def start_oom_killer_worker() -> None:
     logger.info("Starting oom killer watcher...")
     OomKillerThread(kill_switch=_oom_killed_switch).start()
@@ -79,19 +64,11 @@ class OomKillerThread(Thread):
             if process_killed_switch.is_set():
                 exit(1)
             sleep(1)
-            try:
-                with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "r") as file:
-                    memory_bytes = file.read()
-            except Exception:
-                logger.error("Unable to get current memory.")
-                return

-
-
+            memory_mb = get_memory_in_use_mb()
+            if not memory_mb:
                 return

-            memory_mb = int(memory_bytes) / 1024 / 1024
-
             if memory_mb > (MEMORY_LIMIT_MB * _MAX_MEMORY_PERCENT):
                 self._kill_switch.set()
                 logger.error(
@@ -103,13 +80,13 @@ class OomKillerThread(Thread):
                 os.kill(pid, signal.SIGKILL)
                 sys.exit(1)

-            if memory_mb > (MEMORY_LIMIT_MB *
+            if memory_mb > (MEMORY_LIMIT_MB * WARN_MEMORY_PERCENT):
                 logger.warning(
                     f"Memory usage exceeded 90% of limit, memory: {memory_mb}MB, "
                     f"Process Count: {get_active_process_count()}"
                 )

-            if memory_mb > (MEMORY_LIMIT_MB *
+            if memory_mb > (MEMORY_LIMIT_MB * FORCE_GC_MEMORY_PERCENT):
                 if time.time() - last_gc >= 20:
                     logger.info("Forcing garbage collect from memory pressure")
                     gc.collect()
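With the default `MEMORY_LIMIT_MB` of 2048 (see config.py), the thresholds referenced above land roughly as follows; this is a small worked example, not code from the package:

```python
MEMORY_LIMIT_MB = 2048  # default from config.py

print(MEMORY_LIMIT_MB * 0.75)  # 1536.0 MB  -> FORCE_GC_MEMORY_PERCENT: force a gc.collect()
print(MEMORY_LIMIT_MB * 0.90)  # 1843.2 MB  -> WARN_MEMORY_PERCENT: log a warning
print(MEMORY_LIMIT_MB * 0.97)  # 1986.56 MB -> _MAX_MEMORY_PERCENT: set the kill switch and SIGKILL
```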
workflow_server/utils/system_utils.py
ADDED

@@ -0,0 +1,74 @@
+import logging
+import math
+import multiprocessing
+import time
+from typing import Optional
+
+from workflow_server.config import CONCURRENCY, MEMORY_LIMIT_MB
+
+logger = logging.getLogger(__name__)
+
+WARN_MEMORY_PERCENT = 0.90
+FORCE_GC_MEMORY_PERCENT = 0.75
+
+_MAX_PROCESS_COUNT = math.ceil(CONCURRENCY * 1.7)
+_MEMORY_CHECK_INTERVAL_SECONDS = 3
+_MAX_MEMORY_CHECK_ATTEMPTS = 5
+_ACTIVE_PROCESS_COUNT = multiprocessing.Value("i", 0)
+_ACTIVE_PROCESS_LOCK = multiprocessing.Lock()
+
+
+def increment_process_count(change: int) -> None:
+    result = _ACTIVE_PROCESS_LOCK.acquire(timeout=5)
+    try:
+        if result:
+            global _ACTIVE_PROCESS_COUNT
+            _ACTIVE_PROCESS_COUNT.value += change  # type: ignore
+        else:
+            logger.error("Failed to lock workflow server process count global.")
+    finally:
+        if result:
+            _ACTIVE_PROCESS_LOCK.release()
+
+
+def get_active_process_count() -> int:
+    return _ACTIVE_PROCESS_COUNT.value  # type: ignore
+
+
+def get_memory_in_use_mb() -> Optional[float]:
+    try:
+        with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "r") as file:
+            memory_bytes = file.read()
+    except Exception:
+        logger.error("Unable to get current memory.")
+        return None
+
+    if not memory_bytes:
+        logger.error("Unable to get current memory.")
+        return None
+
+    return int(memory_bytes) / 1024 / 1024
+
+
+def wait_for_available_process() -> bool:
+    memory_loops = 0
+    process_available = False
+
+    while memory_loops < _MAX_MEMORY_CHECK_ATTEMPTS:
+        memory_mb = get_memory_in_use_mb()
+
+        exceeded_warn_limit = memory_mb and memory_mb > (MEMORY_LIMIT_MB * WARN_MEMORY_PERCENT)
+        exceeded_process_limit = (
+            get_active_process_count() > _MAX_PROCESS_COUNT
+            and memory_mb
+            and memory_mb > (MEMORY_LIMIT_MB * FORCE_GC_MEMORY_PERCENT)
+        )
+
+        if not exceeded_process_limit and not exceeded_warn_limit:
+            process_available = True
+            break
+
+        memory_loops += 1
+        time.sleep(_MEMORY_CHECK_INTERVAL_SECONDS)
+
+    return process_available
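The counter behind `get_active_process_count` is a `multiprocessing.Value` guarded by a `multiprocessing.Lock`, so it stays consistent across forked worker processes. Below is a standalone sketch of that pattern; the names mirror the module, but it is an illustration rather than the packaged code, and since this diff does not show where `increment_process_count` is called, the +1/-1 pairing around a worker is an assumption:

```python
import multiprocessing

_count = multiprocessing.Value("i", 0)  # shared integer, like _ACTIVE_PROCESS_COUNT
_lock = multiprocessing.Lock()          # like _ACTIVE_PROCESS_LOCK


def increment(change: int) -> None:
    # Mirror increment_process_count: bounded lock acquire, release only if acquired
    acquired = _lock.acquire(timeout=5)
    try:
        if acquired:
            _count.value += change
    finally:
        if acquired:
            _lock.release()


def worker() -> None:
    increment(+1)
    try:
        pass  # a real worker would run the workflow here
    finally:
        increment(-1)


if __name__ == "__main__":
    # Assumes the fork start method (Linux); with spawn, the Value must be passed to children explicitly
    processes = [multiprocessing.Process(target=worker) for _ in range(4)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print(_count.value)  # 0: every worker decremented on the way out
```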
workflow_server/utils/tests/test_system_utils.py
ADDED

@@ -0,0 +1,114 @@
+from unittest.mock import mock_open, patch
+
+from workflow_server.config import MEMORY_LIMIT_MB
+from workflow_server.utils.system_utils import (
+    FORCE_GC_MEMORY_PERCENT,
+    WARN_MEMORY_PERCENT,
+    get_memory_in_use_mb,
+    wait_for_available_process,
+)
+
+
+def test_get_memory_in_use_mb_success():
+    # Test with 1GB of memory (1024MB)
+    test_memory_bytes = "1073741824"
+    with patch("builtins.open", mock_open(read_data=test_memory_bytes)):
+        result = get_memory_in_use_mb()
+
+    assert result == 1024.0
+
+
+def test_get_memory_in_use_mb_empty_file():
+    with patch("builtins.open", mock_open(read_data="")):
+        result = get_memory_in_use_mb()
+
+    assert result is None
+
+
+def test_get_memory_in_use_mb_file_not_found():
+    with patch("builtins.open", side_effect=FileNotFoundError()):
+        result = get_memory_in_use_mb()
+
+    assert result is None
+
+
+def test_get_memory_in_use_mb_zero_memory():
+    with patch("builtins.open", mock_open(read_data="0")):
+        result = get_memory_in_use_mb()
+        assert result == 0.0
+
+
+@patch("workflow_server.utils.system_utils.time.sleep")
+@patch("workflow_server.utils.system_utils.get_memory_in_use_mb")
+@patch("workflow_server.utils.system_utils.get_active_process_count")
+def test_wait_for_available_process_immediate_availability(mock_get_active_process_count, mock_get_memory, mock_sleep):
+    # Mock memory usage below warning limit and process limit below
+    mock_get_memory.return_value = MEMORY_LIMIT_MB * (WARN_MEMORY_PERCENT - 0.1)
+    mock_get_active_process_count.return_value = 10
+
+    result = wait_for_available_process()
+
+    assert result is True
+
+    # Should not sleep if immediately available
+    mock_sleep.assert_not_called()
+
+
+@patch("workflow_server.utils.system_utils.time.sleep")
+@patch("workflow_server.utils.system_utils.get_memory_in_use_mb")
+@patch("workflow_server.utils.system_utils.get_active_process_count")
+def test_wait_for_available_process_becomes_available(mock_get_active_process_count, mock_get_memory, mock_sleep):
+    # First two calls indicate high memory usage, third call shows available memory
+    mock_get_memory.side_effect = [
+        MEMORY_LIMIT_MB * (WARN_MEMORY_PERCENT + 0.1),
+        MEMORY_LIMIT_MB * (WARN_MEMORY_PERCENT + 0.1),
+        MEMORY_LIMIT_MB * (WARN_MEMORY_PERCENT - 0.1),
+    ]
+    mock_get_active_process_count.return_value = 10
+
+    result = wait_for_available_process()
+
+    assert result is True
+    # Should sleep twice before becoming available
+    assert mock_sleep.call_count == 2
+
+
+@patch("workflow_server.utils.system_utils.time.sleep")
+@patch("workflow_server.utils.system_utils.get_memory_in_use_mb")
+@patch("workflow_server.utils.system_utils.get_active_process_count")
+def test_wait_for_available_process_never_available(mock_get_active_process_count, mock_get_memory, mock_sleep):
+    # Return false if process isn't available from high memory usage
+    mock_get_memory.return_value = MEMORY_LIMIT_MB * (WARN_MEMORY_PERCENT + 0.1)
+    mock_get_active_process_count.return_value = 13
+
+    result = wait_for_available_process()
+    assert result is False
+
+    # Should sleep for each attempt
+    assert mock_sleep.call_count == 5
+
+
+@patch("workflow_server.utils.system_utils.time.sleep")
+@patch("workflow_server.utils.system_utils.get_memory_in_use_mb")
+@patch("workflow_server.utils.system_utils.get_active_process_count")
+def test_wait_for_available_process_memory_none(mock_get_active_process_count, mock_get_memory, mock_sleep):
+    # Test when memory reading fails that result is still true
+    mock_get_memory.return_value = None
+    mock_get_active_process_count.return_value = 10
+
+    result = wait_for_available_process()
+    assert result is True
+
+
+@patch("workflow_server.utils.system_utils.time.sleep")
+@patch("workflow_server.utils.system_utils.get_memory_in_use_mb")
+@patch("workflow_server.utils.system_utils.get_active_process_count")
+def test_wait_for_available_process_high_process_count_but_low_memory(
+    mock_get_active_process_count, mock_get_memory, mock_sleep
+):
+    # Test when process count is high but memory is low
+    mock_get_memory.return_value = MEMORY_LIMIT_MB * (FORCE_GC_MEMORY_PERCENT - 0.1)
+    mock_get_active_process_count.return_value = 13
+
+    result = wait_for_available_process()
+    assert result is True
Files without changes: {vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/WHEEL and {vellum_workflow_server-0.14.70.post130.dist-info → vellum_workflow_server-0.14.70.post131.dist-info}/entry_points.txt