vellum-workflow-server 1.8.6.post4__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.

vellum_workflow_server-1.10.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vellum-workflow-server
-Version: 1.8.6.post4
+Version: 1.10.0
 Summary:
 License: AGPL
 Requires-Python: >=3.9.0,<4
@@ -29,7 +29,7 @@ Requires-Dist: pyjwt (==2.10.0)
 Requires-Dist: python-dotenv (==1.0.1)
 Requires-Dist: retrying (==1.3.4)
 Requires-Dist: sentry-sdk[flask] (==2.20.0)
-Requires-Dist: vellum-ai (==1.8.6)
+Requires-Dist: vellum-ai (==1.10.0)
 Description-Content-Type: text/markdown
 
 # Vellum Workflow Runner Server

vellum_workflow_server-1.10.0.dist-info/RECORD CHANGED
@@ -5,22 +5,22 @@ workflow_server/api/healthz_view.py,sha256=itiRvBDBXncrw8Kbbc73UZLwqMAhgHOR3uSre
 workflow_server/api/status_view.py,sha256=Jah8dBAVL4uOcRfsjKAOyfVONFyk9HQjXeRfjcIqhmA,514
 workflow_server/api/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/api/tests/test_input_display_mapping.py,sha256=drBZqMudFyB5wgiUOcMgRXz7E7ge-Qgxbstw4E4f0zE,2211
-workflow_server/api/tests/test_workflow_view.py,sha256=81kAHpijNp0rvb3ZjvceB5uFEriVWPeWHnK78-xoeTc,32343
-workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=WFgQYAySbFx5TpT-vB3qGlU8jP8gTo2pTPuuc5wz6RM,39664
-workflow_server/api/workflow_view.py,sha256=pJRUpAE83KXz0QvokORSmX4jDtniNQmlc_CkrQmHhxo,20753
+workflow_server/api/tests/test_workflow_view.py,sha256=B6B8mCirt3FvpPKRP_AyzPJ199k_gwLzAcQuWRkzEfA,32343
+workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=Yrp_DlLbbwZJe5WRLwdlFT17R8CQoCK9-jlQ1jUT_eM,40377
+workflow_server/api/workflow_view.py,sha256=RiRO0Z_gCIbdcG9XX_PcB9j8Qx5K_2dXxxtkib6fezY,24601
 workflow_server/code_exec_runner.py,sha256=DLNNrinCRbnkSvlqVvSZ1wv_etI7r_kKAXNPGMj3jBk,2196
 workflow_server/config.py,sha256=I4hfTsjIbHxoSKylPCjKnrysPV0jO5nfRKwpKvEcfAE,2193
 workflow_server/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/core/cancel_workflow.py,sha256=QcEeYUIrxq4pub-z9BlGi5fLI3gVRml-56rMCW7j5Hc,2212
 workflow_server/core/events.py,sha256=24MA66DVQuaLJJcZrS8IL1Zq4Ohi9CoouKZ5VgoH3Cs,1402
-workflow_server/core/executor.py,sha256=K7W_F2lqJxhrdzqzNhKym_k8enJjJucYJQRLsS_sw3Q,17895
-workflow_server/core/utils.py,sha256=si0NB4Suurc-mn8NYdn59xM9CkPrfOP1aWEVrZvifDI,1929
-workflow_server/core/workflow_executor_context.py,sha256=7Vp714LNVx_J5ERbgRHy5pJo_MaXsccIePWEW3IBshw,3234
+workflow_server/core/executor.py,sha256=xbySFdb9KHoqFDfiKMR77fViFVo3XEQ5ER54C1PlS8c,16948
+workflow_server/core/utils.py,sha256=mecVPqQkthrC4mpop3r8J3IWnBmKbDgqfCrSagyzVEg,2021
+workflow_server/core/workflow_executor_context.py,sha256=8faOdpU4cBeIbmOvg9VzD3eS5i_PKcH7tyNGzx_rehg,3899
 workflow_server/logging_config.py,sha256=Hvx1t8uhqMMinl-5qcef7ufUvzs6x14VRnCb7YZxEAg,1206
 workflow_server/server.py,sha256=pBl0OQmrLE-PbTDwTgsVmxgz_Ai3TVhFRaMnr6PX6Yk,1849
-workflow_server/start.py,sha256=xSIobowtSLoZI86bbMkmEw3pqJHQaFdDyNffk4kGYL8,2544
+workflow_server/start.py,sha256=Ams5ycqVbBorC7s6EI95BYzjpxzlo5mQbBnMNOkJS0w,2753
 workflow_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workflow_server/utils/exit_handler.py,sha256=_FacDVi4zc3bfTA3D2mJsISePlJ8jpLrnGVo5-xZQFs,743
+workflow_server/utils/exit_handler.py,sha256=PzRpzmia4Ki33sJTWjsvjD5oLP4_qfS5SZg2uXnyqxE,1767
 workflow_server/utils/log_proxy.py,sha256=nugi6fOgAYKX2X9DIc39TG366rsmmDUPoEtG3gzma_Y,3088
 workflow_server/utils/oom_killer.py,sha256=dzaqSzi0jQ3MvALwwiYIO9r6VWLa5Ln9AY6l11WEexo,3050
 workflow_server/utils/sentry.py,sha256=pmGDoaFhJwUprjP_Vmz6bETitqKQulJ0vwRP-gYb2w4,2145
@@ -30,7 +30,7 @@ workflow_server/utils/tests/test_sentry_integration.py,sha256=14PfuW8AaQNNtqLmBs
 workflow_server/utils/tests/test_system_utils.py,sha256=_4GwXvVvU5BrATxUEWwQIPg0bzQXMWBtiBmjP8MTxJM,4314
 workflow_server/utils/tests/test_utils.py,sha256=0Nq6du8o-iBtTrip9_wgHES53JSiJbVdSXaBnPobw3s,6930
 workflow_server/utils/utils.py,sha256=m7iMJtor5SQLWu7jlJw-X5Q3nmbq69BCxTMv6qnFYrA,4835
-vellum_workflow_server-1.8.6.post4.dist-info/METADATA,sha256=c1qUerOxt_TK40PtyU8uWZ8J7YzeCHRa-Z4Zw4OB2Xo,2273
-vellum_workflow_server-1.8.6.post4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-vellum_workflow_server-1.8.6.post4.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
-vellum_workflow_server-1.8.6.post4.dist-info/RECORD,,
+vellum_workflow_server-1.10.0.dist-info/METADATA,sha256=KLswbGouJhCTrF98d2iVsu_YgsnF8rb4DY_K6t1CWRc,2269
+vellum_workflow_server-1.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+vellum_workflow_server-1.10.0.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
+vellum_workflow_server-1.10.0.dist-info/RECORD,,

workflow_server/api/tests/test_workflow_view.py CHANGED
@@ -63,11 +63,11 @@ class TestNode(BaseNode):
         "comment": {"expanded": True, "value": "A test node for processing data."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "7a8b251d-f5ca-462a-b293-071d219460fb",
+    "id": "6f4c9178-9f46-4723-bcb7-0bd59db54eca",
     "label": "Test Node",
     "outputs": [],
-    "ports": [{"id": "a3a0eefd-45d0-4f13-8c58-a836a9f7f9ed", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "a022e36c-9852-4772-9be3-3c6c147fd811", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "4394823f-79a8-4dbc-99ae-06a1df6c7408", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "07240af1-67c6-4460-b53d-53f0b0f1b90e", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -127,11 +127,11 @@ class SomeOtherNode(BaseNode):
         "comment": {"expanded": True, "value": "This is Some Node."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "1e559c2e-db82-41f0-9ceb-5e89b0c5a0a3",
+    "id": "89e84bac-5a5f-4f64-8083-7d3ebec98be1",
     "label": "Some Node",
     "outputs": [],
-    "ports": [{"id": "48e39e97-5fd4-471e-b4f2-51d3baf06456", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "e3381fb7-61fc-4c46-ae8e-51fc463b6a59", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "2983ea5c-1d29-483a-b896-53098f5de4f1", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "6996efb0-5a20-4719-8835-34fe6552764a", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -150,11 +150,11 @@ class SomeOtherNode(BaseNode):
         "comment": {"expanded": True, "value": "This is Some Other Node."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "7aee541b-b245-4c8a-9137-3e4631d5100c",
+    "id": "3cdbba02-8a34-4e0f-8b94-770a944dcaa3",
     "label": "Some Other Node",
     "outputs": [],
-    "ports": [{"id": "fb66b46a-d970-4bc9-83ea-70c154c57ddd", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "13fa2714-20b3-4bc3-ab79-621a188e3bfa", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "1839bde5-2ad4-4723-b21b-2c55fa833a7a", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "c36df8a8-5624-45be-99c9-826cf511a951", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -222,11 +222,11 @@ class HelperClass:
         "comment": {"expanded": True, "value": "Processes input data."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "f92c09f0-0434-46cb-829d-a73f801d6343",
+    "id": "7121bcb9-98a1-4907-bf9b-9734d773fd15",
     "label": "Processing Node",
     "outputs": [],
-    "ports": [{"id": "abaa2984-b312-4491-b069-e689759f72c8", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "35378c2b-f089-44af-ac37-efe4ea42c817", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "de27da74-30e9-4e7b-95c2-92bdfc5bf042", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "e02bd85e-8b03-4b21-8b3e-f411042334ce", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -240,11 +240,11 @@ class HelperClass:
         "comment": {"expanded": True, "value": "Transforms data format."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "09ca32f7-c8f2-4469-97e5-1f288f85127a",
+    "id": "6a785cb0-f631-4f03-94c6-e82331c14c1a",
     "label": "Transformation Node",
     "outputs": [],
-    "ports": [{"id": "88778117-fbfc-4b44-964b-5a4994aa2f24", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "5d096263-7fbf-490a-83b7-e441852b5fb6", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "67a13ea0-fd6b-44dc-af46-c72da06aa11f", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "08d4e317-baa8-478f-b278-99362e50e6b4", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -306,11 +306,11 @@ class BrokenNode(BaseNode)
         "comment": {"expanded": True, "value": "This is Some Node."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "1e559c2e-db82-41f0-9ceb-5e89b0c5a0a3",
+    "id": "a2706730-074b-4ea3-968a-25e68af1caed",
     "label": "Some Node",
     "outputs": [],
-    "ports": [{"id": "48e39e97-5fd4-471e-b4f2-51d3baf06456", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "e3381fb7-61fc-4c46-ae8e-51fc463b6a59", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "e0ee3653-e071-4b91-9dfc-5e1dca9c665b", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "8d931b01-30ca-4c0d-b1b7-7c18379c83e6", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -371,12 +371,12 @@ class MyAdditionNode(BaseNode):
     "adornments": None,
     "attributes": [
         {
-            "id": "aed3bcbb-d243-4a77-bb5e-409e9a28e868",
+            "id": "4223b340-447f-46c2-b35d-30ef16c5ae17",
             "name": "arg1",
             "value": None,
         },
         {
-            "id": "9225d225-a41b-4642-8964-f28f58dcf4bf",
+            "id": "1de0f46a-95f6-4cd0-bb0f-e2414054d507",
             "name": "arg2",
             "value": None,
         },
@@ -387,11 +387,11 @@ class MyAdditionNode(BaseNode):
         "comment": {"expanded": True, "value": "Custom node that performs simple addition."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "195cd69d-3d2d-41e4-a432-16c433cb8d34",
+    "id": "2464b610-fb6d-495b-b17c-933ee147f19f",
     "label": "My Addition Node",
-    "outputs": [{"id": "3d8e40cb-2aa8-44bd-ae6a-708a9fbc4779", "name": "result", "type": "NUMBER", "value": None}],
-    "ports": [{"id": "9a9e4ef6-febf-4093-a515-217bbb1373db", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "a5298668-d808-4a45-a62e-790943948e8a", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "outputs": [{"id": "f39d85c9-e7bf-45e1-bb67-f16225db0118", "name": "result", "type": "NUMBER", "value": None}],
+    "ports": [{"id": "bc489295-cd8a-4aa2-88bb-34446374100d", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "ff580cad-73d6-44fe-8f2c-4b8dc990ee70", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
     "should_file_merge": True,
 }

workflow_server/api/tests/test_workflow_view_stream_workflow_route.py CHANGED
@@ -5,6 +5,7 @@ import io
 import json
 from queue import Empty
 import re
+import time
 from unittest import mock
 from uuid import uuid4
 
@@ -133,6 +134,8 @@ class Workflow(BaseWorkflow):
 
     with mock.patch("builtins.open", mock.mock_open(read_data="104857600")):
         # WHEN we call the stream route
+        ts_ns = time.time_ns()
+        request_body["vembda_service_initiated_timestamp"] = ts_ns
         status_code, events = both_stream_types(request_body)
 
         # THEN we get a 200 response
@@ -177,6 +180,15 @@ class Workflow(BaseWorkflow):
     assert "is_new_server" in server_metadata
     assert server_metadata["is_new_server"] is False
 
+    # AND the initiated event should have initiated_latency within a reasonable range
+    assert "initiated_latency" in server_metadata, "initiated_latency should be present in server_metadata"
+    initiated_latency = server_metadata["initiated_latency"]
+    assert isinstance(initiated_latency, int), "initiated_latency should be an integer (nanoseconds)"
+    # Latency should be positive and less than 60 seconds (60_000_000_000 nanoseconds) for CI
+    assert (
+        0 < initiated_latency < 60_000_000_000
+    ), f"initiated_latency should be between 0 and 60 seconds, got {initiated_latency} ns"
+
     assert events[2]["name"] == "workflow.execution.fulfilled", events[2]
     assert events[2]["body"]["workflow_definition"]["module"] == ["test", "workflow"]

workflow_server/api/workflow_view.py CHANGED
@@ -8,6 +8,7 @@ import os
 import pkgutil
 from queue import Empty
 import sys
+import threading
 import time
 import traceback
 from uuid import uuid4
@@ -71,19 +72,195 @@ WORKFLOW_INITIATION_TIMEOUT_SECONDS = 60
 @bp.route("/stream", methods=["POST"])
 def stream_workflow_route() -> Response:
     data = request.get_json()
+    try:
+        context = WorkflowExecutorContext.model_validate(data)
+    except ValidationError as e:
+        error_message = e.errors()[0]["msg"]
+        error_location = e.errors()[0]["loc"]
+
+        return Response(
+            json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
+            status=400,
+            content_type="application/json",
+        )
+
+    headers = _get_headers(context)
+
+    # We can currently exceed the concurrency count with long-running workflows due to a knative issue,
+    # so if we detect a memory problem here, exit early.
+    if not wait_for_available_process():
+        return Response(
+            json.dumps(
+                {
+                    "detail": f"Workflow server concurrent request rate exceeded. "
+                    f"Process count: {get_active_process_count()}"
+                }
+            ),
+            status=429,
+            content_type="application/json",
+            headers=headers,
+        )
+
+    start_workflow_state = _start_workflow(context)
+    if isinstance(start_workflow_state, Response):
+        return start_workflow_state
+
+    workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_state
+
+    def generator() -> Generator[str, None, None]:
+        try:
+            yield "\n"
+            yield vembda_initiated_event.model_dump_json()
+            yield "\n"
+            for row in workflow_events:
+                yield "\n"
+                if isinstance(row, dict):
+                    dump = json.dumps(row)
+                    yield dump
+                else:
+                    yield row
+                yield "\n"
+            # Sometimes the connections get hung after they finish with the vembda fulfilled event
+            # if it happens during a knative scale-down event. So we emit an END string so that
+            # we don't have to do string compares on all the events, for performance.
+            yield "\n"
+            yield "END"
+            yield "\n"
+
+            logger.info(
+                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except GeneratorExit:
+            # These can happen either from Vembda disconnects (possibly from predict disconnects) or
+            # from knative activator gateway timeouts, which are caused by idleTimeout or responseStartSeconds
+            # being exceeded.
+            app.logger.error(
+                "Client disconnected in the middle of the Workflow Stream",
+                extra={
+                    "sentry_tags": {
+                        "server_version": vembda_initiated_event.body.server_version,
+                        "sdk_version": vembda_initiated_event.body.sdk_version,
+                    }
+                },
+            )
+            return
+        except Exception as e:
+            logger.exception("Error during workflow response stream generator", extra={"error": e})
+            yield "\n"
+            yield "END"
+            yield "\n"
+            return
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                        remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                remove_active_span_id(span_id)
+
+    resp = Response(
+        stream_with_context(generator()),
+        status=200,
+        content_type="application/x-ndjson",
+        headers=headers,
+    )
+    return resp
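
The stream route answers with newline-delimited JSON (`application/x-ndjson`) and closes with a literal `END` sentinel after the vembda fulfilled event. A minimal client sketch; the base URL and payload are assumptions for illustration:

```python
# Minimal NDJSON consumer for the /stream route (sketch; URL and payload assumed).
import json
import requests

request_body = {}  # fill with the execution context payload, as the tests above build
with requests.post("http://localhost:8000/stream", json=request_body, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line:
            continue          # skip the blank separator lines the server interleaves
        if line == "END":
            break             # sentinel: the stream is done, stop without string-comparing events
        event = json.loads(line)
        print(event["name"])
```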
 
+
+@bp.route("/async-exec", methods=["POST"])
+def async_exec_workflow() -> Response:
+    data = request.get_json()
     try:
         context = WorkflowExecutorContext.model_validate(data)
     except ValidationError as e:
         error_message = e.errors()[0]["msg"]
         error_location = e.errors()[0]["loc"]
 
+        # TODO: need to convert this to a vembda event so that triggered execs can be notified;
+        # can either do it here in the workflow server or
         return Response(
             json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
             status=400,
             content_type="application/json",
         )
 
+    # Reject back to the queue handler if we're low on memory here, though maybe we should update the is_available
+    # route to look at memory too. Don't send this response as an event. Though we might want some logic to catch
+    # if they have a workflow server that can never start a workflow because the base image uses so much memory.
+    if not wait_for_available_process():
+        return Response(
+            json.dumps({"detail": f"Server resources low. " f"Process count: {get_active_process_count()}"}),
+            status=429,
+            content_type="application/json",
+        )
+
+    def run_workflow_background() -> None:
+        process: Optional[Process] = None
+        span_id: Optional[str] = None
+
+        try:
+            start_workflow_result = _start_workflow(context)
+            if isinstance(start_workflow_result, Response):
+                # TODO: same here, should return this response as an event or it will get yeeted to the nether
+                # return start_workflow_result
+                return
+
+            workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_result
+
+            for _ in workflow_events:
+                # This is very inefficient in process mode, since we're just having the main proc stream
+                # the events to nowhere, wasting memory, I/O, and CPU.
+                continue
+            logger.info(
+                f"Workflow async exec completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except Exception as e:
+            logger.exception("Error during workflow async background worker", e)
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                    if span_id:
+                        remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                if span_id:
+                    remove_active_span_id(span_id)
+
+    thread = threading.Thread(target=run_workflow_background)
+    thread.start()
+
+    return Response(
+        json.dumps({"success": True}),
+        status=200,
+        content_type="application/json",
+    )
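
Unlike `/stream`, this new route acknowledges immediately and runs the workflow on a background thread, so events are emitted out of band rather than streamed back on the response. A hypothetical invocation (URL and payload are assumptions):

```python
# Fire-and-forget execution via /async-exec (sketch; URL and payload assumed).
import requests

request_body = {}  # same execution context payload as the /stream route
resp = requests.post("http://localhost:8000/async-exec", json=request_body)
assert resp.status_code == 200
assert resp.json() == {"success": True}  # the workflow continues in a background thread
```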
+
+
+def _start_workflow(
+    context: WorkflowExecutorContext,
+) -> Union[
+    Response,
+    tuple[
+        Iterator[Union[str, dict]],
+        VembdaExecutionInitiatedEvent,
+        Optional[Process],
+        str,
+        dict[str, str],
+    ],
+]:
+    headers = _get_headers(context)
     logger.info(
         f"Starting Workflow Server Request, trace ID: {context.trace_id}, "
         f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
@@ -100,29 +277,7 @@ def stream_workflow_route() -> Response:
         parent=None,
     )
 
-    process_output_queue: Queue[Union[str, dict]] = Queue()
-
-    headers = {
-        "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
-        "X-Vellum-Server-Version": vembda_initiated_event.body.server_version,
-        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
-    }
-
-    # We can currently exceed the concurrency count with long-running workflows due to a knative issue,
-    # so if we detect a memory problem here, exit early.
-    if not wait_for_available_process():
-        return Response(
-            json.dumps(
-                {
-                    "detail": f"Workflow server concurrent request rate exceeded. "
-                    f"Process count: {get_active_process_count()}"
-                }
-            ),
-            status=429,
-            content_type="application/json",
-            headers=headers,
-        )
-
+    output_queue: Queue[Union[str, dict]] = Queue()
     cancel_signal = MultiprocessingEvent()
     timeout_signal = MultiprocessingEvent()
 
@@ -131,7 +286,7 @@ def stream_workflow_route() -> Response:
     try:
         process = stream_workflow_process_timeout(
             executor_context=context,
-            queue=process_output_queue,
+            queue=output_queue,
             cancel_signal=cancel_signal,
             timeout_signal=timeout_signal,
         )
@@ -139,10 +294,10 @@ def stream_workflow_route() -> Response:
     except Exception as e:
         logger.exception(e)
 
-        process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
+        output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
 
     try:
-        first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
+        first_item = output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
     except Empty:
         logger.error("Request timed out trying to initiate the Workflow")
 
@@ -291,72 +446,9 @@ def stream_workflow_route() -> Response:
             break
         yield event
 
-    workflow_events = process_events(process_output_queue)
+    workflow_events = process_events(output_queue)
 
-    def generator() -> Generator[str, None, None]:
-        try:
-            yield "\n"
-            yield vembda_initiated_event.model_dump_json()
-            yield "\n"
-            for row in workflow_events:
-                yield "\n"
-                if isinstance(row, dict):
-                    dump = json.dumps(row)
-                    yield dump
-                else:
-                    yield row
-                yield "\n"
-            # Sometimes the connections get hung after they finish with the vembda fulfilled event
-            # if it happens during a knative scale-down event. So we emit an END string so that
-            # we don't have to do string compares on all the events, for performance.
-            yield "\n"
-            yield "END"
-            yield "\n"
-
-            logger.info(
-                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
-            )
-        except GeneratorExit:
-            # These can happen either from Vembda disconnects (possibly from predict disconnects) or
-            # from knative activator gateway timeouts, which are caused by idleTimeout or responseStartSeconds
-            # being exceeded.
-            app.logger.error(
-                "Client disconnected in the middle of the Workflow Stream",
-                extra={
-                    "sentry_tags": {
-                        "server_version": vembda_initiated_event.body.server_version,
-                        "sdk_version": vembda_initiated_event.body.sdk_version,
-                    }
-                },
-            )
-            return
-        except Exception as e:
-            logger.exception("Error during workflow response stream generator", extra={"error": e})
-            yield "\n"
-            yield "END"
-            yield "\n"
-            return
-        finally:
-            if ENABLE_PROCESS_WRAPPER:
-                try:
-                    if process and process.is_alive():
-                        process.kill()
-                    if process:
-                        increment_process_count(-1)
-                        remove_active_span_id(span_id)
-                except Exception as e:
-                    logger.error("Failed to kill process", e)
-            else:
-                increment_process_count(-1)
-                remove_active_span_id(span_id)
-
-    resp = Response(
-        stream_with_context(generator()),
-        status=200,
-        content_type="application/x-ndjson",
-        headers=headers,
-    )
-    return resp
+    return workflow_events, vembda_initiated_event, process, span_id, headers
 
 
 @bp.route("/stream-node", methods=["POST"])
@@ -436,6 +528,7 @@ def serialize_route() -> Response:
     files = data.get("files", {})
     workspace_api_key = data.get("workspace_api_key")
     is_new_server = data.get("is_new_server", False)
+    module = data.get("module")
 
     if not files:
         return Response(
@@ -448,7 +541,7 @@ def serialize_route() -> Response:
 
     # Generate a unique namespace for this serialization request
     namespace = get_random_namespace()
-    virtual_finder = VirtualFileFinder(files, namespace)
+    virtual_finder = VirtualFileFinder(files, namespace, source_module=module)
 
     headers = {
         "X-Vellum-Is-New-Server": str(is_new_server).lower(),
@@ -564,3 +657,12 @@ def startup_error_generator(
         },
     )
     return
+
+
+def _get_headers(context: WorkflowExecutorContext) -> dict[str, Union[str, Any]]:
+    headers = {
+        "X-Vellum-SDK-Version": get_version()["sdk_version"],
+        "X-Vellum-Server-Version": get_version()["server_version"],
+        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
+    }
+    return headers
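
The headers now come from `get_version()` rather than from the initiated event's body (the inline dict deleted above depended on `vembda_initiated_event`), presumably so both routes can send them before a workflow has started. Illustrative output, assuming both versions report 1.10.0:

```python
# Illustrative result of _get_headers(context) (values assumed, not from the package):
{
    "X-Vellum-SDK-Version": "1.10.0",
    "X-Vellum-Server-Version": "1.10.0",
    "X-Vellum-Events-Emitted": "False",
}
```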

workflow_server/core/executor.py CHANGED
@@ -1,5 +1,4 @@
-from datetime import datetime
-import importlib
+from datetime import datetime, timezone
 from io import StringIO
 import json
 import logging
@@ -12,7 +11,7 @@ from threading import Event as ThreadingEvent
 import time
 from traceback import format_exc
 from uuid import UUID, uuid4
-from typing import Any, Callable, Generator, Iterator, Optional, Tuple, Type
+from typing import Any, Callable, Generator, Iterator, Optional, Tuple
 
 from vellum_ee.workflows.display.utils.events import event_enricher
 from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder
@@ -32,6 +31,7 @@ from vellum.workflows.resolvers.base import BaseWorkflowResolver
 from vellum.workflows.resolvers.resolver import VellumResolver
 from vellum.workflows.state.context import WorkflowContext
 from vellum.workflows.state.store import EmptyStore
+from vellum.workflows.triggers import BaseTrigger
 from vellum.workflows.types import CancelSignal
 from vellum.workflows.workflows.event_filters import workflow_sandbox_event_filter
 from workflow_server.config import LOCAL_DEPLOYMENT, LOCAL_WORKFLOW_MODULE
@@ -150,7 +150,21 @@ def stream_workflow(
     cancel_watcher_kill_switch = ThreadingEvent()
     try:
         workflow, namespace = _create_workflow(executor_context)
-        workflow_inputs = _get_workflow_inputs(executor_context, workflow.__class__)
+
+        trigger_id = executor_context.trigger_id
+
+        inputs_or_trigger = workflow.deserialize_trigger(trigger_id=trigger_id, inputs=executor_context.inputs)
+
+        # Determine whether we have inputs or a trigger
+        if isinstance(inputs_or_trigger, BaseInputs):
+            workflow_inputs = inputs_or_trigger
+            trigger = None
+        elif isinstance(inputs_or_trigger, BaseTrigger):
+            workflow_inputs = None
+            trigger = inputs_or_trigger
+        else:
+            workflow_inputs = None
+            trigger = None
 
         workflow_state = (
             workflow.deserialize_state(
@@ -176,6 +190,8 @@ def stream_workflow(
             entrypoint_nodes=[executor_context.node_id] if executor_context.node_id else None,
             previous_execution_id=executor_context.previous_execution_id,
             timeout=executor_context.timeout,
+            trigger=trigger,
+            execution_id=executor_context.workflow_span_id,
         )
     except WorkflowInitializationException as e:
         cancel_watcher_kill_switch.set()
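
Input resolution moves out of this file (the old `_get_workflow_inputs` helper is deleted at the bottom of this diff) and into the SDK's `deserialize_trigger`, which hands back either plain inputs or a trigger instance; the executor then dispatches on the type. A hedged sketch of that dispatch as a standalone helper; the class names are the same ones executor.py imports from the vellum SDK:

```python
# Mirror of the dispatch above as a helper (sketch, not the package's code).
def split_inputs_or_trigger(value):
    """Exactly one of (workflow_inputs, trigger) is non-None after dispatch."""
    if isinstance(value, BaseInputs):
        return value, None       # a regular inputs payload
    if isinstance(value, BaseTrigger):
        return None, value       # a reconstructed trigger
    return None, None            # defensive default, matching the else branch
```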
@@ -257,32 +273,11 @@ def stream_node(
     disable_redirect: bool = True,
 ) -> Iterator[dict]:
     workflow, namespace = _create_workflow(executor_context)
-    Node: Optional[Type[BaseNode]] = None
-
-    for workflow_node in workflow.get_nodes():
-        if executor_context.node_id and workflow_node.__id__ == executor_context.node_id:
-            Node = workflow_node
-            break
-        elif (
-            executor_context.node_module
-            and executor_context.node_name
-            and workflow_node.__name__ == executor_context.node_name
-            and workflow_node.__module__ == f"{namespace}.{executor_context.node_module}"
-        ):
-            Node = workflow_node
-            break
-
-    if not Node:
-        identifier = executor_context.node_id or f"{executor_context.node_module}.{executor_context.node_name}"
-        raise WorkflowInitializationException(
-            message=f"Node '{identifier}' not found in workflow",
-            workflow_definition=workflow.__class__,
-        )
 
     def call_node() -> Generator[dict[str, Any], Any, None]:
         executor_context.stream_start_time = time.time_ns()
 
-        for event in workflow.run_node(Node, inputs=executor_context.inputs):  # type: ignore[arg-type]
+        for event in workflow.run_node(executor_context.node_ref, inputs=executor_context.inputs):
            yield event.model_dump(mode="json")
 
     return _call_stream(
@@ -343,7 +338,9 @@ def _call_stream(
 def _create_workflow(executor_context: BaseExecutorContext) -> Tuple[BaseWorkflow, str]:
     namespace = _get_file_namespace(executor_context)
     if namespace != LOCAL_WORKFLOW_MODULE:
-        sys.meta_path.append(VirtualFileFinder(executor_context.files, namespace))
+        sys.meta_path.append(
+            VirtualFileFinder(executor_context.files, namespace, source_module=executor_context.module)
+        )
 
     workflow_context = _create_workflow_context(executor_context)
     Workflow = BaseWorkflow.load_from_module(namespace)
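
`VirtualFileFinder` is the SDK's meta-path finder that makes the request's in-memory `files` importable under a generated namespace; the new `source_module` argument (also threaded through `serialize_route` above) presumably records the files' original module path. A sketch of the import mechanics, with illustrative file contents and namespace:

```python
# Sketch: making in-memory workflow files importable (values illustrative).
import sys
from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder

files = {"__init__.py": "", "workflow.py": "class Workflow: ..."}
sys.meta_path.append(VirtualFileFinder(files, "ns_1a2b3c", source_module="my_app.my_workflow"))

import ns_1a2b3c.workflow  # resolved from the dict above rather than from disk
```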
@@ -434,6 +431,14 @@ def _enrich_event(event: WorkflowEvent, executor_context: Optional[BaseExecutorC
 
     if executor_context is not None:
         metadata["is_new_server"] = executor_context.is_new_server
+
+        if executor_context.vembda_service_initiated_timestamp is not None and event.timestamp is not None:
+            event_ts = event.timestamp
+            if event_ts.tzinfo is None:
+                event_ts = event_ts.replace(tzinfo=timezone.utc)
+            event_ts_ns = int(event_ts.timestamp() * 1_000_000_000)
+            initiated_latency = event_ts_ns - executor_context.vembda_service_initiated_timestamp
+            metadata["initiated_latency"] = initiated_latency
     elif event.name == "workflow.execution.fulfilled" and is_deployment:
         metadata = {}
         memory_mb = get_memory_in_use_mb()
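
Naive event timestamps are pinned to UTC before conversion so the subtraction against the caller's `time.time_ns()` stamp is well defined; `datetime.timestamp()` returns float seconds, hence the `1_000_000_000` scale factor. A worked example of the conversion:

```python
# Worked example of the nanosecond conversion above.
from datetime import datetime, timezone

event_ts = datetime(2025, 1, 1, 0, 0, 1)              # naive timestamp, as some events emit
if event_ts.tzinfo is None:
    event_ts = event_ts.replace(tzinfo=timezone.utc)  # interpret naive times as UTC
event_ts_ns = int(event_ts.timestamp() * 1_000_000_000)
assert event_ts_ns == 1_735_689_601_000_000_000       # 2025-01-01T00:00:01Z in nanoseconds
```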
@@ -473,38 +478,3 @@ def _dump_event(event: BaseEvent, executor_context: BaseExecutorContext) -> dict
         dump["body"]["node_definition"]["module"] = module_base + dump["body"]["node_definition"]["module"][1:]
 
     return dump
-
-
-def _get_workflow_inputs(
-    executor_context: BaseExecutorContext, workflow_class: Type[BaseWorkflow]
-) -> Optional[BaseInputs]:
-    if not executor_context.inputs:
-        return None
-
-    if not executor_context.files.get("inputs.py"):
-        return None
-
-    namespace = _get_file_namespace(executor_context)
-    inputs_module_path = f"{namespace}.inputs"
-    try:
-        inputs_module = importlib.import_module(inputs_module_path)
-    except Exception as e:
-        raise WorkflowInitializationException(
-            message=f"Failed to initialize workflow inputs: {e}",
-            workflow_definition=workflow_class,
-        ) from e
-
-    if not hasattr(inputs_module, "Inputs"):
-        raise WorkflowInitializationException(
-            message=f"Inputs module {inputs_module_path} does not have a required Inputs class",
-            workflow_definition=workflow_class,
-        )
-
-    if not issubclass(inputs_module.Inputs, BaseInputs):
-        raise WorkflowInitializationException(
-            message=f"""The class {inputs_module_path}.Inputs was expected to be a subclass of BaseInputs, \
-but found {inputs_module.Inputs.__class__.__name__}""",
-            workflow_definition=workflow_class,
-        )
-
-    return inputs_module.Inputs(**executor_context.inputs)

workflow_server/core/utils.py CHANGED
@@ -2,6 +2,7 @@ from datetime import datetime
 from uuid import uuid4
 from typing import Optional
 
+from workflow_server.config import IS_ASYNC_MODE
 from workflow_server.core.events import VembdaExecutionFulfilledBody, VembdaExecutionFulfilledEvent
 from workflow_server.core.workflow_executor_context import BaseExecutorContext
 
@@ -46,6 +47,9 @@ def serialize_vembda_rejected_event(
 
 
 def is_events_emitting_enabled(executor_context: Optional[BaseExecutorContext]) -> bool:
+    if IS_ASYNC_MODE:
+        return True
+
     if not executor_context:
         return False
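
In async mode there is no open response stream to carry events back to the caller, so emission is forced on unconditionally; that rationale is an inference from the new `/async-exec` route, not stated in the package. Behavior sketch:

```python
# Behavior sketch (assumes IS_ASYNC_MODE is true in workflow_server.config;
# how the flag is set there is not shown in this diff).
from workflow_server.core.utils import is_events_emitting_enabled

assert is_events_emitting_enabled(None) is True   # async mode short-circuits everything
# With IS_ASYNC_MODE false, a missing context still disables emission (returns False).
```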

workflow_server/core/workflow_executor_context.py CHANGED
@@ -3,7 +3,7 @@ from functools import cached_property
 import os
 import time
 from uuid import UUID
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from typing_extensions import Self
 
 from flask import has_request_context, request
@@ -36,6 +36,11 @@ class BaseExecutorContext(UniversalBaseModel):
     previous_execution_id: Optional[UUID] = None
     feature_flags: Optional[dict[str, bool]] = None
     is_new_server: bool = False
+    trigger_id: Optional[UUID] = None
+    # The actual 'execution id' of the workflow that we pass into the workflow
+    # when running in async mode.
+    workflow_span_id: Optional[UUID] = None
+    vembda_service_initiated_timestamp: Optional[int] = None
 
     @field_validator("inputs", mode="before")
     @classmethod
@@ -86,6 +91,18 @@ class NodeExecutorContext(BaseExecutorContext):
     node_module: Optional[str] = None
     node_name: Optional[str] = None
 
+    @property
+    def node_ref(self) -> Union[UUID, str]:
+        """
+        Returns the node reference for use with workflow.run_node().
+
+        Returns node_id if it exists, otherwise returns the combination
+        of node_module and node_name as a fully qualified string.
+        """
+        if self.node_id:
+            return self.node_id
+        return f"{self.node_module}.{self.node_name}"
+
    @model_validator(mode="after")
    def validate_node_identification(self) -> Self:
        if not self.node_id and not (self.node_module and self.node_name):
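
`node_ref` replaces the manual node lookup that `stream_node` used to perform (see the deleted loop in executor.py above): `run_node` now accepts either a node UUID or a `module.ClassName` string. A hypothetical construction, assuming the base context's remaining fields have defaults or are supplied elsewhere:

```python
# Illustrative node_ref resolution (sketch; other context fields omitted).
ctx = NodeExecutorContext(node_module="nodes.my_node", node_name="MyNode")
assert ctx.node_ref == "nodes.my_node.MyNode"  # node_id unset, so module.ClassName is used
# When node_id is set, node_ref returns that UUID and run_node() resolves the node by ID.
```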
workflow_server/start.py CHANGED
@@ -33,6 +33,7 @@ class CustomGunicornLogger(glogging.Logger):
         logger = logging.getLogger("gunicorn.access")
         logger.addFilter(HealthCheckFilter())
         logger.addFilter(SignalFilter())
+        logger.addFilter(StatusIsAvailableFilter())
 
 
 class HealthCheckFilter(logging.Filter):
@@ -45,6 +46,11 @@ class SignalFilter(logging.Filter):
         return "SIGTERM" not in record.getMessage()
 
 
+class StatusIsAvailableFilter(logging.Filter):
+    def filter(self, record: Any) -> bool:
+        return "/status/is_available" not in record.getMessage()
+
+
 def start() -> None:
     if not is_development():
         start_oom_killer_worker()
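
By analogy with the existing `HealthCheckFilter`, the new filter drops gunicorn access-log lines for the `/status/is_available` probe so frequent readiness polls don't flood the logs. A quick self-contained check of its behavior:

```python
# Quick check of StatusIsAvailableFilter (self-contained sketch).
import logging

class StatusIsAvailableFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        return "/status/is_available" not in record.getMessage()

rec = logging.LogRecord("gunicorn.access", logging.INFO, "", 0,
                        "GET /status/is_available HTTP/1.1 200", None, None)
assert StatusIsAvailableFilter().filter(rec) is False  # probe lines are suppressed
```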

workflow_server/utils/exit_handler.py CHANGED
@@ -1,15 +1,43 @@
+from datetime import datetime
 import logging
 import multiprocessing
 import signal
+from time import sleep
 from typing import Any
 
+from workflow_server.config import IS_ASYNC_MODE, is_development
+from workflow_server.utils.system_utils import get_active_process_count
+
 logger = logging.getLogger(__name__)
 process_killed_switch = multiprocessing.Event()
 
 
+def _wait_for_workers() -> None:
+    # It would be annoying to have this on for dev, since it would prevent reload restarts. Also disabling this
+    # for non-async mode for now, since it shouldn't be needed anyway because we keep the requests open.
+    if is_development() and not IS_ASYNC_MODE:
+        return
+
+    start_time = datetime.now()
+    loops = 0
+
+    while get_active_process_count() > 0:
+        if loops % 30 == 0:
+            logger.info("Waiting for workflow processes to finish...")
+
+        # TODO: need to pass in the max workflow time here for VPC
+        if (datetime.now() - start_time).total_seconds() > 1800:
+            logger.warning("Max elapsed time waiting for workflow processes to complete exceeded, shutting down")
+            exit(1)
+
+        sleep(1)
+        loops += 1
+
+
 def gunicorn_exit_handler(_worker: Any) -> None:
+    logger.info("Received gunicorn kill signal")
     process_killed_switch.set()
-    logger.warning("Received gunicorn kill signal")
+    _wait_for_workers()
 
 
 def exit_handler(_signal: int, _frame: Any) -> None:
@@ -19,6 +47,7 @@ def exit_handler(_signal: int, _frame: Any) -> None:
     """
     process_killed_switch.set()
     logger.warning("Received kill signal")
+    _wait_for_workers()
     exit(1)
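
Both shutdown paths now block until in-flight workflow processes drain (or 30 minutes elapse) before the worker exits, so a scale-down no longer kills running workflows. How `gunicorn_exit_handler` is wired to gunicorn is not part of this diff; a plausible hookup via gunicorn's server hooks might look like:

```python
# Hypothetical gunicorn config wiring (not shown in this diff).
from workflow_server.utils.exit_handler import gunicorn_exit_handler

def worker_int(worker):
    # gunicorn invokes this server hook when a worker receives SIGINT/SIGQUIT
    gunicorn_exit_handler(worker)
```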