vellum-workflow-server 1.8.6.post4__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.

vellum_workflow_server-1.10.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vellum-workflow-server
-Version: 1.8.6.post4
+Version: 1.10.0
 Summary:
 License: AGPL
 Requires-Python: >=3.9.0,<4
@@ -29,7 +29,7 @@ Requires-Dist: pyjwt (==2.10.0)
 Requires-Dist: python-dotenv (==1.0.1)
 Requires-Dist: retrying (==1.3.4)
 Requires-Dist: sentry-sdk[flask] (==2.20.0)
-Requires-Dist: vellum-ai (==1.8.6)
+Requires-Dist: vellum-ai (==1.10.0)
 Description-Content-Type: text/markdown
 
 # Vellum Workflow Runner Server

vellum_workflow_server-1.10.0.dist-info/RECORD CHANGED
@@ -5,22 +5,22 @@ workflow_server/api/healthz_view.py,sha256=itiRvBDBXncrw8Kbbc73UZLwqMAhgHOR3uSre
 workflow_server/api/status_view.py,sha256=Jah8dBAVL4uOcRfsjKAOyfVONFyk9HQjXeRfjcIqhmA,514
 workflow_server/api/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/api/tests/test_input_display_mapping.py,sha256=drBZqMudFyB5wgiUOcMgRXz7E7ge-Qgxbstw4E4f0zE,2211
-workflow_server/api/tests/test_workflow_view.py,sha256=81kAHpijNp0rvb3ZjvceB5uFEriVWPeWHnK78-xoeTc,32343
-workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=WFgQYAySbFx5TpT-vB3qGlU8jP8gTo2pTPuuc5wz6RM,39664
-workflow_server/api/workflow_view.py,sha256=pJRUpAE83KXz0QvokORSmX4jDtniNQmlc_CkrQmHhxo,20753
+workflow_server/api/tests/test_workflow_view.py,sha256=B6B8mCirt3FvpPKRP_AyzPJ199k_gwLzAcQuWRkzEfA,32343
+workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=Yrp_DlLbbwZJe5WRLwdlFT17R8CQoCK9-jlQ1jUT_eM,40377
+workflow_server/api/workflow_view.py,sha256=RiRO0Z_gCIbdcG9XX_PcB9j8Qx5K_2dXxxtkib6fezY,24601
 workflow_server/code_exec_runner.py,sha256=DLNNrinCRbnkSvlqVvSZ1wv_etI7r_kKAXNPGMj3jBk,2196
 workflow_server/config.py,sha256=I4hfTsjIbHxoSKylPCjKnrysPV0jO5nfRKwpKvEcfAE,2193
 workflow_server/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 workflow_server/core/cancel_workflow.py,sha256=QcEeYUIrxq4pub-z9BlGi5fLI3gVRml-56rMCW7j5Hc,2212
 workflow_server/core/events.py,sha256=24MA66DVQuaLJJcZrS8IL1Zq4Ohi9CoouKZ5VgoH3Cs,1402
-workflow_server/core/executor.py,sha256=K7W_F2lqJxhrdzqzNhKym_k8enJjJucYJQRLsS_sw3Q,17895
-workflow_server/core/utils.py,sha256=si0NB4Suurc-mn8NYdn59xM9CkPrfOP1aWEVrZvifDI,1929
-workflow_server/core/workflow_executor_context.py,sha256=7Vp714LNVx_J5ERbgRHy5pJo_MaXsccIePWEW3IBshw,3234
+workflow_server/core/executor.py,sha256=xbySFdb9KHoqFDfiKMR77fViFVo3XEQ5ER54C1PlS8c,16948
+workflow_server/core/utils.py,sha256=mecVPqQkthrC4mpop3r8J3IWnBmKbDgqfCrSagyzVEg,2021
+workflow_server/core/workflow_executor_context.py,sha256=8faOdpU4cBeIbmOvg9VzD3eS5i_PKcH7tyNGzx_rehg,3899
 workflow_server/logging_config.py,sha256=Hvx1t8uhqMMinl-5qcef7ufUvzs6x14VRnCb7YZxEAg,1206
 workflow_server/server.py,sha256=pBl0OQmrLE-PbTDwTgsVmxgz_Ai3TVhFRaMnr6PX6Yk,1849
-workflow_server/start.py,sha256=xSIobowtSLoZI86bbMkmEw3pqJHQaFdDyNffk4kGYL8,2544
+workflow_server/start.py,sha256=Ams5ycqVbBorC7s6EI95BYzjpxzlo5mQbBnMNOkJS0w,2753
 workflow_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workflow_server/utils/exit_handler.py,sha256=_FacDVi4zc3bfTA3D2mJsISePlJ8jpLrnGVo5-xZQFs,743
+workflow_server/utils/exit_handler.py,sha256=PzRpzmia4Ki33sJTWjsvjD5oLP4_qfS5SZg2uXnyqxE,1767
 workflow_server/utils/log_proxy.py,sha256=nugi6fOgAYKX2X9DIc39TG366rsmmDUPoEtG3gzma_Y,3088
 workflow_server/utils/oom_killer.py,sha256=dzaqSzi0jQ3MvALwwiYIO9r6VWLa5Ln9AY6l11WEexo,3050
 workflow_server/utils/sentry.py,sha256=pmGDoaFhJwUprjP_Vmz6bETitqKQulJ0vwRP-gYb2w4,2145
@@ -30,7 +30,7 @@ workflow_server/utils/tests/test_sentry_integration.py,sha256=14PfuW8AaQNNtqLmBs
 workflow_server/utils/tests/test_system_utils.py,sha256=_4GwXvVvU5BrATxUEWwQIPg0bzQXMWBtiBmjP8MTxJM,4314
 workflow_server/utils/tests/test_utils.py,sha256=0Nq6du8o-iBtTrip9_wgHES53JSiJbVdSXaBnPobw3s,6930
 workflow_server/utils/utils.py,sha256=m7iMJtor5SQLWu7jlJw-X5Q3nmbq69BCxTMv6qnFYrA,4835
-vellum_workflow_server-1.8.6.post4.dist-info/METADATA,sha256=c1qUerOxt_TK40PtyU8uWZ8J7YzeCHRa-Z4Zw4OB2Xo,2273
-vellum_workflow_server-1.8.6.post4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-vellum_workflow_server-1.8.6.post4.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
-vellum_workflow_server-1.8.6.post4.dist-info/RECORD,,
+vellum_workflow_server-1.10.0.dist-info/METADATA,sha256=KLswbGouJhCTrF98d2iVsu_YgsnF8rb4DY_K6t1CWRc,2269
+vellum_workflow_server-1.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+vellum_workflow_server-1.10.0.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
+vellum_workflow_server-1.10.0.dist-info/RECORD,,

workflow_server/api/tests/test_workflow_view.py CHANGED
@@ -63,11 +63,11 @@ class TestNode(BaseNode):
         "comment": {"expanded": True, "value": "A test node for processing data."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "7a8b251d-f5ca-462a-b293-071d219460fb",
+    "id": "6f4c9178-9f46-4723-bcb7-0bd59db54eca",
     "label": "Test Node",
     "outputs": [],
-    "ports": [{"id": "a3a0eefd-45d0-4f13-8c58-a836a9f7f9ed", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "a022e36c-9852-4772-9be3-3c6c147fd811", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "4394823f-79a8-4dbc-99ae-06a1df6c7408", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "07240af1-67c6-4460-b53d-53f0b0f1b90e", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -127,11 +127,11 @@ class SomeOtherNode(BaseNode):
         "comment": {"expanded": True, "value": "This is Some Node."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "1e559c2e-db82-41f0-9ceb-5e89b0c5a0a3",
+    "id": "89e84bac-5a5f-4f64-8083-7d3ebec98be1",
     "label": "Some Node",
     "outputs": [],
-    "ports": [{"id": "48e39e97-5fd4-471e-b4f2-51d3baf06456", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "e3381fb7-61fc-4c46-ae8e-51fc463b6a59", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "2983ea5c-1d29-483a-b896-53098f5de4f1", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "6996efb0-5a20-4719-8835-34fe6552764a", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -150,11 +150,11 @@ class SomeOtherNode(BaseNode):
         "comment": {"expanded": True, "value": "This is Some Other Node."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "7aee541b-b245-4c8a-9137-3e4631d5100c",
+    "id": "3cdbba02-8a34-4e0f-8b94-770a944dcaa3",
     "label": "Some Other Node",
     "outputs": [],
-    "ports": [{"id": "fb66b46a-d970-4bc9-83ea-70c154c57ddd", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "13fa2714-20b3-4bc3-ab79-621a188e3bfa", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "1839bde5-2ad4-4723-b21b-2c55fa833a7a", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "c36df8a8-5624-45be-99c9-826cf511a951", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -222,11 +222,11 @@ class HelperClass:
         "comment": {"expanded": True, "value": "Processes input data."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "f92c09f0-0434-46cb-829d-a73f801d6343",
+    "id": "7121bcb9-98a1-4907-bf9b-9734d773fd15",
     "label": "Processing Node",
     "outputs": [],
-    "ports": [{"id": "abaa2984-b312-4491-b069-e689759f72c8", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "35378c2b-f089-44af-ac37-efe4ea42c817", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "de27da74-30e9-4e7b-95c2-92bdfc5bf042", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "e02bd85e-8b03-4b21-8b3e-f411042334ce", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -240,11 +240,11 @@ class HelperClass:
         "comment": {"expanded": True, "value": "Transforms data format."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "09ca32f7-c8f2-4469-97e5-1f288f85127a",
+    "id": "6a785cb0-f631-4f03-94c6-e82331c14c1a",
     "label": "Transformation Node",
     "outputs": [],
-    "ports": [{"id": "88778117-fbfc-4b44-964b-5a4994aa2f24", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "5d096263-7fbf-490a-83b7-e441852b5fb6", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "67a13ea0-fd6b-44dc-af46-c72da06aa11f", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "08d4e317-baa8-478f-b278-99362e50e6b4", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -306,11 +306,11 @@ class BrokenNode(BaseNode)
         "comment": {"expanded": True, "value": "This is Some Node."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "1e559c2e-db82-41f0-9ceb-5e89b0c5a0a3",
+    "id": "a2706730-074b-4ea3-968a-25e68af1caed",
     "label": "Some Node",
     "outputs": [],
-    "ports": [{"id": "48e39e97-5fd4-471e-b4f2-51d3baf06456", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "e3381fb7-61fc-4c46-ae8e-51fc463b6a59", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "ports": [{"id": "e0ee3653-e071-4b91-9dfc-5e1dca9c665b", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "8d931b01-30ca-4c0d-b1b7-7c18379c83e6", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
 }
 
@@ -371,12 +371,12 @@ class MyAdditionNode(BaseNode):
     "adornments": None,
     "attributes": [
         {
-            "id": "aed3bcbb-d243-4a77-bb5e-409e9a28e868",
+            "id": "4223b340-447f-46c2-b35d-30ef16c5ae17",
             "name": "arg1",
             "value": None,
         },
         {
-            "id": "9225d225-a41b-4642-8964-f28f58dcf4bf",
+            "id": "1de0f46a-95f6-4cd0-bb0f-e2414054d507",
             "name": "arg2",
             "value": None,
         },
@@ -387,11 +387,11 @@ class MyAdditionNode(BaseNode):
         "comment": {"expanded": True, "value": "Custom node that performs simple addition."},
         "position": {"x": 0.0, "y": 0.0},
     },
-    "id": "195cd69d-3d2d-41e4-a432-16c433cb8d34",
+    "id": "2464b610-fb6d-495b-b17c-933ee147f19f",
     "label": "My Addition Node",
-    "outputs": [{"id": "3d8e40cb-2aa8-44bd-ae6a-708a9fbc4779", "name": "result", "type": "NUMBER", "value": None}],
-    "ports": [{"id": "9a9e4ef6-febf-4093-a515-217bbb1373db", "name": "default", "type": "DEFAULT"}],
-    "trigger": {"id": "a5298668-d808-4a45-a62e-790943948e8a", "merge_behavior": "AWAIT_ATTRIBUTES"},
+    "outputs": [{"id": "f39d85c9-e7bf-45e1-bb67-f16225db0118", "name": "result", "type": "NUMBER", "value": None}],
+    "ports": [{"id": "bc489295-cd8a-4aa2-88bb-34446374100d", "name": "default", "type": "DEFAULT"}],
+    "trigger": {"id": "ff580cad-73d6-44fe-8f2c-4b8dc990ee70", "merge_behavior": "AWAIT_ATTRIBUTES"},
     "type": "GENERIC",
     "should_file_merge": True,
 }

workflow_server/api/tests/test_workflow_view_stream_workflow_route.py CHANGED
@@ -5,6 +5,7 @@ import io
 import json
 from queue import Empty
 import re
+import time
 from unittest import mock
 from uuid import uuid4
 
@@ -133,6 +134,8 @@ class Workflow(BaseWorkflow):
 
     with mock.patch("builtins.open", mock.mock_open(read_data="104857600")):
         # WHEN we call the stream route
+        ts_ns = time.time_ns()
+        request_body["vembda_service_initiated_timestamp"] = ts_ns
         status_code, events = both_stream_types(request_body)
 
         # THEN we get a 200 response
@@ -177,6 +180,15 @@ class Workflow(BaseWorkflow):
     assert "is_new_server" in server_metadata
     assert server_metadata["is_new_server"] is False
 
+    # AND the initiated event should have initiated_latency within a reasonable range
+    assert "initiated_latency" in server_metadata, "initiated_latency should be present in server_metadata"
+    initiated_latency = server_metadata["initiated_latency"]
+    assert isinstance(initiated_latency, int), "initiated_latency should be an integer (nanoseconds)"
+    # Latency should be positive and less than 60 seconds (60_000_000_000 nanoseconds) for CI
+    assert (
+        0 < initiated_latency < 60_000_000_000
+    ), f"initiated_latency should be between 0 and 60 seconds, got {initiated_latency} ns"
+
     assert events[2]["name"] == "workflow.execution.fulfilled", events[2]
     assert events[2]["body"]["workflow_definition"]["module"] == ["test", "workflow"]

workflow_server/api/workflow_view.py CHANGED
@@ -8,6 +8,7 @@ import os
 import pkgutil
 from queue import Empty
 import sys
+import threading
 import time
 import traceback
 from uuid import uuid4
@@ -71,19 +72,195 @@ WORKFLOW_INITIATION_TIMEOUT_SECONDS = 60
 @bp.route("/stream", methods=["POST"])
 def stream_workflow_route() -> Response:
     data = request.get_json()
+    try:
+        context = WorkflowExecutorContext.model_validate(data)
+    except ValidationError as e:
+        error_message = e.errors()[0]["msg"]
+        error_location = e.errors()[0]["loc"]
+
+        return Response(
+            json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
+            status=400,
+            content_type="application/json",
+        )
+
+    headers = _get_headers(context)
+
+    # We can currently exceed the concurrency count with long-running workflows due to a knative issue,
+    # so if we detect a memory problem here, exit early.
+    if not wait_for_available_process():
+        return Response(
+            json.dumps(
+                {
+                    "detail": f"Workflow server concurrent request rate exceeded. "
+                    f"Process count: {get_active_process_count()}"
+                }
+            ),
+            status=429,
+            content_type="application/json",
+            headers=headers,
+        )
+
+    start_workflow_state = _start_workflow(context)
+    if isinstance(start_workflow_state, Response):
+        return start_workflow_state
+
+    workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_state
+
+    def generator() -> Generator[str, None, None]:
+        try:
+            yield "\n"
+            yield vembda_initiated_event.model_dump_json()
+            yield "\n"
+            for row in workflow_events:
+                yield "\n"
+                if isinstance(row, dict):
+                    dump = json.dumps(row)
+                    yield dump
+                else:
+                    yield row
+                yield "\n"
+            # Sometimes the connections get hung after they finish with the vembda fulfilled event
+            # if it happens during a knative scale-down event. So we emit an END string so that
+            # we don't have to do string compares on all the events, for performance.
+            yield "\n"
+            yield "END"
+            yield "\n"
+
+            logger.info(
+                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except GeneratorExit:
+            # These can happen either from Vembda disconnects (possibly from predict disconnects) or
+            # from knative activator gateway timeouts, which are caused by idleTimeout or responseStartSeconds
+            # being exceeded.
+            app.logger.error(
+                "Client disconnected in the middle of the Workflow Stream",
+                extra={
+                    "sentry_tags": {
+                        "server_version": vembda_initiated_event.body.server_version,
+                        "sdk_version": vembda_initiated_event.body.sdk_version,
+                    }
+                },
+            )
+            return
+        except Exception as e:
+            logger.exception("Error during workflow response stream generator", extra={"error": e})
+            yield "\n"
+            yield "END"
+            yield "\n"
+            return
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                        remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                remove_active_span_id(span_id)
+
+    resp = Response(
+        stream_with_context(generator()),
+        status=200,
+        content_type="application/x-ndjson",
+        headers=headers,
+    )
+    return resp
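
The stream route answers with newline-delimited JSON (`application/x-ndjson`) and closes with a literal `END` sentinel after the vembda fulfilled event. A minimal client sketch; the base URL and payload are assumptions for illustration:

```python
# Minimal NDJSON consumer for the /stream route (sketch; URL and payload assumed).
import json
import requests

request_body = {}  # fill with the execution context payload, as the tests above build
with requests.post("http://localhost:8000/stream", json=request_body, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line:
            continue          # skip the blank separator lines the server interleaves
        if line == "END":
            break             # sentinel: the stream is done, stop without string-comparing events
        event = json.loads(line)
        print(event["name"])
```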
 
+
+@bp.route("/async-exec", methods=["POST"])
+def async_exec_workflow() -> Response:
+    data = request.get_json()
     try:
         context = WorkflowExecutorContext.model_validate(data)
     except ValidationError as e:
         error_message = e.errors()[0]["msg"]
         error_location = e.errors()[0]["loc"]
 
+        # TODO: need to convert this to a vembda event so that triggered execs can be notified;
+        # can either do it here in the workflow server or
         return Response(
             json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
             status=400,
             content_type="application/json",
         )
 
+    # Reject back to the queue handler if we're low on memory here, though maybe we should update the is_available
+    # route to look at memory too. Don't send this response as an event. Though we might want some logic to catch
+    # if they have a workflow server that can never start a workflow because the base image uses so much memory.
+    if not wait_for_available_process():
+        return Response(
+            json.dumps({"detail": f"Server resources low. " f"Process count: {get_active_process_count()}"}),
+            status=429,
+            content_type="application/json",
+        )
+
+    def run_workflow_background() -> None:
+        process: Optional[Process] = None
+        span_id: Optional[str] = None
+
+        try:
+            start_workflow_result = _start_workflow(context)
+            if isinstance(start_workflow_result, Response):
+                # TODO: same here, should return this response as an event or it will get yeeted to the nether
+                # return start_workflow_result
+                return
+
+            workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_result
+
+            for _ in workflow_events:
+                # This is very inefficient in process mode, since we're just having the main proc stream
+                # the events to nowhere, wasting memory, I/O, and CPU.
+                continue
+            logger.info(
+                f"Workflow async exec completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except Exception as e:
+            logger.exception("Error during workflow async background worker", e)
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                    if span_id:
+                        remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                if span_id:
+                    remove_active_span_id(span_id)
+
+    thread = threading.Thread(target=run_workflow_background)
+    thread.start()
+
+    return Response(
+        json.dumps({"success": True}),
+        status=200,
+        content_type="application/json",
+    )
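
Unlike `/stream`, this new route acknowledges immediately and runs the workflow on a background thread, so events are emitted out of band rather than streamed back on the response. A hypothetical invocation (URL and payload are assumptions):

```python
# Fire-and-forget execution via /async-exec (sketch; URL and payload assumed).
import requests

request_body = {}  # same execution context payload as the /stream route
resp = requests.post("http://localhost:8000/async-exec", json=request_body)
assert resp.status_code == 200
assert resp.json() == {"success": True}  # the workflow continues in a background thread
```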
+
+
+def _start_workflow(
+    context: WorkflowExecutorContext,
+) -> Union[
+    Response,
+    tuple[
+        Iterator[Union[str, dict]],
+        VembdaExecutionInitiatedEvent,
+        Optional[Process],
+        str,
+        dict[str, str],
+    ],
+]:
+    headers = _get_headers(context)
     logger.info(
         f"Starting Workflow Server Request, trace ID: {context.trace_id}, "
         f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
@@ -100,29 +277,7 @@ def stream_workflow_route() -> Response:
         parent=None,
     )
 
-    process_output_queue: Queue[Union[str, dict]] = Queue()
-
-    headers = {
-        "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
-        "X-Vellum-Server-Version": vembda_initiated_event.body.server_version,
-        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
-    }
-
-    # We can currently exceed the concurrency count with long-running workflows due to a knative issue,
-    # so if we detect a memory problem here, exit early.
-    if not wait_for_available_process():
-        return Response(
-            json.dumps(
-                {
-                    "detail": f"Workflow server concurrent request rate exceeded. "
-                    f"Process count: {get_active_process_count()}"
-                }
-            ),
-            status=429,
-            content_type="application/json",
-            headers=headers,
-        )
-
+    output_queue: Queue[Union[str, dict]] = Queue()
     cancel_signal = MultiprocessingEvent()
     timeout_signal = MultiprocessingEvent()
 
@@ -131,7 +286,7 @@ def stream_workflow_route() -> Response:
     try:
         process = stream_workflow_process_timeout(
             executor_context=context,
-            queue=process_output_queue,
+            queue=output_queue,
             cancel_signal=cancel_signal,
             timeout_signal=timeout_signal,
         )
@@ -139,10 +294,10 @@ def stream_workflow_route() -> Response:
     except Exception as e:
         logger.exception(e)
 
-        process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
+        output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
 
     try:
-        first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
+        first_item = output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
     except Empty:
         logger.error("Request timed out trying to initiate the Workflow")
 
@@ -291,72 +446,9 @@ def stream_workflow_route() -> Response:
             break
         yield event
 
-    workflow_events = process_events(process_output_queue)
+    workflow_events = process_events(output_queue)
 
-    def generator() -> Generator[str, None, None]:
-        try:
-            yield "\n"
-            yield vembda_initiated_event.model_dump_json()
-            yield "\n"
-            for row in workflow_events:
-                yield "\n"
-                if isinstance(row, dict):
-                    dump = json.dumps(row)
-                    yield dump
-                else:
-                    yield row
-                yield "\n"
-            # Sometimes the connections get hung after they finish with the vembda fulfilled event
-            # if it happens during a knative scale-down event. So we emit an END string so that
-            # we don't have to do string compares on all the events, for performance.
-            yield "\n"
-            yield "END"
-            yield "\n"
-
-            logger.info(
-                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
-            )
-        except GeneratorExit:
-            # These can happen either from Vembda disconnects (possibly from predict disconnects) or
-            # from knative activator gateway timeouts, which are caused by idleTimeout or responseStartSeconds
-            # being exceeded.
-            app.logger.error(
-                "Client disconnected in the middle of the Workflow Stream",
-                extra={
-                    "sentry_tags": {
-                        "server_version": vembda_initiated_event.body.server_version,
-                        "sdk_version": vembda_initiated_event.body.sdk_version,
-                    }
-                },
-            )
-            return
-        except Exception as e:
-            logger.exception("Error during workflow response stream generator", extra={"error": e})
-            yield "\n"
-            yield "END"
-            yield "\n"
-            return
-        finally:
-            if ENABLE_PROCESS_WRAPPER:
-                try:
-                    if process and process.is_alive():
-                        process.kill()
-                    if process:
-                        increment_process_count(-1)
-                        remove_active_span_id(span_id)
-                except Exception as e:
-                    logger.error("Failed to kill process", e)
-            else:
-                increment_process_count(-1)
-                remove_active_span_id(span_id)
-
-    resp = Response(
-        stream_with_context(generator()),
-        status=200,
-        content_type="application/x-ndjson",
-        headers=headers,
-    )
-    return resp
+    return workflow_events, vembda_initiated_event, process, span_id, headers
 
 
 @bp.route("/stream-node", methods=["POST"])
@@ -436,6 +528,7 @@ def serialize_route() -> Response:
     files = data.get("files", {})
     workspace_api_key = data.get("workspace_api_key")
     is_new_server = data.get("is_new_server", False)
+    module = data.get("module")
 
     if not files:
         return Response(
@@ -448,7 +541,7 @@ def serialize_route() -> Response:
 
     # Generate a unique namespace for this serialization request
     namespace = get_random_namespace()
-    virtual_finder = VirtualFileFinder(files, namespace)
+    virtual_finder = VirtualFileFinder(files, namespace, source_module=module)
 
     headers = {
         "X-Vellum-Is-New-Server": str(is_new_server).lower(),
@@ -564,3 +657,12 @@ def startup_error_generator(
         },
     )
     return
+
+
+def _get_headers(context: WorkflowExecutorContext) -> dict[str, Union[str, Any]]:
+    headers = {
+        "X-Vellum-SDK-Version": get_version()["sdk_version"],
+        "X-Vellum-Server-Version": get_version()["server_version"],
+        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
+    }
+    return headers
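
The headers now come from `get_version()` rather than from the initiated event's body (the inline dict deleted above depended on `vembda_initiated_event`), presumably so both routes can send them before a workflow has started. Illustrative output, assuming both versions report 1.10.0:

```python
# Illustrative result of _get_headers(context) (values assumed, not from the package):
{
    "X-Vellum-SDK-Version": "1.10.0",
    "X-Vellum-Server-Version": "1.10.0",
    "X-Vellum-Events-Emitted": "False",
}
```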

workflow_server/core/executor.py CHANGED
@@ -1,5 +1,4 @@
-from datetime import datetime
-import importlib
+from datetime import datetime, timezone
 from io import StringIO
 import json
 import logging
@@ -12,7 +11,7 @@ from threading import Event as ThreadingEvent
 import time
 from traceback import format_exc
 from uuid import UUID, uuid4
-from typing import Any, Callable, Generator, Iterator, Optional, Tuple, Type
+from typing import Any, Callable, Generator, Iterator, Optional, Tuple
 
 from vellum_ee.workflows.display.utils.events import event_enricher
 from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder
@@ -32,6 +31,7 @@ from vellum.workflows.resolvers.base import BaseWorkflowResolver
 from vellum.workflows.resolvers.resolver import VellumResolver
 from vellum.workflows.state.context import WorkflowContext
 from vellum.workflows.state.store import EmptyStore
+from vellum.workflows.triggers import BaseTrigger
 from vellum.workflows.types import CancelSignal
 from vellum.workflows.workflows.event_filters import workflow_sandbox_event_filter
 from workflow_server.config import LOCAL_DEPLOYMENT, LOCAL_WORKFLOW_MODULE
@@ -150,7 +150,21 @@ def stream_workflow(
     cancel_watcher_kill_switch = ThreadingEvent()
     try:
         workflow, namespace = _create_workflow(executor_context)
-        workflow_inputs = _get_workflow_inputs(executor_context, workflow.__class__)
+
+        trigger_id = executor_context.trigger_id
+
+        inputs_or_trigger = workflow.deserialize_trigger(trigger_id=trigger_id, inputs=executor_context.inputs)
+
+        # Determine whether we have inputs or a trigger
+        if isinstance(inputs_or_trigger, BaseInputs):
+            workflow_inputs = inputs_or_trigger
+            trigger = None
+        elif isinstance(inputs_or_trigger, BaseTrigger):
+            workflow_inputs = None
+            trigger = inputs_or_trigger
+        else:
+            workflow_inputs = None
+            trigger = None
 
         workflow_state = (
             workflow.deserialize_state(
@@ -176,6 +190,8 @@ def stream_workflow(
             entrypoint_nodes=[executor_context.node_id] if executor_context.node_id else None,
             previous_execution_id=executor_context.previous_execution_id,
             timeout=executor_context.timeout,
+            trigger=trigger,
+            execution_id=executor_context.workflow_span_id,
         )
     except WorkflowInitializationException as e:
         cancel_watcher_kill_switch.set()
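
Input resolution moves out of this file (the old `_get_workflow_inputs` helper is deleted at the bottom of this diff) and into the SDK's `deserialize_trigger`, which hands back either plain inputs or a trigger instance; the executor then dispatches on the type. A hedged sketch of that dispatch as a standalone helper; the class names are the same ones executor.py imports from the vellum SDK:

```python
# Mirror of the dispatch above as a helper (sketch, not the package's code).
def split_inputs_or_trigger(value):
    """Exactly one of (workflow_inputs, trigger) is non-None after dispatch."""
    if isinstance(value, BaseInputs):
        return value, None       # a regular inputs payload
    if isinstance(value, BaseTrigger):
        return None, value       # a reconstructed trigger
    return None, None            # defensive default, matching the else branch
```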
@@ -257,32 +273,11 @@ def stream_node(
     disable_redirect: bool = True,
 ) -> Iterator[dict]:
     workflow, namespace = _create_workflow(executor_context)
-    Node: Optional[Type[BaseNode]] = None
-
-    for workflow_node in workflow.get_nodes():
-        if executor_context.node_id and workflow_node.__id__ == executor_context.node_id:
-            Node = workflow_node
-            break
-        elif (
-            executor_context.node_module
-            and executor_context.node_name
-            and workflow_node.__name__ == executor_context.node_name
-            and workflow_node.__module__ == f"{namespace}.{executor_context.node_module}"
-        ):
-            Node = workflow_node
-            break
-
-    if not Node:
-        identifier = executor_context.node_id or f"{executor_context.node_module}.{executor_context.node_name}"
-        raise WorkflowInitializationException(
-            message=f"Node '{identifier}' not found in workflow",
-            workflow_definition=workflow.__class__,
-        )
 
     def call_node() -> Generator[dict[str, Any], Any, None]:
         executor_context.stream_start_time = time.time_ns()
 
-        for event in workflow.run_node(Node, inputs=executor_context.inputs):  # type: ignore[arg-type]
+        for event in workflow.run_node(executor_context.node_ref, inputs=executor_context.inputs):
            yield event.model_dump(mode="json")
 
     return _call_stream(
@@ -343,7 +338,9 @@ def _call_stream(
 def _create_workflow(executor_context: BaseExecutorContext) -> Tuple[BaseWorkflow, str]:
     namespace = _get_file_namespace(executor_context)
     if namespace != LOCAL_WORKFLOW_MODULE:
-        sys.meta_path.append(VirtualFileFinder(executor_context.files, namespace))
+        sys.meta_path.append(
+            VirtualFileFinder(executor_context.files, namespace, source_module=executor_context.module)
+        )
 
     workflow_context = _create_workflow_context(executor_context)
     Workflow = BaseWorkflow.load_from_module(namespace)
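
`VirtualFileFinder` is the SDK's meta-path finder that makes the request's in-memory `files` importable under a generated namespace; the new `source_module` argument (also threaded through `serialize_route` above) presumably records the files' original module path. A sketch of the import mechanics, with illustrative file contents and namespace:

```python
# Sketch: making in-memory workflow files importable (values illustrative).
import sys
from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder

files = {"__init__.py": "", "workflow.py": "class Workflow: ..."}
sys.meta_path.append(VirtualFileFinder(files, "ns_1a2b3c", source_module="my_app.my_workflow"))

import ns_1a2b3c.workflow  # resolved from the dict above rather than from disk
```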
@@ -434,6 +431,14 @@ def _enrich_event(event: WorkflowEvent, executor_context: Optional[BaseExecutorC
 
     if executor_context is not None:
         metadata["is_new_server"] = executor_context.is_new_server
+
+        if executor_context.vembda_service_initiated_timestamp is not None and event.timestamp is not None:
+            event_ts = event.timestamp
+            if event_ts.tzinfo is None:
+                event_ts = event_ts.replace(tzinfo=timezone.utc)
+            event_ts_ns = int(event_ts.timestamp() * 1_000_000_000)
+            initiated_latency = event_ts_ns - executor_context.vembda_service_initiated_timestamp
+            metadata["initiated_latency"] = initiated_latency
     elif event.name == "workflow.execution.fulfilled" and is_deployment:
         metadata = {}
         memory_mb = get_memory_in_use_mb()
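
Naive event timestamps are pinned to UTC before conversion so the subtraction against the caller's `time.time_ns()` stamp is well defined; `datetime.timestamp()` returns float seconds, hence the `1_000_000_000` scale factor. A worked example of the conversion:

```python
# Worked example of the nanosecond conversion above.
from datetime import datetime, timezone

event_ts = datetime(2025, 1, 1, 0, 0, 1)              # naive timestamp, as some events emit
if event_ts.tzinfo is None:
    event_ts = event_ts.replace(tzinfo=timezone.utc)  # interpret naive times as UTC
event_ts_ns = int(event_ts.timestamp() * 1_000_000_000)
assert event_ts_ns == 1_735_689_601_000_000_000       # 2025-01-01T00:00:01Z in nanoseconds
```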
@@ -473,38 +478,3 @@ def _dump_event(event: BaseEvent, executor_context: BaseExecutorContext) -> dict
         dump["body"]["node_definition"]["module"] = module_base + dump["body"]["node_definition"]["module"][1:]
 
     return dump
-
-
-def _get_workflow_inputs(
-    executor_context: BaseExecutorContext, workflow_class: Type[BaseWorkflow]
-) -> Optional[BaseInputs]:
-    if not executor_context.inputs:
-        return None
-
-    if not executor_context.files.get("inputs.py"):
-        return None
-
-    namespace = _get_file_namespace(executor_context)
-    inputs_module_path = f"{namespace}.inputs"
-    try:
-        inputs_module = importlib.import_module(inputs_module_path)
-    except Exception as e:
-        raise WorkflowInitializationException(
-            message=f"Failed to initialize workflow inputs: {e}",
-            workflow_definition=workflow_class,
-        ) from e
-
-    if not hasattr(inputs_module, "Inputs"):
-        raise WorkflowInitializationException(
-            message=f"Inputs module {inputs_module_path} does not have a required Inputs class",
-            workflow_definition=workflow_class,
-        )
-
-    if not issubclass(inputs_module.Inputs, BaseInputs):
-        raise WorkflowInitializationException(
-            message=f"""The class {inputs_module_path}.Inputs was expected to be a subclass of BaseInputs, \
-but found {inputs_module.Inputs.__class__.__name__}""",
-            workflow_definition=workflow_class,
-        )
-
-    return inputs_module.Inputs(**executor_context.inputs)

workflow_server/core/utils.py CHANGED
@@ -2,6 +2,7 @@ from datetime import datetime
 from uuid import uuid4
 from typing import Optional
 
+from workflow_server.config import IS_ASYNC_MODE
 from workflow_server.core.events import VembdaExecutionFulfilledBody, VembdaExecutionFulfilledEvent
 from workflow_server.core.workflow_executor_context import BaseExecutorContext
 
@@ -46,6 +47,9 @@ def serialize_vembda_rejected_event(
 
 
 def is_events_emitting_enabled(executor_context: Optional[BaseExecutorContext]) -> bool:
+    if IS_ASYNC_MODE:
+        return True
+
     if not executor_context:
         return False
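
In async mode there is no open response stream to carry events back to the caller, so emission is forced on unconditionally; that rationale is an inference from the new `/async-exec` route, not stated in the package. Behavior sketch:

```python
# Behavior sketch (assumes IS_ASYNC_MODE is true in workflow_server.config;
# how the flag is set there is not shown in this diff).
from workflow_server.core.utils import is_events_emitting_enabled

assert is_events_emitting_enabled(None) is True   # async mode short-circuits everything
# With IS_ASYNC_MODE false, a missing context still disables emission (returns False).
```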

workflow_server/core/workflow_executor_context.py CHANGED
@@ -3,7 +3,7 @@ from functools import cached_property
 import os
 import time
 from uuid import UUID
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from typing_extensions import Self
 
 from flask import has_request_context, request
@@ -36,6 +36,11 @@ class BaseExecutorContext(UniversalBaseModel):
     previous_execution_id: Optional[UUID] = None
     feature_flags: Optional[dict[str, bool]] = None
     is_new_server: bool = False
+    trigger_id: Optional[UUID] = None
+    # The actual 'execution id' of the workflow that we pass into the workflow
+    # when running in async mode.
+    workflow_span_id: Optional[UUID] = None
+    vembda_service_initiated_timestamp: Optional[int] = None
 
     @field_validator("inputs", mode="before")
     @classmethod
@@ -86,6 +91,18 @@ class NodeExecutorContext(BaseExecutorContext):
     node_module: Optional[str] = None
     node_name: Optional[str] = None
 
+    @property
+    def node_ref(self) -> Union[UUID, str]:
+        """
+        Returns the node reference for use with workflow.run_node().
+
+        Returns node_id if it exists, otherwise returns the combination
+        of node_module and node_name as a fully qualified string.
+        """
+        if self.node_id:
+            return self.node_id
+        return f"{self.node_module}.{self.node_name}"
+
    @model_validator(mode="after")
    def validate_node_identification(self) -> Self:
        if not self.node_id and not (self.node_module and self.node_name):
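
`node_ref` replaces the manual node lookup that `stream_node` used to perform (see the deleted loop in executor.py above): `run_node` now accepts either a node UUID or a `module.ClassName` string. A hypothetical construction, assuming the base context's remaining fields have defaults or are supplied elsewhere:

```python
# Illustrative node_ref resolution (sketch; other context fields omitted).
ctx = NodeExecutorContext(node_module="nodes.my_node", node_name="MyNode")
assert ctx.node_ref == "nodes.my_node.MyNode"  # node_id unset, so module.ClassName is used
# When node_id is set, node_ref returns that UUID and run_node() resolves the node by ID.
```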
workflow_server/start.py CHANGED
@@ -33,6 +33,7 @@ class CustomGunicornLogger(glogging.Logger):
         logger = logging.getLogger("gunicorn.access")
         logger.addFilter(HealthCheckFilter())
         logger.addFilter(SignalFilter())
+        logger.addFilter(StatusIsAvailableFilter())
 
 
 class HealthCheckFilter(logging.Filter):
@@ -45,6 +46,11 @@ class SignalFilter(logging.Filter):
         return "SIGTERM" not in record.getMessage()
 
 
+class StatusIsAvailableFilter(logging.Filter):
+    def filter(self, record: Any) -> bool:
+        return "/status/is_available" not in record.getMessage()
+
+
 def start() -> None:
     if not is_development():
         start_oom_killer_worker()
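
By analogy with the existing `HealthCheckFilter`, the new filter drops gunicorn access-log lines for the `/status/is_available` probe so frequent readiness polls don't flood the logs. A quick self-contained check of its behavior:

```python
# Quick check of StatusIsAvailableFilter (self-contained sketch).
import logging

class StatusIsAvailableFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        return "/status/is_available" not in record.getMessage()

rec = logging.LogRecord("gunicorn.access", logging.INFO, "", 0,
                        "GET /status/is_available HTTP/1.1 200", None, None)
assert StatusIsAvailableFilter().filter(rec) is False  # probe lines are suppressed
```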

workflow_server/utils/exit_handler.py CHANGED
@@ -1,15 +1,43 @@
+from datetime import datetime
 import logging
 import multiprocessing
 import signal
+from time import sleep
 from typing import Any
 
+from workflow_server.config import IS_ASYNC_MODE, is_development
+from workflow_server.utils.system_utils import get_active_process_count
+
 logger = logging.getLogger(__name__)
 process_killed_switch = multiprocessing.Event()
 
 
+def _wait_for_workers() -> None:
+    # It would be annoying to have this on for dev, since it would prevent reload restarts. Also disabling this
+    # for non-async mode for now, since it shouldn't be needed anyway because we keep the requests open.
+    if is_development() and not IS_ASYNC_MODE:
+        return
+
+    start_time = datetime.now()
+    loops = 0
+
+    while get_active_process_count() > 0:
+        if loops % 30 == 0:
+            logger.info("Waiting for workflow processes to finish...")
+
+        # TODO: need to pass in the max workflow time here for VPC
+        if (datetime.now() - start_time).total_seconds() > 1800:
+            logger.warning("Max elapsed time waiting for workflow processes to complete exceeded, shutting down")
+            exit(1)
+
+        sleep(1)
+        loops += 1
+
+
 def gunicorn_exit_handler(_worker: Any) -> None:
+    logger.info("Received gunicorn kill signal")
     process_killed_switch.set()
-    logger.warning("Received gunicorn kill signal")
+    _wait_for_workers()
 
 
 def exit_handler(_signal: int, _frame: Any) -> None:
@@ -19,6 +47,7 @@ def exit_handler(_signal: int, _frame: Any) -> None:
     """
     process_killed_switch.set()
     logger.warning("Received kill signal")
+    _wait_for_workers()
     exit(1)
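
Both shutdown paths now block until in-flight workflow processes drain (or 30 minutes elapse) before the worker exits, so a scale-down no longer kills running workflows. How `gunicorn_exit_handler` is wired to gunicorn is not part of this diff; a plausible hookup via gunicorn's server hooks might look like:

```python
# Hypothetical gunicorn config wiring (not shown in this diff).
from workflow_server.utils.exit_handler import gunicorn_exit_handler

def worker_int(worker):
    # gunicorn invokes this server hook when a worker receives SIGINT/SIGQUIT
    gunicorn_exit_handler(worker)
```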