vellum-workflow-server 1.8.2__py3-none-any.whl → 1.10.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vellum_workflow_server-1.8.2.dist-info → vellum_workflow_server-1.10.7.dist-info}/METADATA +3 -3
- {vellum_workflow_server-1.8.2.dist-info → vellum_workflow_server-1.10.7.dist-info}/RECORD +17 -15
- workflow_server/api/auth_middleware.py +2 -2
- workflow_server/api/status_view.py +19 -0
- workflow_server/api/tests/test_workflow_view.py +75 -24
- workflow_server/api/tests/test_workflow_view_async_exec.py +410 -0
- workflow_server/api/tests/test_workflow_view_stream_workflow_route.py +100 -1
- workflow_server/api/workflow_view.py +206 -93
- workflow_server/config.py +2 -0
- workflow_server/core/executor.py +47 -67
- workflow_server/core/utils.py +4 -0
- workflow_server/core/workflow_executor_context.py +18 -1
- workflow_server/server.py +2 -0
- workflow_server/start.py +8 -2
- workflow_server/utils/exit_handler.py +30 -1
- {vellum_workflow_server-1.8.2.dist-info → vellum_workflow_server-1.10.7.dist-info}/WHEEL +0 -0
- {vellum_workflow_server-1.8.2.dist-info → vellum_workflow_server-1.10.7.dist-info}/entry_points.txt +0 -0
workflow_server/api/workflow_view.py
CHANGED

@@ -8,6 +8,7 @@ import os
 import pkgutil
 from queue import Empty
 import sys
+import threading
 import time
 import traceback
 from uuid import uuid4
@@ -71,7 +72,6 @@ WORKFLOW_INITIATION_TIMEOUT_SECONDS = 60
 @bp.route("/stream", methods=["POST"])
 def stream_workflow_route() -> Response:
     data = request.get_json()
-
     try:
         context = WorkflowExecutorContext.model_validate(data)
     except ValidationError as e:
@@ -84,28 +84,7 @@ def stream_workflow_route() -> Response:
             content_type="application/json",
         )

-    logger.info(
-        f"Starting Workflow Server Request, trace ID: {context.trace_id}, "
-        f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
-    )
-
-    # Create this event up here so timestamps are fully from the start to account for any unknown overhead
-    vembda_initiated_event = VembdaExecutionInitiatedEvent(
-        id=uuid4(),
-        timestamp=datetime.now(),
-        trace_id=context.trace_id,
-        span_id=context.execution_id,
-        body=VembdaExecutionInitiatedBody.model_validate(get_version()),
-        parent=None,
-    )
-
-    process_output_queue: Queue[Union[str, dict]] = Queue()
-
-    headers = {
-        "X-Vellum-SDK-Version": vembda_initiated_event.body.sdk_version,
-        "X-Vellum-Server-Version": vembda_initiated_event.body.server_version,
-        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
-    }
+    headers = _get_headers(context)

     # We can exceed the concurrency count currently with long running workflows due to a knative issue. So here
     # if we detect a memory problem just exit us early
@@ -122,6 +101,183 @@ def stream_workflow_route() -> Response:
             headers=headers,
         )

+    start_workflow_state = _start_workflow(context)
+    if isinstance(start_workflow_state, Response):
+        return start_workflow_state
+
+    workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_state
+
+    def generator() -> Generator[str, None, None]:
+        try:
+            yield "\n"
+            yield vembda_initiated_event.model_dump_json()
+            yield "\n"
+            for row in workflow_events:
+                yield "\n"
+                if isinstance(row, dict):
+                    dump = json.dumps(row)
+                    yield dump
+                else:
+                    yield row
+            yield "\n"
+            # Sometimes the connections get hung after they finish with the vembda fulfilled event
+            # if it happens during a knative scale down event. So we emit an END string so that
+            # we don't have to do string compares on all the events for performance.
+            yield "\n"
+            yield "END"
+            yield "\n"
+
+            logger.info(
+                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except GeneratorExit:
+            # These can happen either from Vembda disconnects (possibly from predict disconnects) or
+            # from knative activator gateway timeouts which are caused by idleTimeout or responseStartSeconds
+            # being exceeded.
+            app.logger.error(
+                "Client disconnected in the middle of the Workflow Stream",
+                extra={
+                    "sentry_tags": {
+                        "server_version": vembda_initiated_event.body.server_version,
+                        "sdk_version": vembda_initiated_event.body.sdk_version,
+                    }
+                },
+            )
+            return
+        except Exception as e:
+            logger.exception("Error during workflow response stream generator", extra={"error": e})
+            yield "\n"
+            yield "END"
+            yield "\n"
+            return
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                    remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                remove_active_span_id(span_id)
+
+    resp = Response(
+        stream_with_context(generator()),
+        status=200,
+        content_type="application/x-ndjson",
+        headers=headers,
+    )
+    return resp
+
+
+@bp.route("/async-exec", methods=["POST"])
+def async_exec_workflow() -> Response:
+    data = request.get_json()
+    try:
+        context = WorkflowExecutorContext.model_validate(data)
+    except ValidationError as e:
+        error_message = e.errors()[0]["msg"]
+        error_location = e.errors()[0]["loc"]
+
+        # TODO need to convert this to a vembda event so that trigger'd execs can be notified
+        # can either do it here in the workflow server or
+        return Response(
+            json.dumps({"detail": f"Invalid context: {error_message} at {error_location}"}),
+            status=400,
+            content_type="application/json",
+        )
+
+    # Reject back to the queue handler if we're low on memory here, though maybe we should update the is_available
+    # route to look at memory too. Don't send this response as an event. Though we might want some logic to catch
+    # if they have a workflow server that can never start a workflow because the base image uses so much memory.
+    if not wait_for_available_process():
+        return Response(
+            json.dumps({"detail": f"Server resources low." f"Process count: {get_active_process_count()}"}),
+            status=429,
+            content_type="application/json",
+        )
+
+    def run_workflow_background() -> None:
+        process: Optional[Process] = None
+        span_id: Optional[str] = None
+
+        try:
+            start_workflow_result = _start_workflow(context)
+            if isinstance(start_workflow_result, Response):
+                # TODO same here, should return this response as an event or it will get yeeted to the nether
+                # return start_workflow_result
+                return
+
+            workflow_events, vembda_initiated_event, process, span_id, headers = start_workflow_result
+
+            for _ in workflow_events:
+                # This is way inefficient in process mode since we're just having the main proc stream the events
+                # to nowhere wasting memory I/O and cpu.
+                continue
+            logger.info(
+                f"Workflow async exec completed, execution ID: {span_id}, process count: {get_active_process_count()}"
+            )
+        except Exception as e:
+            logger.exception("Error during workflow async background worker", e)
+        finally:
+            if ENABLE_PROCESS_WRAPPER:
+                try:
+                    if process and process.is_alive():
+                        process.kill()
+                    if process:
+                        increment_process_count(-1)
+                    if span_id:
+                        remove_active_span_id(span_id)
+                except Exception as e:
+                    logger.error("Failed to kill process", e)
+            else:
+                increment_process_count(-1)
+                if span_id:
+                    remove_active_span_id(span_id)
+
+    thread = threading.Thread(target=run_workflow_background)
+    thread.start()
+
+    return Response(
+        json.dumps({"success": True}),
+        status=200,
+        content_type="application/json",
+    )
+
+
+def _start_workflow(
+    context: WorkflowExecutorContext,
+) -> Union[
+    Response,
+    tuple[
+        Iterator[Union[str, dict]],
+        VembdaExecutionInitiatedEvent,
+        Optional[Process],
+        str,
+        dict[str, str],
+    ],
+]:
+    headers = _get_headers(context)
+    logger.info(
+        f"Starting Workflow Server Request, trace ID: {context.trace_id}, "
+        f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
+    )
+
+    # Create this event up here so timestamps are fully from the start to account for any unknown overhead
+    version_data = get_version()
+    vembda_initiated_event = VembdaExecutionInitiatedEvent(
+        id=uuid4(),
+        timestamp=datetime.now(),
+        trace_id=context.trace_id,
+        span_id=context.execution_id,
+        body=VembdaExecutionInitiatedBody.model_validate(version_data),
+        parent=None,
+    )
+
+    output_queue: Queue[Union[str, dict]] = Queue()
     cancel_signal = MultiprocessingEvent()
     timeout_signal = MultiprocessingEvent()

@@ -130,7 +286,7 @@ def stream_workflow_route() -> Response:
     try:
         process = stream_workflow_process_timeout(
             executor_context=context,
-            queue=process_output_queue,
+            queue=output_queue,
             cancel_signal=cancel_signal,
             timeout_signal=timeout_signal,
         )
@@ -138,10 +294,10 @@ def stream_workflow_route() -> Response:
     except Exception as e:
         logger.exception(e)

-        process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
+        output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))

     try:
-        first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
+        first_item = output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
     except Empty:
         logger.error("Request timed out trying to initiate the Workflow")

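Note: the initiation handling above relies on queue.Queue.get(timeout=...) raising queue.Empty when no event arrives within WORKFLOW_INITIATION_TIMEOUT_SECONDS (60s). A minimal standalone sketch of that pattern (the 0.1s timeout here is illustrative, not the server's value):

from queue import Empty, Queue

q: Queue[str] = Queue()
try:
    # Blocks for up to 0.1s; the server waits WORKFLOW_INITIATION_TIMEOUT_SECONDS = 60.
    first_item = q.get(timeout=0.1)
except Empty:
    print("Request timed out trying to initiate the Workflow")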
@@ -290,72 +446,9 @@ def stream_workflow_route() -> Response:
                 break
             yield event

-    workflow_events = process_events(process_output_queue)
-
-    def generator() -> Generator[str, None, None]:
-        try:
-            yield "\n"
-            yield vembda_initiated_event.model_dump_json()
-            yield "\n"
-            for row in workflow_events:
-                yield "\n"
-                if isinstance(row, dict):
-                    dump = json.dumps(row)
-                    yield dump
-                else:
-                    yield row
-            yield "\n"
-            # Sometimes the connections get hung after they finish with the vembda fulfilled event
-            # if it happens during a knative scale down event. So we emit an END string so that
-            # we don't have to do string compares on all the events for performance.
-            yield "\n"
-            yield "END"
-            yield "\n"
-
-            logger.info(
-                f"Workflow stream completed, execution ID: {span_id}, process count: {get_active_process_count()}"
-            )
-        except GeneratorExit:
-            # These can happen either from Vembda disconnects (possibly from predict disconnects) or
-            # from knative activator gateway timeouts which are caused by idleTimeout or responseStartSeconds
-            # being exceeded.
-            app.logger.error(
-                "Client disconnected in the middle of the Workflow Stream",
-                extra={
-                    "sentry_tags": {
-                        "server_version": vembda_initiated_event.body.server_version,
-                        "sdk_version": vembda_initiated_event.body.sdk_version,
-                    }
-                },
-            )
-            return
-        except Exception as e:
-            logger.exception("Error during workflow response stream generator", extra={"error": e})
-            yield "\n"
-            yield "END"
-            yield "\n"
-            return
-        finally:
-            if ENABLE_PROCESS_WRAPPER:
-                try:
-                    if process and process.is_alive():
-                        process.kill()
-                    if process:
-                        increment_process_count(-1)
-                    remove_active_span_id(span_id)
-                except Exception as e:
-                    logger.error("Failed to kill process", e)
-            else:
-                increment_process_count(-1)
-                remove_active_span_id(span_id)
+    workflow_events = process_events(output_queue)

-    resp = Response(
-        stream_with_context(generator()),
-        status=200,
-        content_type="application/x-ndjson",
-        headers=headers,
-    )
-    return resp
+    return workflow_events, vembda_initiated_event, process, span_id, headers


 @bp.route("/stream-node", methods=["POST"])
@@ -374,12 +467,13 @@ def stream_node_route() -> Response:
     )

     # Create this event up here so timestamps are fully from the start to account for any unknown overhead
+    version_data = get_version()
     vembda_initiated_event = VembdaExecutionInitiatedEvent(
         id=uuid4(),
         timestamp=datetime.now(),
         trace_id=context.trace_id,
         span_id=context.execution_id,
-        body=VembdaExecutionInitiatedBody.model_validate(get_version()),
+        body=VembdaExecutionInitiatedBody.model_validate(version_data),
         parent=None,
     )

@@ -433,6 +527,8 @@ def serialize_route() -> Response:

     files = data.get("files", {})
     workspace_api_key = data.get("workspace_api_key")
+    is_new_server = data.get("is_new_server", False)
+    module = data.get("module")

     if not files:
         return Response(
@@ -445,7 +541,12 @@ def serialize_route() -> Response:

     # Generate a unique namespace for this serialization request
     namespace = get_random_namespace()
-    virtual_finder = VirtualFileFinder(files, namespace)
+    virtual_finder = VirtualFileFinder(files, namespace, source_module=module)
+
+    headers = {
+        "X-Vellum-Is-New-Server": str(is_new_server).lower(),
+    }
+
     try:
         sys.meta_path.append(virtual_finder)
         result = BaseWorkflowDisplay.serialize_module(namespace, client=client, dry_run=True)
@@ -454,6 +555,7 @@ def serialize_route() -> Response:
             json.dumps(result.model_dump()),
             status=200,
             content_type="application/json",
+            headers=headers,
         )

     except WorkflowInitializationException as e:
@@ -463,6 +565,7 @@ def serialize_route() -> Response:
             json.dumps({"detail": error_message}),
             status=400,
             content_type="application/json",
+            headers=headers,
         )

     except Exception as e:
@@ -471,6 +574,7 @@ def serialize_route() -> Response:
             json.dumps({"detail": f"Serialization failed: {str(e)}"}),
             status=500,
             content_type="application/json",
+            headers=headers,
         )

     finally:
@@ -553,3 +657,12 @@ def startup_error_generator(
         },
     )
     return
+
+
+def _get_headers(context: WorkflowExecutorContext) -> dict[str, Union[str, Any]]:
+    headers = {
+        "X-Vellum-SDK-Version": get_version()["sdk_version"],
+        "X-Vellum-Server-Version": get_version()["server_version"],
+        "X-Vellum-Events-Emitted": str(is_events_emitting_enabled(context)),
+    }
+    return headers
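Note: the new /async-exec route accepts the same WorkflowExecutorContext payload as /stream, but runs the workflow on a background thread and returns {"success": true} immediately (or 400/429 on validation or resource failures). A minimal client sketch, assuming the /workflow blueprint prefix from server.py; the host, port, and payload values here are illustrative, not part of the package:

import requests

resp = requests.post(
    "http://localhost:8000/workflow/async-exec",
    json={
        # Field names mirror WorkflowExecutorContext as seen in this diff.
        "trace_id": "11111111-1111-1111-1111-111111111111",
        "execution_id": "22222222-2222-2222-2222-222222222222",
        "files": {},
        "inputs": {},
    },
    timeout=10,
)
print(resp.status_code, resp.json())  # 200 {"success": true} once the background run starts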
workflow_server/config.py
CHANGED

@@ -42,6 +42,8 @@ LOCAL_WORKFLOW_MODULE = os.getenv("LOCAL_WORKFLOW_MODULE")
 # The deployment name to match against when using local mode so you can still run your normal workflow
 LOCAL_DEPLOYMENT = os.getenv("LOCAL_DEPLOYMENT")

+IS_ASYNC_MODE = os.getenv("IS_ASYNC_MODE", "false").lower() == "true"
+

 def is_development() -> bool:
     return os.getenv("FLASK_ENV", "local") == "local"
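Note: IS_ASYNC_MODE is parsed case-insensitively; only a value that lowercases to "true" enables it. A quick standalone illustration of the same one-liner (the helper name is hypothetical):

import os

def parse_async_mode() -> bool:
    # Mirrors the config line above: unset, "false", or any other value yields False.
    return os.getenv("IS_ASYNC_MODE", "false").lower() == "true"

os.environ["IS_ASYNC_MODE"] = "True"
assert parse_async_mode() is True
os.environ["IS_ASYNC_MODE"] = "1"  # "1" does NOT count as true here
assert parse_async_mode() is False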
workflow_server/core/executor.py
CHANGED

@@ -1,5 +1,4 @@
-from datetime import datetime
-import importlib
+from datetime import datetime, timezone
 from io import StringIO
 import json
 import logging
@@ -12,12 +11,13 @@ from threading import Event as ThreadingEvent
 import time
 from traceback import format_exc
 from uuid import UUID, uuid4
-from typing import Any, Callable, Generator, Iterator, Optional, Tuple, Type
+from typing import Any, Callable, Generator, Iterator, Optional, Tuple

 from vellum_ee.workflows.display.utils.events import event_enricher
 from vellum_ee.workflows.server.virtual_file_loader import VirtualFileFinder

 from vellum.workflows import BaseWorkflow
+from vellum.workflows.context import execution_context
 from vellum.workflows.emitters.base import BaseWorkflowEmitter
 from vellum.workflows.emitters.vellum_emitter import VellumEmitter
 from vellum.workflows.events.exception_handling import stream_initialization_exception
@@ -31,8 +31,9 @@ from vellum.workflows.resolvers.base import BaseWorkflowResolver
 from vellum.workflows.resolvers.resolver import VellumResolver
 from vellum.workflows.state.context import WorkflowContext
 from vellum.workflows.state.store import EmptyStore
+from vellum.workflows.triggers import BaseTrigger
 from vellum.workflows.types import CancelSignal
-from vellum.workflows.workflows.event_filters import
+from vellum.workflows.workflows.event_filters import workflow_sandbox_event_filter
 from workflow_server.config import LOCAL_DEPLOYMENT, LOCAL_WORKFLOW_MODULE
 from workflow_server.core.cancel_workflow import CancelWorkflowWatcherThread
 from workflow_server.core.events import (
@@ -149,7 +150,21 @@ def stream_workflow(
     cancel_watcher_kill_switch = ThreadingEvent()
     try:
         workflow, namespace = _create_workflow(executor_context)
-        workflow_inputs = _get_workflow_inputs(executor_context, workflow.__class__)
+
+        trigger_id = executor_context.trigger_id
+
+        inputs_or_trigger = workflow.deserialize_trigger(trigger_id=trigger_id, inputs=executor_context.inputs)
+
+        # Determine whether we have inputs or a trigger
+        if isinstance(inputs_or_trigger, BaseInputs):
+            workflow_inputs = inputs_or_trigger
+            trigger = None
+        elif isinstance(inputs_or_trigger, BaseTrigger):
+            workflow_inputs = None
+            trigger = inputs_or_trigger
+        else:
+            workflow_inputs = None
+            trigger = None

         workflow_state = (
             workflow.deserialize_state(
@@ -170,15 +185,22 @@
             inputs=workflow_inputs,
             state=workflow_state,
             node_output_mocks=node_output_mocks,
-            event_filter=
+            event_filter=workflow_sandbox_event_filter,
             cancel_signal=cancel_signal,
             entrypoint_nodes=[executor_context.node_id] if executor_context.node_id else None,
             previous_execution_id=executor_context.previous_execution_id,
             timeout=executor_context.timeout,
+            trigger=trigger,
+            execution_id=executor_context.workflow_span_id,
         )
     except WorkflowInitializationException as e:
         cancel_watcher_kill_switch.set()
-        initialization_exception_stream = stream_initialization_exception(e)
+
+        with execution_context(
+            parent_context=executor_context.execution_context.parent_context,
+            trace_id=executor_context.execution_context.trace_id,
+        ):
+            initialization_exception_stream = stream_initialization_exception(e)

         def _stream_generator() -> Generator[dict[str, Any], Any, None]:
             for event in initialization_exception_stream:
@@ -251,32 +273,11 @@ def stream_node(
     disable_redirect: bool = True,
 ) -> Iterator[dict]:
     workflow, namespace = _create_workflow(executor_context)
-    Node: Optional[Type[BaseNode]] = None
-
-    for workflow_node in workflow.get_nodes():
-        if executor_context.node_id and workflow_node.__id__ == executor_context.node_id:
-            Node = workflow_node
-            break
-        elif (
-            executor_context.node_module
-            and executor_context.node_name
-            and workflow_node.__name__ == executor_context.node_name
-            and workflow_node.__module__ == f"{namespace}.{executor_context.node_module}"
-        ):
-            Node = workflow_node
-            break
-
-    if not Node:
-        identifier = executor_context.node_id or f"{executor_context.node_module}.{executor_context.node_name}"
-        raise WorkflowInitializationException(
-            message=f"Node '{identifier}' not found in workflow",
-            workflow_definition=workflow.__class__,
-        )

     def call_node() -> Generator[dict[str, Any], Any, None]:
         executor_context.stream_start_time = time.time_ns()

-        for event in workflow.run_node(
+        for event in workflow.run_node(executor_context.node_ref, inputs=executor_context.inputs):
             yield event.model_dump(mode="json")

     return _call_stream(
@@ -337,7 +338,9 @@ def _call_stream(
 def _create_workflow(executor_context: BaseExecutorContext) -> Tuple[BaseWorkflow, str]:
     namespace = _get_file_namespace(executor_context)
     if namespace != LOCAL_WORKFLOW_MODULE:
-        sys.meta_path.append(VirtualFileFinder(executor_context.files, namespace))
+        sys.meta_path.append(
+            VirtualFileFinder(executor_context.files, namespace, source_module=executor_context.module)
+        )

     workflow_context = _create_workflow_context(executor_context)
     Workflow = BaseWorkflow.load_from_module(namespace)
@@ -405,7 +408,7 @@ def get_random_namespace() -> str:
     return "workflow_tmp_" + "".join(random.choice(string.ascii_letters + string.digits) for i in range(14))


-def _enrich_event(event: WorkflowEvent, vellum_client: Optional[Any]) -> WorkflowEvent:
+def _enrich_event(event: WorkflowEvent, executor_context: Optional[BaseExecutorContext] = None) -> WorkflowEvent:
     """
     Enrich an event with metadata based on the event type.

@@ -425,6 +428,17 @@ def _enrich_event(event: WorkflowEvent, vellum_client: Optional[Any]) -> WorkflowEvent:
         memory_mb = get_memory_in_use_mb()
         if memory_mb is not None:
             metadata["memory_usage_mb"] = memory_mb
+
+        if executor_context is not None:
+            metadata["is_new_server"] = executor_context.is_new_server
+
+            if executor_context.vembda_service_initiated_timestamp is not None and event.timestamp is not None:
+                event_ts = event.timestamp
+                if event_ts.tzinfo is None:
+                    event_ts = event_ts.replace(tzinfo=timezone.utc)
+                event_ts_ns = int(event_ts.timestamp() * 1_000_000_000)
+                initiated_latency = event_ts_ns - executor_context.vembda_service_initiated_timestamp
+                metadata["initiated_latency"] = initiated_latency
     elif event.name == "workflow.execution.fulfilled" and is_deployment:
         metadata = {}
         memory_mb = get_memory_in_use_mb()
@@ -433,6 +447,7 @@ def _enrich_event(event: WorkflowEvent, vellum_client: Optional[Any]) -> WorkflowEvent:
     except Exception:
         pass

+    vellum_client = executor_context.vellum_client if executor_context else None
     return event_enricher(event, vellum_client, metadata=metadata)


@@ -441,7 +456,7 @@ def _dump_event(event: BaseEvent, executor_context: BaseExecutorContext) -> dict

     dump = event.model_dump(
         mode="json",
-        context={"event_enricher": lambda event: _enrich_event(event, executor_context.vellum_client)},
+        context={"event_enricher": lambda event: _enrich_event(event, executor_context)},
     )
     if dump["name"] in {
         "workflow.execution.initiated",
@@ -463,38 +478,3 @@ def _dump_event(event: BaseEvent, executor_context: BaseExecutorContext) -> dict
         dump["body"]["node_definition"]["module"] = module_base + dump["body"]["node_definition"]["module"][1:]

     return dump
-
-
-def _get_workflow_inputs(
-    executor_context: BaseExecutorContext, workflow_class: Type[BaseWorkflow]
-) -> Optional[BaseInputs]:
-    if not executor_context.inputs:
-        return None
-
-    if not executor_context.files.get("inputs.py"):
-        return None
-
-    namespace = _get_file_namespace(executor_context)
-    inputs_module_path = f"{namespace}.inputs"
-    try:
-        inputs_module = importlib.import_module(inputs_module_path)
-    except Exception as e:
-        raise WorkflowInitializationException(
-            message=f"Failed to initialize workflow inputs: {e}",
-            workflow_definition=workflow_class,
-        ) from e
-
-    if not hasattr(inputs_module, "Inputs"):
-        raise WorkflowInitializationException(
-            message=f"Inputs module {inputs_module_path} does not have a required Inputs class",
-            workflow_definition=workflow_class,
-        )
-
-    if not issubclass(inputs_module.Inputs, BaseInputs):
-        raise WorkflowInitializationException(
-            message=f"""The class {inputs_module_path}.Inputs was expected to be a subclass of BaseInputs, \
-but found {inputs_module.Inputs.__class__.__name__}""",
-            workflow_definition=workflow_class,
-        )
-
-    return inputs_module.Inputs(**executor_context.inputs)
workflow_server/core/utils.py
CHANGED

@@ -2,6 +2,7 @@ from datetime import datetime
 from uuid import uuid4
 from typing import Optional

+from workflow_server.config import IS_ASYNC_MODE
 from workflow_server.core.events import VembdaExecutionFulfilledBody, VembdaExecutionFulfilledEvent
 from workflow_server.core.workflow_executor_context import BaseExecutorContext

@@ -46,6 +47,9 @@ def serialize_vembda_rejected_event(


 def is_events_emitting_enabled(executor_context: Optional[BaseExecutorContext]) -> bool:
+    if IS_ASYNC_MODE:
+        return True
+
     if not executor_context:
         return False

workflow_server/core/workflow_executor_context.py
CHANGED

@@ -3,7 +3,7 @@ from functools import cached_property
 import os
 import time
 from uuid import UUID
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from typing_extensions import Self

 from flask import has_request_context, request

@@ -36,6 +36,11 @@ class BaseExecutorContext(UniversalBaseModel):
     previous_execution_id: Optional[UUID] = None
     feature_flags: Optional[dict[str, bool]] = None
     is_new_server: bool = False
+    trigger_id: Optional[UUID] = None
+    # The actual 'execution id' of the workflow that we pass into the workflow
+    # when running in async mode.
+    workflow_span_id: Optional[UUID] = None
+    vembda_service_initiated_timestamp: Optional[int] = None

     @field_validator("inputs", mode="before")
     @classmethod

@@ -86,6 +91,18 @@ class NodeExecutorContext(BaseExecutorContext):
     node_module: Optional[str] = None
     node_name: Optional[str] = None

+    @property
+    def node_ref(self) -> Union[UUID, str]:
+        """
+        Returns the node reference for use with workflow.run_node().
+
+        Returns node_id if it exists, otherwise returns the combination
+        of node_module and node_name as a fully qualified string.
+        """
+        if self.node_id:
+            return self.node_id
+        return f"{self.node_module}.{self.node_name}"
+
     @model_validator(mode="after")
     def validate_node_identification(self) -> Self:
         if not self.node_id and not (self.node_module and self.node_name):
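Note: node_ref gives stream_node a single argument for workflow.run_node(): the node's UUID when node_id is set, otherwise a "module.name" string built from node_module and node_name. A simplified stand-in (not the package's pydantic model) showing the resolution order:

from dataclasses import dataclass
from typing import Optional, Union
from uuid import UUID, uuid4

@dataclass
class FakeNodeContext:
    node_id: Optional[UUID] = None
    node_module: Optional[str] = None
    node_name: Optional[str] = None

    @property
    def node_ref(self) -> Union[UUID, str]:
        # Mirrors NodeExecutorContext.node_ref in the diff above.
        if self.node_id:
            return self.node_id
        return f"{self.node_module}.{self.node_name}"

assert isinstance(FakeNodeContext(node_id=uuid4()).node_ref, UUID)
assert FakeNodeContext(node_module="my_wf.nodes", node_name="MyNode").node_ref == "my_wf.nodes.MyNode"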
workflow_server/server.py
CHANGED

@@ -5,6 +5,7 @@ from flask import Flask

 from workflow_server.api.auth_middleware import AuthMiddleware
 from workflow_server.api.healthz_view import bp as healthz_bp
+from workflow_server.api.status_view import bp as status_bp
 from workflow_server.api.workflow_view import bp as workflow_bp
 from workflow_server.config import is_development
 from workflow_server.logging_config import GCPJsonFormatter

@@ -48,6 +49,7 @@ def create_app() -> Flask:

     # Register blueprints
     app.register_blueprint(healthz_bp, url_prefix="/healthz")
+    app.register_blueprint(status_bp, url_prefix="/status")
     app.register_blueprint(workflow_bp, url_prefix="/workflow")

     logger.info(is_development())