vellum-workflow-server 0.14.78__tar.gz → 0.14.79__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vellum-workflow-server might be problematic; see the registry's advisory page for more details.

Files changed (33)
  1. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/PKG-INFO +2 -2
  2. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/pyproject.toml +2 -2
  3. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/test_workflow_view_stream_workflow_route.py +20 -1
  4. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/workflow_view.py +93 -41
  5. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/config.py +1 -0
  6. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/executor.py +11 -3
  7. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/start.py +4 -2
  8. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/README.md +0 -0
  9. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/__init__.py +0 -0
  10. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/__init__.py +0 -0
  11. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/auth_middleware.py +0 -0
  12. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/healthz_view.py +0 -0
  13. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/__init__.py +0 -0
  14. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/test_input_display_mapping.py +0 -0
  15. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/api/tests/test_workflow_view.py +0 -0
  16. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/code_exec_runner.py +0 -0
  17. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/__init__.py +0 -0
  18. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/cancel_workflow.py +0 -0
  19. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/events.py +0 -0
  20. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/utils.py +0 -0
  21. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/core/workflow_executor_context.py +0 -0
  22. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/server.py +0 -0
  23. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/__init__.py +0 -0
  24. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/exit_handler.py +0 -0
  25. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/log_proxy.py +0 -0
  26. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/oom_killer.py +0 -0
  27. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/sentry.py +0 -0
  28. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/system_utils.py +0 -0
  29. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/__init__.py +0 -0
  30. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/test_sentry_integration.py +0 -0
  31. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/test_system_utils.py +0 -0
  32. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/tests/test_utils.py +0 -0
  33. {vellum_workflow_server-0.14.78 → vellum_workflow_server-0.14.79}/src/workflow_server/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vellum-workflow-server
3
- Version: 0.14.78
3
+ Version: 0.14.79
4
4
  Summary:
5
5
  License: AGPL
6
6
  Requires-Python: >=3.9.0,<4
@@ -29,7 +29,7 @@ Requires-Dist: pyjwt (==2.10.0)
29
29
  Requires-Dist: python-dotenv (==1.0.1)
30
30
  Requires-Dist: retrying (==1.3.4)
31
31
  Requires-Dist: sentry-sdk[flask] (==2.20.0)
32
- Requires-Dist: vellum-ai (==0.14.78)
32
+ Requires-Dist: vellum-ai (==0.14.79)
33
33
  Description-Content-Type: text/markdown
34
34
 
35
35
  # Vellum Workflow Runner Server
@@ -3,7 +3,7 @@ name = "vellum-workflow-server"
3
3
 
4
4
  [tool.poetry]
5
5
  name = "vellum-workflow-server"
6
- version = "0.14.78"
6
+ version = "0.14.79"
7
7
  description = ""
8
8
  readme = "README.md"
9
9
  authors = []
@@ -45,7 +45,7 @@ flask = "2.3.3"
45
45
  orderly-set = "5.2.2"
46
46
  pebble = "5.0.7"
47
47
  gunicorn = "23.0.0"
48
- vellum-ai = "0.14.78"
48
+ vellum-ai = "0.14.79"
49
49
  python-dotenv = "1.0.1"
50
50
  retrying = "1.3.4"
51
51
  sentry-sdk = {extras = ["flask"], version = "2.20.0"}
@@ -32,6 +32,25 @@ def flask_stream(request_body: dict) -> tuple[int, list]:
32
32
  ]
33
33
 
34
34
 
35
+ @mock.patch("workflow_server.api.workflow_view.ENABLE_PROCESS_WRAPPER", False)
36
+ def flask_stream_disable_process_wrapper(request_body: dict) -> tuple[int, list]:
37
+ flask_app = create_app()
38
+ with flask_app.test_client() as test_client:
39
+ response = test_client.post("/workflow/stream", json=request_body)
40
+ status_code = response.status_code
41
+
42
+ return status_code, [
43
+ json.loads(line)
44
+ for line in response.data.decode().split("\n")
45
+ if line
46
+ and line
47
+ not in [
48
+ "WAITING",
49
+ "END",
50
+ ]
51
+ ]
52
+
53
+
35
54
  def code_exec_stream(request_body: dict) -> tuple[int, list]:
36
55
  output = io.StringIO()
37
56
 
@@ -48,7 +67,7 @@ def code_exec_stream(request_body: dict) -> tuple[int, list]:
48
67
  return 200, events
49
68
 
50
69
 
51
- @pytest.fixture(params=[flask_stream, code_exec_stream])
70
+ @pytest.fixture(params=[flask_stream, code_exec_stream, flask_stream_disable_process_wrapper])
52
71
  def both_stream_types(request):
53
72
  return request.param
54
73
 
@@ -8,6 +8,7 @@ import os
8
8
  import pkgutil
9
9
  from queue import Empty
10
10
  import sys
11
+ from threading import Event as ThreadingEvent
11
12
  import time
12
13
  import traceback
13
14
  from uuid import uuid4
@@ -18,9 +19,10 @@ from pydantic import ValidationError
18
19
  from vellum_ee.workflows.display.nodes.get_node_display_class import get_node_display_class
19
20
  from vellum_ee.workflows.display.types import WorkflowDisplayContext
20
21
 
22
+ from vellum.workflows.exceptions import WorkflowInitializationException
21
23
  from vellum.workflows.nodes import BaseNode
22
24
  from vellum.workflows.utils.names import pascal_to_title_case
23
- from workflow_server.config import MEMORY_LIMIT_MB
25
+ from workflow_server.config import ENABLE_PROCESS_WRAPPER, MEMORY_LIMIT_MB
24
26
  from workflow_server.core.events import (
25
27
  SPAN_ID_EVENT,
26
28
  STREAM_FINISHED_EVENT,
@@ -30,7 +32,7 @@ from workflow_server.core.events import (
30
32
  VembdaExecutionInitiatedBody,
31
33
  VembdaExecutionInitiatedEvent,
32
34
  )
33
- from workflow_server.core.executor import stream_node_pebble_timeout, stream_workflow_process_timeout
35
+ from workflow_server.core.executor import stream_node_pebble_timeout, stream_workflow, stream_workflow_process_timeout
34
36
  from workflow_server.core.utils import create_vembda_rejected_event, serialize_vembda_rejected_event
35
37
  from workflow_server.core.workflow_executor_context import (
36
38
  DEFAULT_TIMEOUT_SECONDS,
@@ -73,7 +75,7 @@ def stream_workflow_route() -> Response:
73
75
 
74
76
  logger.info(
75
77
  f"Starting workflow stream, execution ID: {context.execution_id}, "
76
- f"process count: {get_active_process_count()}"
78
+ f"process count: {get_active_process_count()}, process wrapper: {ENABLE_PROCESS_WRAPPER}"
77
79
  )
78
80
 
79
81
  # Create this event up here so timestamps are fully from the start to account for any unknown overhead
@@ -108,33 +110,66 @@ def stream_workflow_route() -> Response:
108
110
  headers=headers,
109
111
  )
110
112
 
113
+ cancel_signal = ThreadingEvent()
114
+
111
115
  process: Optional[Process] = None
112
- try:
113
- process = stream_workflow_process_timeout(
114
- executor_context=context,
115
- queue=process_output_queue,
116
- )
117
- increment_process_count(1)
118
- except Exception as e:
119
- logger.exception(e)
116
+ if ENABLE_PROCESS_WRAPPER:
117
+ try:
118
+ process = stream_workflow_process_timeout(
119
+ executor_context=context,
120
+ queue=process_output_queue,
121
+ cancel_signal=cancel_signal,
122
+ )
123
+ increment_process_count(1)
124
+ except Exception as e:
125
+ logger.exception(e)
120
126
 
121
- process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
127
+ process_output_queue.put(create_vembda_rejected_event(context, traceback.format_exc()))
122
128
 
123
- try:
124
- first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
125
- except Empty:
126
- logger.error("Request timed out trying to initiate the Workflow")
129
+ try:
130
+ first_item = process_output_queue.get(timeout=WORKFLOW_INITIATION_TIMEOUT_SECONDS)
131
+ except Empty:
132
+ logger.error("Request timed out trying to initiate the Workflow")
133
+
134
+ if process and process.is_alive():
135
+ process.kill()
136
+ increment_process_count(-1)
137
+
138
+ return Response(
139
+ json.dumps({"detail": "Request timed out trying to initiate the Workflow"}),
140
+ status=408,
141
+ content_type="application/json",
142
+ headers=headers,
143
+ )
144
+ else:
127
145
 
128
- if process and process.is_alive():
129
- process.kill()
130
- increment_process_count(-1)
146
+ def workflow_stream_processor() -> Iterator[Union[dict, str]]:
147
+ span_id_emitted = False
148
+ try:
149
+ workflow_iterator, span_id = stream_workflow(
150
+ context,
151
+ disable_redirect=True,
152
+ cancel_signal=cancel_signal,
153
+ )
154
+ yield f"{SPAN_ID_EVENT}:{span_id}"
155
+ span_id_emitted = True
156
+ for event in workflow_iterator:
157
+ yield event
158
+ except WorkflowInitializationException as e:
159
+ if not span_id_emitted:
160
+ yield f"{SPAN_ID_EVENT}:{uuid4()}"
161
+
162
+ yield serialize_vembda_rejected_event(context, str(e))
163
+ except Exception as e:
164
+ if not span_id_emitted:
165
+ yield f"{SPAN_ID_EVENT}:{uuid4()}"
131
166
 
132
- return Response(
133
- json.dumps({"detail": "Request timed out trying to initiate the Workflow"}),
134
- status=408,
135
- content_type="application/json",
136
- headers=headers,
137
- )
167
+ logger.exception(e)
168
+ yield serialize_vembda_rejected_event(context, "Internal Server Error")
169
+
170
+ stream_iterator = workflow_stream_processor()
171
+ first_item = next(stream_iterator)
172
+ increment_process_count(1)
138
173
 
139
174
  if isinstance(first_item, str) and first_item.startswith(SPAN_ID_EVENT):
140
175
  span_id = first_item.split(":")[1]
@@ -151,21 +186,30 @@ def stream_workflow_route() -> Response:
151
186
  def process_events(queue: Queue) -> Iterator[Union[str, dict]]:
152
187
  event: Union[str, dict]
153
188
  loops = 0
189
+ timed_out_time: Optional[float] = None
154
190
 
155
191
  while True:
156
192
  loops += 1
157
193
  # Check if we timed out and kill the process if so. Set the timeout a little under what
158
194
  # the default is (30m) since the connection limit is 30m and otherwise we may not receive
159
- # the timeout event.
160
- if min(context.timeout, DEFAULT_TIMEOUT_SECONDS - 90) < (
161
- (time.time_ns() - context.request_start_time) / 1_000_000_000
195
+ # the timeout event. After cancelling the workflow wait 5 seconds for the workflow to emit
196
+ # any cancel events before ending the stream.
197
+ if (
198
+ min(context.timeout, DEFAULT_TIMEOUT_SECONDS - 90)
199
+ < ((time.time_ns() - context.request_start_time) / 1_000_000_000)
200
+ and not timed_out_time
162
201
  ):
163
- logger.error("Workflow timed out")
202
+ logger.error("Workflow timed out, waiting 5 seconds before ending request...")
203
+ cancel_signal.set()
204
+ timed_out_time = time.time()
164
205
 
165
- if process and process.is_alive():
206
+ if timed_out_time is not None and timed_out_time + 5 < time.time():
207
+ logger.warning("Killing request after workflow timeout")
208
+
209
+ if ENABLE_PROCESS_WRAPPER and process and process.is_alive():
166
210
  process.kill()
167
211
 
168
- if process:
212
+ if not ENABLE_PROCESS_WRAPPER or process:
169
213
  increment_process_count(-1)
170
214
 
171
215
  yield VembdaExecutionFulfilledEvent(
@@ -198,15 +242,18 @@ def stream_workflow_route() -> Response:
198
242
  break
199
243
 
200
244
  try:
201
- item = queue.get(timeout=0.1)
202
- event = item
245
+ if ENABLE_PROCESS_WRAPPER:
246
+ item = queue.get(timeout=0.1)
247
+ event = item
248
+ else:
249
+ event = next(stream_iterator)
203
250
  except Empty:
204
251
  # Emit waiting event if were just sitting around to attempt to keep the line
205
252
  # open to trick knative
206
253
  if loops % 20 == 0:
207
254
  yield "WAITING"
208
255
 
209
- if process and not process.is_alive():
256
+ if ENABLE_PROCESS_WRAPPER and process and not process.is_alive():
210
257
  logger.error("Workflow process exited abnormally")
211
258
 
212
259
  yield create_vembda_rejected_event(
@@ -216,6 +263,8 @@ def stream_workflow_route() -> Response:
216
263
  break
217
264
 
218
265
  continue
266
+ except StopIteration:
267
+ break
219
268
  except Exception as e:
220
269
  logger.exception(e)
221
270
  break
@@ -263,13 +312,16 @@ def stream_workflow_route() -> Response:
263
312
  yield "\n"
264
313
  return
265
314
  finally:
266
- try:
267
- if process and process.is_alive():
268
- process.kill()
269
- if process:
270
- increment_process_count(-1)
271
- except Exception as e:
272
- logger.error("Failed to kill process", e)
315
+ if ENABLE_PROCESS_WRAPPER:
316
+ try:
317
+ if process and process.is_alive():
318
+ process.kill()
319
+ if process:
320
+ increment_process_count(-1)
321
+ except Exception as e:
322
+ logger.error("Failed to kill process", e)
323
+ else:
324
+ increment_process_count(-1)
273
325
 
274
326
  resp = Response(
275
327
  stream_with_context(generator()),
@@ -28,6 +28,7 @@ PORT = os.getenv("PORT", "8000")
28
28
  VELLUM_API_URL_HOST = os.getenv("VELLUM_API_URL_HOST", "localhost")
29
29
  VELLUM_API_URL_PORT = os.getenv("VELLUM_API_URL_PORT", 8000)
30
30
  CONCURRENCY = int(os.getenv("CONCURRENCY", "8"))
31
+ ENABLE_PROCESS_WRAPPER = os.getenv("ENABLE_PROCESS_WRAPPER", "true").lower() == "true"
31
32
 
32
33
 
33
34
  def is_development() -> bool:
@@ -84,10 +84,14 @@ def _stream_node_wrapper(executor_context: NodeExecutorContext, queue: Queue) ->
84
84
  )
85
85
 
86
86
 
87
- def _stream_workflow_wrapper(executor_context: WorkflowExecutorContext, queue: Queue) -> None:
87
+ def _stream_workflow_wrapper(
88
+ executor_context: WorkflowExecutorContext,
89
+ queue: Queue,
90
+ cancel_signal: Optional[ThreadingEvent],
91
+ ) -> None:
88
92
  span_id_emitted = False
89
93
  try:
90
- stream_iterator, span_id = stream_workflow(executor_context=executor_context)
94
+ stream_iterator, span_id = stream_workflow(executor_context=executor_context, cancel_signal=cancel_signal)
91
95
 
92
96
  queue.put(f"{SPAN_ID_EVENT}:{span_id}")
93
97
  span_id_emitted = True
@@ -114,12 +118,14 @@ def _stream_workflow_wrapper(executor_context: WorkflowExecutorContext, queue: Q
114
118
  def stream_workflow_process_timeout(
115
119
  executor_context: WorkflowExecutorContext,
116
120
  queue: Queue,
121
+ cancel_signal: Optional[ThreadingEvent],
117
122
  ) -> Process:
118
123
  workflow_process = Process(
119
124
  target=_stream_workflow_wrapper,
120
125
  args=(
121
126
  executor_context,
122
127
  queue,
128
+ cancel_signal,
123
129
  ),
124
130
  )
125
131
  workflow_process.start()
@@ -145,6 +151,7 @@ def stream_workflow_process_timeout(
145
151
  def stream_workflow(
146
152
  executor_context: WorkflowExecutorContext,
147
153
  disable_redirect: bool = True,
154
+ cancel_signal: Optional[ThreadingEvent] = None,
148
155
  ) -> tuple[Iterator[dict], UUID]:
149
156
  workflow, namespace = _gather_workflow(executor_context)
150
157
  workflow_inputs = _get_workflow_inputs(executor_context)
@@ -164,7 +171,7 @@ def stream_workflow(
164
171
  )
165
172
 
166
173
  cancel_watcher_kill_switch = ThreadingEvent()
167
- cancel_signal = ThreadingEvent()
174
+ cancel_signal = cancel_signal or ThreadingEvent()
168
175
  cancel_watcher = CancelWorkflowWatcherThread(
169
176
  kill_switch=cancel_watcher_kill_switch,
170
177
  execution_id=executor_context.execution_id,
@@ -188,6 +195,7 @@ def stream_workflow(
188
195
  cancel_signal=cancel_signal,
189
196
  entrypoint_nodes=[run_from_node] if run_from_node else None,
190
197
  )
198
+
191
199
  except Exception:
192
200
  cancel_watcher_kill_switch.set()
193
201
  logger.exception("Failed to generate Workflow Stream")
@@ -5,7 +5,7 @@ from typing import Any, Optional
5
5
  from gunicorn import glogging
6
6
  import gunicorn.app.base
7
7
 
8
- from workflow_server.config import PORT, is_development
8
+ from workflow_server.config import ENABLE_PROCESS_WRAPPER, PORT, is_development
9
9
  from workflow_server.server import app
10
10
  from workflow_server.utils.exit_handler import gunicorn_exit_handler, init_signal_handlers
11
11
  from workflow_server.utils.oom_killer import start_oom_killer_worker
@@ -48,7 +48,9 @@ def start() -> None:
48
48
  options = {
49
49
  "bind": f"0.0.0.0:{PORT}",
50
50
  "workers": int(os.getenv("GUNICORN_WORKERS", 2)),
51
- "threads": int(os.getenv("GUNICORN_THREADS", 6)),
51
+ "threads": int(os.getenv("GUNICORN_THREADS", 9 if ENABLE_PROCESS_WRAPPER else 6)),
52
+ # Try to avoid memory leaks when using non process mode
53
+ "max_requests": 0 if ENABLE_PROCESS_WRAPPER else 20,
52
54
  "worker_class": "gthread",
53
55
  "timeout": int(os.getenv("GUNICORN_TIMEOUT", 1800)),
54
56
  "logger_class": CustomGunicornLogger,