vellum-workflow-server 1.4.0__py3-none-any.whl → 1.4.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vellum-workflow-server might be problematic. Click here for more details.
- {vellum_workflow_server-1.4.0.dist-info → vellum_workflow_server-1.4.0.post1.dist-info}/METADATA +1 -1
- {vellum_workflow_server-1.4.0.dist-info → vellum_workflow_server-1.4.0.post1.dist-info}/RECORD +7 -7
- workflow_server/api/workflow_view.py +11 -4
- workflow_server/utils/oom_killer.py +4 -1
- workflow_server/utils/system_utils.py +34 -0
- {vellum_workflow_server-1.4.0.dist-info → vellum_workflow_server-1.4.0.post1.dist-info}/WHEEL +0 -0
- {vellum_workflow_server-1.4.0.dist-info → vellum_workflow_server-1.4.0.post1.dist-info}/entry_points.txt +0 -0
{vellum_workflow_server-1.4.0.dist-info → vellum_workflow_server-1.4.0.post1.dist-info}/RECORD
RENAMED
|
@@ -6,7 +6,7 @@ workflow_server/api/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
6
6
|
workflow_server/api/tests/test_input_display_mapping.py,sha256=drBZqMudFyB5wgiUOcMgRXz7E7ge-Qgxbstw4E4f0zE,2211
|
|
7
7
|
workflow_server/api/tests/test_workflow_view.py,sha256=RlAw1tHeIlnOXGrFQN-w3EOLPZkhp6Dfy6d1r7kU5oc,22573
|
|
8
8
|
workflow_server/api/tests/test_workflow_view_stream_workflow_route.py,sha256=Qo8u6mPyRCmE2jamY1yIh8l44hgo4-Nwlq03z61ND5g,27031
|
|
9
|
-
workflow_server/api/workflow_view.py,sha256=
|
|
9
|
+
workflow_server/api/workflow_view.py,sha256=4XaBbhzQte4bRbbUPhrqAXDBXTXoc67Twzr1Pp5bPpw,21800
|
|
10
10
|
workflow_server/code_exec_runner.py,sha256=lBnMIorPZL8zZBye6TjeCIs06WTJM7P2HR07B1fjJJI,2533
|
|
11
11
|
workflow_server/config.py,sha256=qmmTr6ty3ZN5LDOFs3TfUxYshYe6Mmn_LanplHHeE9Q,1796
|
|
12
12
|
workflow_server/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -20,15 +20,15 @@ workflow_server/start.py,sha256=pkwRcms6I4tkVHP06LdrZY6rG_DFHfBx4ioY5X91W5k,2264
|
|
|
20
20
|
workflow_server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
workflow_server/utils/exit_handler.py,sha256=_FacDVi4zc3bfTA3D2mJsISePlJ8jpLrnGVo5-xZQFs,743
|
|
22
22
|
workflow_server/utils/log_proxy.py,sha256=nugi6fOgAYKX2X9DIc39TG366rsmmDUPoEtG3gzma_Y,3088
|
|
23
|
-
workflow_server/utils/oom_killer.py,sha256=
|
|
23
|
+
workflow_server/utils/oom_killer.py,sha256=AprKFXC_wT3lQZcKDxU5O6dtJwi6meRxjo7nhQtQ8T0,2955
|
|
24
24
|
workflow_server/utils/sentry.py,sha256=pqx3X_4W3yOzmz8QMJYUEi39skIKWtrTN5nyFhaPkbk,1597
|
|
25
|
-
workflow_server/utils/system_utils.py,sha256=
|
|
25
|
+
workflow_server/utils/system_utils.py,sha256=3jNv113zRkKJ0928i2Vm6TqFHrDulteQu1kjseP2B0Y,3271
|
|
26
26
|
workflow_server/utils/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
workflow_server/utils/tests/test_sentry_integration.py,sha256=LGmWiaLhFrx-jslrRjRq9JY6Z5ShLZyx_N_L0-FU6OI,2100
|
|
28
28
|
workflow_server/utils/tests/test_system_utils.py,sha256=_4GwXvVvU5BrATxUEWwQIPg0bzQXMWBtiBmjP8MTxJM,4314
|
|
29
29
|
workflow_server/utils/tests/test_utils.py,sha256=0Nq6du8o-iBtTrip9_wgHES53JSiJbVdSXaBnPobw3s,6930
|
|
30
30
|
workflow_server/utils/utils.py,sha256=ZPoM1Suhid22dpB8oEFLux8wx-9iyzmSfWuYxSCrgWk,4774
|
|
31
|
-
vellum_workflow_server-1.4.0.dist-info/METADATA,sha256=
|
|
32
|
-
vellum_workflow_server-1.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
33
|
-
vellum_workflow_server-1.4.0.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
|
|
34
|
-
vellum_workflow_server-1.4.0.dist-info/RECORD,,
|
|
31
|
+
vellum_workflow_server-1.4.0.post1.dist-info/METADATA,sha256=fY2l-73g7b-QbDRuJzHYfA40CIhE3t5o6CaXCdidCKI,2273
|
|
32
|
+
vellum_workflow_server-1.4.0.post1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
33
|
+
vellum_workflow_server-1.4.0.post1.dist-info/entry_points.txt,sha256=uB_0yPkr7YV6RhEXzvFReUM8P4OQBlVXD6TN6eb9-oc,277
|
|
34
|
+
vellum_workflow_server-1.4.0.post1.dist-info/RECORD,,
|
|
@@ -49,8 +49,10 @@ from workflow_server.core.workflow_executor_context import (
|
|
|
49
49
|
)
|
|
50
50
|
from workflow_server.utils.oom_killer import get_is_oom_killed
|
|
51
51
|
from workflow_server.utils.system_utils import (
|
|
52
|
+
add_active_span_id,
|
|
52
53
|
get_active_process_count,
|
|
53
54
|
increment_process_count,
|
|
55
|
+
remove_active_span_id,
|
|
54
56
|
wait_for_available_process,
|
|
55
57
|
)
|
|
56
58
|
from workflow_server.utils.utils import convert_json_inputs_to_vellum, get_version
|
|
@@ -182,10 +184,7 @@ def stream_workflow_route() -> Response:
|
|
|
182
184
|
first_item = next(stream_iterator)
|
|
183
185
|
increment_process_count(1)
|
|
184
186
|
|
|
185
|
-
if isinstance(first_item, str)
|
|
186
|
-
span_id = first_item.split(":")[1]
|
|
187
|
-
headers["X-Vellum-Workflow-Span-Id"] = span_id
|
|
188
|
-
else:
|
|
187
|
+
if not isinstance(first_item, str) or not first_item.startswith(SPAN_ID_EVENT):
|
|
189
188
|
logger.error("Workflow stream did not start with span id event")
|
|
190
189
|
return Response(
|
|
191
190
|
json.dumps({"detail": "Internal Server Error"}),
|
|
@@ -194,6 +193,10 @@ def stream_workflow_route() -> Response:
|
|
|
194
193
|
headers=headers,
|
|
195
194
|
)
|
|
196
195
|
|
|
196
|
+
span_id = first_item.split(":")[1]
|
|
197
|
+
headers["X-Vellum-Workflow-Span-Id"] = span_id
|
|
198
|
+
add_active_span_id(span_id)
|
|
199
|
+
|
|
197
200
|
logger.info(f"Starting Workflow Stream, execution ID: {span_id}, ")
|
|
198
201
|
|
|
199
202
|
def process_events(queue: Queue) -> Iterator[Union[str, dict]]:
|
|
@@ -227,6 +230,7 @@ def stream_workflow_route() -> Response:
|
|
|
227
230
|
|
|
228
231
|
if not ENABLE_PROCESS_WRAPPER or process:
|
|
229
232
|
increment_process_count(-1)
|
|
233
|
+
remove_active_span_id(span_id)
|
|
230
234
|
|
|
231
235
|
yield VembdaExecutionFulfilledEvent(
|
|
232
236
|
id=uuid4(),
|
|
@@ -254,6 +258,7 @@ def stream_workflow_route() -> Response:
|
|
|
254
258
|
process.kill()
|
|
255
259
|
if process:
|
|
256
260
|
increment_process_count(-1)
|
|
261
|
+
remove_active_span_id(span_id)
|
|
257
262
|
|
|
258
263
|
break
|
|
259
264
|
|
|
@@ -332,10 +337,12 @@ def stream_workflow_route() -> Response:
|
|
|
332
337
|
process.kill()
|
|
333
338
|
if process:
|
|
334
339
|
increment_process_count(-1)
|
|
340
|
+
remove_active_span_id(span_id)
|
|
335
341
|
except Exception as e:
|
|
336
342
|
logger.error("Failed to kill process", e)
|
|
337
343
|
else:
|
|
338
344
|
increment_process_count(-1)
|
|
345
|
+
remove_active_span_id(span_id)
|
|
339
346
|
|
|
340
347
|
resp = Response(
|
|
341
348
|
stream_with_context(generator()),
|
|
@@ -15,6 +15,7 @@ from workflow_server.utils.system_utils import (
|
|
|
15
15
|
FORCE_GC_MEMORY_PERCENT,
|
|
16
16
|
WARN_MEMORY_PERCENT,
|
|
17
17
|
get_active_process_count,
|
|
18
|
+
get_active_span_ids,
|
|
18
19
|
get_memory_in_use_mb,
|
|
19
20
|
)
|
|
20
21
|
|
|
@@ -71,8 +72,10 @@ class OomKillerThread(Thread):
|
|
|
71
72
|
|
|
72
73
|
if memory_mb > (MEMORY_LIMIT_MB * _MAX_MEMORY_PERCENT):
|
|
73
74
|
self._kill_switch.set()
|
|
75
|
+
active_span_ids = get_active_span_ids()
|
|
74
76
|
logger.error(
|
|
75
|
-
f"Workflow server OOM killed, memory: {memory_mb}MB, Process Count: {get_active_process_count()}"
|
|
77
|
+
f"Workflow server OOM killed, memory: {memory_mb}MB, Process Count: {get_active_process_count()}",
|
|
78
|
+
extra={"active_span_ids": active_span_ids},
|
|
76
79
|
)
|
|
77
80
|
# Give time for the threads to get our kill switch
|
|
78
81
|
sleep(_KILL_GRACE_PERIOD)
|
|
@@ -17,6 +17,7 @@ _MEMORY_CHECK_INTERVAL_SECONDS = 2
|
|
|
17
17
|
_MAX_MEMORY_CHECK_ATTEMPTS = 3
|
|
18
18
|
_ACTIVE_PROCESS_COUNT = multiprocessing.Value("i", 0)
|
|
19
19
|
_ACTIVE_PROCESS_LOCK = multiprocessing.Lock()
|
|
20
|
+
_ACTIVE_SPAN_IDS = multiprocessing.Manager().list()
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
def increment_process_count(change: int) -> None:
|
|
@@ -36,6 +37,39 @@ def get_active_process_count() -> int:
|
|
|
36
37
|
return _ACTIVE_PROCESS_COUNT.value # type: ignore
|
|
37
38
|
|
|
38
39
|
|
|
40
|
+
def get_active_span_ids() -> list[str]:
|
|
41
|
+
"""Get a copy of currently active span IDs"""
|
|
42
|
+
with _ACTIVE_PROCESS_LOCK:
|
|
43
|
+
return list(_ACTIVE_SPAN_IDS)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def add_active_span_id(span_id: str) -> None:
|
|
47
|
+
"""Add a span ID to the active tracking list"""
|
|
48
|
+
result = _ACTIVE_PROCESS_LOCK.acquire(timeout=5)
|
|
49
|
+
try:
|
|
50
|
+
if result:
|
|
51
|
+
_ACTIVE_SPAN_IDS.append(span_id)
|
|
52
|
+
else:
|
|
53
|
+
logger.error("Failed to lock workflow server span ID tracking.")
|
|
54
|
+
finally:
|
|
55
|
+
if result:
|
|
56
|
+
_ACTIVE_PROCESS_LOCK.release()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def remove_active_span_id(span_id: str) -> None:
|
|
60
|
+
"""Remove a span ID from the active tracking list"""
|
|
61
|
+
result = _ACTIVE_PROCESS_LOCK.acquire(timeout=5)
|
|
62
|
+
try:
|
|
63
|
+
if result and span_id in _ACTIVE_SPAN_IDS:
|
|
64
|
+
_ACTIVE_SPAN_IDS.remove(span_id)
|
|
65
|
+
else:
|
|
66
|
+
if not result:
|
|
67
|
+
logger.error("Failed to lock workflow server span ID tracking.")
|
|
68
|
+
finally:
|
|
69
|
+
if result:
|
|
70
|
+
_ACTIVE_PROCESS_LOCK.release()
|
|
71
|
+
|
|
72
|
+
|
|
39
73
|
def get_memory_in_use_mb() -> Optional[float]:
|
|
40
74
|
try:
|
|
41
75
|
with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "r") as file:
|
{vellum_workflow_server-1.4.0.dist-info → vellum_workflow_server-1.4.0.post1.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|