synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registries; it is provided for informational purposes only.
Potentially problematic release: this version of synth-ai might be problematic.
- examples/common_old/backend.py +0 -1
- examples/crafter_debug_render.py +15 -6
- examples/evals_old/compare_models.py +1 -0
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
- examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
- examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
- examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
- examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
- examples/finetuning_old/synth_qwen_v1/util.py +7 -2
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +17 -15
- examples/rl/run_rl_and_save.py +24 -7
- examples/rl/task_app/math_single_step.py +128 -11
- examples/rl/task_app/math_task_app.py +11 -3
- examples/rl_old/task_app.py +222 -53
- examples/warming_up_to_rl/analyze_trace_db.py +7 -5
- examples/warming_up_to_rl/export_trace_sft.py +141 -16
- examples/warming_up_to_rl/groq_test.py +11 -4
- examples/warming_up_to_rl/manage_secrets.py +15 -6
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +108 -30
- examples/warming_up_to_rl/run_fft_and_save.py +128 -52
- examples/warming_up_to_rl/run_local_rollout.py +87 -36
- examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
- examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
- examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
- examples/warming_up_to_rl/run_rl_and_save.py +31 -7
- examples/warming_up_to_rl/run_rollout_remote.py +37 -10
- examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
- synth_ai/__init__.py +1 -0
- synth_ai/api/train/builders.py +34 -10
- synth_ai/api/train/cli.py +172 -32
- synth_ai/api/train/config_finder.py +59 -4
- synth_ai/api/train/env_resolver.py +32 -14
- synth_ai/api/train/pollers.py +11 -3
- synth_ai/api/train/task_app.py +4 -1
- synth_ai/api/train/utils.py +20 -4
- synth_ai/cli/__init__.py +11 -4
- synth_ai/cli/balance.py +1 -1
- synth_ai/cli/demo.py +19 -5
- synth_ai/cli/rl_demo.py +75 -16
- synth_ai/cli/root.py +116 -37
- synth_ai/cli/task_apps.py +1286 -170
- synth_ai/cli/traces.py +1 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +67 -30
- synth_ai/demos/core/cli.py +493 -164
- synth_ai/demos/demo_task_apps/core.py +50 -6
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/reproducibility/tree.py +3 -1
- synth_ai/environments/service/core_routes.py +6 -2
- synth_ai/evals/base.py +0 -2
- synth_ai/experimental/synth_oss.py +11 -12
- synth_ai/handshake.py +3 -1
- synth_ai/http_client.py +31 -7
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +8 -4
- synth_ai/jobs/client.py +40 -10
- synth_ai/learning/client.py +33 -8
- synth_ai/learning/config.py +0 -2
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +6 -3
- synth_ai/learning/health.py +9 -2
- synth_ai/learning/jobs.py +17 -5
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
- synth_ai/learning/prompts/random_search.py +4 -1
- synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
- synth_ai/learning/rl_client.py +42 -14
- synth_ai/learning/sse.py +0 -2
- synth_ai/learning/validators.py +6 -2
- synth_ai/lm/caching/ephemeral.py +1 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +13 -1
- synth_ai/lm/core/synth_models.py +0 -1
- synth_ai/lm/core/vendor_clients.py +4 -2
- synth_ai/lm/overrides.py +2 -2
- synth_ai/lm/vendors/core/anthropic_api.py +7 -7
- synth_ai/lm/vendors/core/openai_api.py +2 -0
- synth_ai/lm/vendors/openai_standard.py +3 -1
- synth_ai/lm/vendors/openai_standard_responses.py +6 -3
- synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
- synth_ai/lm/vendors/synth_client.py +37 -10
- synth_ai/rl/__init__.py +0 -1
- synth_ai/rl/contracts.py +0 -2
- synth_ai/rl/env_keys.py +6 -1
- synth_ai/task/__init__.py +1 -0
- synth_ai/task/apps/__init__.py +11 -11
- synth_ai/task/auth.py +29 -17
- synth_ai/task/client.py +3 -1
- synth_ai/task/contracts.py +1 -0
- synth_ai/task/datasets.py +3 -1
- synth_ai/task/errors.py +3 -2
- synth_ai/task/health.py +0 -2
- synth_ai/task/json.py +0 -1
- synth_ai/task/proxy.py +2 -5
- synth_ai/task/rubrics.py +9 -3
- synth_ai/task/server.py +31 -5
- synth_ai/task/tracing_utils.py +8 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +0 -1
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +1 -0
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +2 -0
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +24 -3
- synth_ai/tracing_v3/storage/base.py +4 -1
- synth_ai/tracing_v3/storage/factory.py +0 -1
- synth_ai/tracing_v3/turso/manager.py +102 -38
- synth_ai/tracing_v3/turso/models.py +4 -1
- synth_ai/tracing_v3/utils.py +1 -0
- synth_ai/v0/tracing/upload.py +32 -135
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/install_sqld.sh +0 -40
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py

```diff
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
 
 class OpenAIClient:
     """Async HTTP client for OpenAI-compatible inference servers (vLLM)."""
-
+
     def __init__(
         self,
         base_url: str,
@@ -22,11 +22,13 @@ class OpenAIClient:
         self.api_key = api_key
         self.timeout_s = timeout_s
         self.headers = {}
-
+
         if api_key:
             self.headers["Authorization"] = f"Bearer {api_key}"
 
-    def _fix_model_parameters(self, request: Dict[str, Any], target_url: Optional[str] = None) -> Dict[str, Any]:
+    def _fix_model_parameters(
+        self, request: Dict[str, Any], target_url: Optional[str] = None
+    ) -> Dict[str, Any]:
         """
         Fix parameter compatibility for newer OpenAI models.
 
@@ -75,7 +77,9 @@ class OpenAIClient:
         if "max_tokens" in fixed_request:
             if "max_completion_tokens" not in fixed_request:
                 fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
-                logger.info(f"Converted max_tokens to max_completion_tokens for model {model}")
+                logger.info(
+                    f"Converted max_tokens to max_completion_tokens for model {model}"
+                )
             else:
                 fixed_request.pop("max_tokens")
                 logger.info(f"Removed conflicting max_tokens parameter for model {model}")
```
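The hunk above only rewraps the logging around the client's `max_tokens` compatibility fix: a request that still carries `max_tokens` has the value moved to `max_completion_tokens`, and when both keys are present the legacy one is dropped. A minimal sketch of that conversion (the standalone helper below is for illustration only, not part of the package's API):

```python
# Sketch of the max_tokens fix-up visible in the hunk above. The real
# _fix_model_parameters method carries more context (model detection,
# other parameter rewrites); this reproduces only the conversion shown.
from typing import Any, Dict


def fix_max_tokens(request: Dict[str, Any]) -> Dict[str, Any]:
    fixed_request = dict(request)
    if "max_tokens" in fixed_request:
        if "max_completion_tokens" not in fixed_request:
            # Newer OpenAI models expect max_completion_tokens; carry the value over.
            fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
        else:
            # Both present: drop the conflicting legacy parameter.
            fixed_request.pop("max_tokens")
    return fixed_request


print(fix_max_tokens({"model": "example-model", "max_tokens": 256}))
# -> {'model': 'example-model', 'max_completion_tokens': 256}
```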
```diff
@@ -145,7 +149,9 @@ class OpenAIClient:
             logger.info("Removed stop_after_tool_calls for OpenAI request")
         # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
         low_url = url.lower()
-        if ("groq.com" in low_url or "/openai" in low_url) and isinstance(processed_request, dict):
+        if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
+            processed_request, dict
+        ):
             rf = processed_request.get("response_format")
             rf_type = None
             if isinstance(rf, dict):
@@ -164,7 +170,9 @@ class OpenAIClient:
                         # Join any text segments
                         parts = []
                         for seg in content:
-                            if isinstance(seg, dict) and isinstance(seg.get("text"), str):
+                            if isinstance(seg, dict) and isinstance(
+                                seg.get("text"), str
+                            ):
                                 parts.append(seg["text"])
                         text = "\n".join(parts)
                         if isinstance(text, str) and ("json" in text.lower()):
@@ -174,13 +182,17 @@ class OpenAIClient:
                         continue
                 if not has_json_word:
                     try:
-                        instruction = "Respond in strict JSON only. Output a single valid JSON object."
+                        instruction = (
+                            "Respond in strict JSON only. Output a single valid JSON object."
+                        )
                         if not isinstance(msgs, list):
                             msgs = []
                         # Prepend a system message to satisfy Groq requirement without changing user intent
                         prepend = {"role": "system", "content": instruction}
                         processed_request["messages"] = [prepend] + list(msgs)
-                        logger.info("Injected JSON-mode system instruction for Groq response_format compliance")
+                        logger.info(
+                            "Injected JSON-mode system instruction for Groq response_format compliance"
+                        )
                     except Exception:
                         pass
             except Exception:
```
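These hunks rewrap the Groq JSON-mode guard: when JSON `response_format` is requested, at least one message must contain the word "json", so the client scans message content (including list-form segments) and prepends a system instruction if the word is missing. A simplified sketch of the same check, assuming plain string content only:

```python
# Sketch of the Groq JSON-mode guard from the hunks above; the packaged
# client also flattens list-form content segments before this check.
from typing import Any, Dict, List


def ensure_json_word(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    has_json_word = any(
        isinstance(m.get("content"), str) and "json" in m["content"].lower()
        for m in messages
    )
    if has_json_word:
        return messages
    instruction = "Respond in strict JSON only. Output a single valid JSON object."
    # Prepend a system message to satisfy the requirement without changing user intent.
    return [{"role": "system", "content": instruction}, *messages]
```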
```diff
@@ -194,7 +206,7 @@ class OpenAIClient:
                     headers=headers,
                 )
                 response.raise_for_status()
-
+
                 # Rich response diagnostics
                 content_type = response.headers.get("content-type")
                 body_text = response.text
@@ -203,12 +215,14 @@ class OpenAIClient:
                 )
                 if body_text:
                     preview_len = min(800, len(body_text))
-                    logger.info(f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}")
+                    logger.info(
+                        f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
+                    )
 
                 result = response.json()
                 logger.info(f"Inference response parsed_type={type(result).__name__}")
                 return result
-
+
         except httpx.TimeoutException:
             logger.error(f"Request to {url} timed out after {timeout}s")
             raise
@@ -217,12 +231,14 @@ class OpenAIClient:
             text = e.response.text if e.response is not None else str(e)
             # Log full body for debugging remote failures
             try:
-                logger.error(
-
-
-
-
-
+                logger.error(
+                    {
+                        "openai_http_error": True,
+                        "status": status,
+                        "url": url,
+                        "body": text,
+                    }
+                )
             except Exception:
                 logger.error(f"HTTP error from {url}: {status} - {text}")
             # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
@@ -230,13 +246,15 @@ class OpenAIClient:
             redacted_headers = dict(headers)
             if "Authorization" in redacted_headers:
                 redacted_headers["Authorization"] = "***REDACTED***"
-            logger.error(
-
-
-
-
-
-
+            logger.error(
+                {
+                    "request_debug": True,
+                    "status": status,
+                    "target": url,
+                    "headers": redacted_headers,
+                    "payload": processed_request,
+                }
+            )
         except Exception:
             pass
         # Special case: token budget exceeded (OpenAI-compatible error schema)
@@ -270,23 +288,38 @@ class OpenAIClient:
         try:
             tools_arr = processed_request.get("tools") or []
             if isinstance(tools_arr, list) and tools_arr:
-                f = tools_arr[0].get("function") if isinstance(tools_arr[0], dict) else None
-                cand = (f or {}).get("name") if isinstance(f, dict) else None
+                f = (
+                    tools_arr[0].get("function")
+                    if isinstance(tools_arr[0], dict)
+                    else None
+                )
+                cand = (
+                    (f or {}).get("name")
+                    if isinstance(f, dict)
+                    else None
+                )
                 if isinstance(cand, str) and cand:
                     func_name = cand
         except Exception:
             pass
-        processed_request["tool_choice"] = {"type": "function", "function": {"name": func_name}}
+        processed_request["tool_choice"] = {
+            "type": "function",
+            "function": {"name": func_name},
+        }
         processed_request["parallel_tool_calls"] = False
-        logger.warning(
-
-
-
-
-
+        logger.warning(
+            {
+                "token_budget_recovery": True,
+                "messages_tokens": messages_tokens,
+                "model_limit": model_limit,
+                "retry_max_tokens": new_max,
+            }
+        )
         # Retry once with reduced budget
         async with httpx.AsyncClient(timeout=timeout) as client2:
-            r2 = await client2.post(url, json=processed_request, headers=headers)
+            r2 = await client2.post(
+                url, json=processed_request, headers=headers
+            )
             r2.raise_for_status()
             return r2.json()
     except Exception:
@@ -302,14 +335,17 @@ class OpenAIClient:
                         err = e.response.json()
                     except Exception:
                         err = {"error": "unprocessable", "detail": (text or "")[:200]}
-                    logger.warning(
-
-
-
+                    logger.warning(
+                        {
+                            "inference_422_recovered": True,
+                            "detail": err,
+                        }
+                    )
                 except Exception:
                     pass
                 # Return a minimal OpenAI-compatible response with no tool_calls/content
                 import time as _t
+
                 return {
                     "id": f"cmpl-{int(_t.time())}",
                     "object": "chat.completion",
@@ -328,7 +364,7 @@ class OpenAIClient:
         except Exception as e:
             logger.error(f"Unexpected error calling {url}: {e}")
             raise
-
+
     async def check_health(
         self,
         base_url: Optional[str] = None,
@@ -336,17 +372,17 @@ class OpenAIClient:
     ) -> Dict[str, Any]:
         """
         Check if the inference service is healthy.
-
+
         Args:
             base_url: Override base URL for this request
             timeout_s: Override timeout for this request
-
+
         Returns:
             Health status dict with 'status' field
         """
         url = (base_url or self.base_url).rstrip("/") + "/health"
         timeout = timeout_s or 10.0
-
+
         try:
             async with httpx.AsyncClient(timeout=timeout) as client:
                 response = await client.get(url, headers=self.headers)
@@ -364,7 +400,7 @@ class OpenAIClient:
                 return {"status": "unhealthy", "error": str(e)}
             except Exception as e:
                 return {"status": "unhealthy", "error": str(e)}
-
+
     async def generate_with_retries(
         self,
         request: Dict[str, Any],
@@ -376,7 +412,7 @@ class OpenAIClient:
     ) -> Dict[str, Any]:
         """
         Generate with exponential backoff retries for transient errors.
-
+
         Args:
             request: OpenAI-compatible chat completion request
             base_url: Override base URL
@@ -384,13 +420,13 @@ class OpenAIClient:
             max_retries: Maximum number of retry attempts
            backoff_factor: Exponential backoff multiplier
             extra_headers: Additional headers to include (e.g., X-Policy-Name)
-
+
         Returns:
             OpenAI-compatible chat completion response
         """
         last_error = None
         wait_time = 1.0
-
+
         for attempt in range(max_retries + 1):
             try:
                 # Apply parameter fixes to the request
@@ -417,7 +453,9 @@ class OpenAIClient:
                         retry_after = response_data.get("retry_after", 1)
                         # Use the suggested retry_after time instead of exponential backoff for overload
                         wait_time = max(wait_time, float(retry_after))
-                        logger.warning(f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s...")
+                        logger.warning(
+                            f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
+                        )
                     else:
                         # This is a different type of 400 error, don't retry
                         try:
@@ -428,13 +466,15 @@ class OpenAIClient:
                                 redacted_headers["Authorization"] = "***REDACTED***"
                             except Exception:
                                 redacted_headers = {}
-                            logger.error(
-
-
-
-
-
-
+                            logger.error(
+                                {
+                                    "non_overload_400": True,
+                                    "target": (base_url or self.base_url),
+                                    "payload": processed_request,
+                                    "headers": redacted_headers,
+                                    "body": e.response.text if e.response is not None else None,
+                                }
+                            )
                         except Exception:
                             pass
                         raise RuntimeError(
@@ -443,11 +483,13 @@ class OpenAIClient:
                     except Exception:
                         # If we can't parse the response, don't retry 400 errors
                         try:
-                            logger.error(
-
-
-
-
+                            logger.error(
+                                {
+                                    "non_overload_400_unparsed": True,
+                                    "target": (base_url or self.base_url),
+                                    "payload": processed_request,
+                                }
+                            )
                         except Exception:
                             pass
                         raise RuntimeError(
@@ -472,7 +514,7 @@ class OpenAIClient:
                 )
             except httpx.TimeoutException as e:
                 last_error = e
-
+
                 if attempt < max_retries:
                     logger.warning(
                         f"Inference request failed (attempt {attempt + 1}/{max_retries + 1}), "
@@ -480,7 +522,7 @@ class OpenAIClient:
                     )
                     await asyncio.sleep(wait_time)
                     wait_time *= backoff_factor
-
+
         raise last_error
 
 
@@ -490,11 +532,11 @@ def create_inference_client(
 ) -> OpenAIClient:
     """
     Create an inference client using TaskApp configuration.
-
+
     Args:
         task_app: TaskApp instance with vllm_base_url
         api_key: Optional API key for authentication
-
+
     Returns:
         Configured OpenAIClient instance
     """
@@ -502,6 +544,7 @@ def create_inference_client(
     if api_key is None:
        try:
            import os as _os  # local import to avoid module-level side effects
+
            api_key = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
        except Exception:
            api_key = None
```