synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of synth-ai has been flagged as potentially problematic.

Files changed (157)
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1286 -170
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  152. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  153. synth_ai/install_sqld.sh +0 -40
  154. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  155. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  156. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  157. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
 
 class OpenAIClient:
     """Async HTTP client for OpenAI-compatible inference servers (vLLM)."""
-
+
     def __init__(
         self,
         base_url: str,
@@ -22,11 +22,13 @@ class OpenAIClient:
         self.api_key = api_key
         self.timeout_s = timeout_s
         self.headers = {}
-
+
         if api_key:
             self.headers["Authorization"] = f"Bearer {api_key}"
 
-    def _fix_model_parameters(self, request: Dict[str, Any], target_url: Optional[str] = None) -> Dict[str, Any]:
+    def _fix_model_parameters(
+        self, request: Dict[str, Any], target_url: Optional[str] = None
+    ) -> Dict[str, Any]:
         """
         Fix parameter compatibility for newer OpenAI models.
 
@@ -75,7 +77,9 @@ class OpenAIClient:
         if "max_tokens" in fixed_request:
             if "max_completion_tokens" not in fixed_request:
                 fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
-                logger.info(f"Converted max_tokens to max_completion_tokens for model {model}")
+                logger.info(
+                    f"Converted max_tokens to max_completion_tokens for model {model}"
+                )
             else:
                 fixed_request.pop("max_tokens")
                 logger.info(f"Removed conflicting max_tokens parameter for model {model}")
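
Most of this hunk is line-wrapping, but the logic it touches is worth spelling out: newer OpenAI models accept max_completion_tokens instead of max_tokens, so _fix_model_parameters renames or drops the legacy key. A minimal standalone sketch of just that conversion (the helper name is illustrative; the real method gates this on the model and handles more cases than the diff shows):

from typing import Any, Dict

def convert_max_tokens(request: Dict[str, Any]) -> Dict[str, Any]:
    # Hypothetical reduction of _fix_model_parameters to the max_tokens case.
    fixed_request = dict(request)
    if "max_tokens" in fixed_request:
        if "max_completion_tokens" not in fixed_request:
            # Rename the legacy key for models that require the new one.
            fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
        else:
            # Both present: drop the conflicting legacy key.
            fixed_request.pop("max_tokens")
    return fixed_request

print(convert_max_tokens({"max_tokens": 256}))
# {'max_completion_tokens': 256}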
@@ -145,7 +149,9 @@ class OpenAIClient:
             logger.info("Removed stop_after_tool_calls for OpenAI request")
         # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
         low_url = url.lower()
-        if ("groq.com" in low_url or "/openai" in low_url) and isinstance(processed_request, dict):
+        if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
+            processed_request, dict
+        ):
             rf = processed_request.get("response_format")
             rf_type = None
             if isinstance(rf, dict):
@@ -164,7 +170,9 @@ class OpenAIClient:
                         # Join any text segments
                         parts = []
                         for seg in content:
-                            if isinstance(seg, dict) and isinstance(seg.get("text"), str):
+                            if isinstance(seg, dict) and isinstance(
+                                seg.get("text"), str
+                            ):
                                 parts.append(seg["text"])
                         text = "\n".join(parts)
                         if isinstance(text, str) and ("json" in text.lower()):
@@ -174,13 +182,17 @@
                             continue
                 if not has_json_word:
                     try:
-                        instruction = "Respond in strict JSON only. Output a single valid JSON object."
+                        instruction = (
+                            "Respond in strict JSON only. Output a single valid JSON object."
+                        )
                         if not isinstance(msgs, list):
                             msgs = []
                         # Prepend a system message to satisfy Groq requirement without changing user intent
                         prepend = {"role": "system", "content": instruction}
                         processed_request["messages"] = [prepend] + list(msgs)
-                        logger.info("Injected JSON-mode system instruction for Groq response_format compliance")
+                        logger.info(
+                            "Injected JSON-mode system instruction for Groq response_format compliance"
+                        )
                     except Exception:
                         pass
             except Exception:
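
As the comment in this hunk notes, Groq rejects JSON-mode requests unless some message contains the word 'json'; the fix prepends a system message rather than rewriting user content. A self-contained sketch of that guard (message scanning is simplified to plain-string content, whereas the diff also walks list-style content segments):

from typing import Any, Dict, List

def ensure_json_word(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # True if any string content already mentions 'json'.
    has_json_word = any(
        isinstance(m.get("content"), str) and "json" in m["content"].lower()
        for m in messages
    )
    if has_json_word:
        return messages
    instruction = "Respond in strict JSON only. Output a single valid JSON object."
    # Prepend a system message to satisfy Groq without changing user intent.
    return [{"role": "system", "content": instruction}] + list(messages)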
@@ -194,7 +206,7 @@
                     headers=headers,
                 )
                 response.raise_for_status()
-
+
                 # Rich response diagnostics
                 content_type = response.headers.get("content-type")
                 body_text = response.text
@@ -203,12 +215,14 @@
                 )
                 if body_text:
                     preview_len = min(800, len(body_text))
-                    logger.info(f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}")
+                    logger.info(
+                        f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
+                    )
 
                 result = response.json()
                 logger.info(f"Inference response parsed_type={type(result).__name__}")
                 return result
-
+
         except httpx.TimeoutException:
             logger.error(f"Request to {url} timed out after {timeout}s")
             raise
@@ -217,12 +231,14 @@
             text = e.response.text if e.response is not None else str(e)
             # Log full body for debugging remote failures
             try:
-                logger.error({
-                    "openai_http_error": True,
-                    "status": status,
-                    "url": url,
-                    "body": text,
-                })
+                logger.error(
+                    {
+                        "openai_http_error": True,
+                        "status": status,
+                        "url": url,
+                        "body": text,
+                    }
+                )
             except Exception:
                 logger.error(f"HTTP error from {url}: {status} - {text}")
             # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
@@ -230,13 +246,15 @@
                 redacted_headers = dict(headers)
                 if "Authorization" in redacted_headers:
                     redacted_headers["Authorization"] = "***REDACTED***"
-                logger.error({
-                    "request_debug": True,
-                    "status": status,
-                    "target": url,
-                    "headers": redacted_headers,
-                    "payload": processed_request,
-                })
+                logger.error(
+                    {
+                        "request_debug": True,
+                        "status": status,
+                        "target": url,
+                        "headers": redacted_headers,
+                        "payload": processed_request,
+                    }
+                )
             except Exception:
                 pass
             # Special case: token budget exceeded (OpenAI-compatible error schema)
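
Both logging blocks above dump the outbound request to aid debugging, masking the bearer token first so secrets never land in logs. The redaction pattern in isolation:

from typing import Dict

def redact(headers: Dict[str, str]) -> Dict[str, str]:
    # Copy so the headers actually sent stay intact.
    redacted_headers = dict(headers)
    if "Authorization" in redacted_headers:
        redacted_headers["Authorization"] = "***REDACTED***"
    return redacted_headers

print(redact({"Authorization": "Bearer sk-secret", "Accept": "application/json"}))
# {'Authorization': '***REDACTED***', 'Accept': 'application/json'}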
@@ -270,23 +288,38 @@
                     try:
                         tools_arr = processed_request.get("tools") or []
                         if isinstance(tools_arr, list) and tools_arr:
-                            f = tools_arr[0].get("function") if isinstance(tools_arr[0], dict) else None
-                            cand = (f or {}).get("name") if isinstance(f, dict) else None
+                            f = (
+                                tools_arr[0].get("function")
+                                if isinstance(tools_arr[0], dict)
+                                else None
+                            )
+                            cand = (
+                                (f or {}).get("name")
+                                if isinstance(f, dict)
+                                else None
+                            )
                             if isinstance(cand, str) and cand:
                                 func_name = cand
                     except Exception:
                         pass
-                    processed_request["tool_choice"] = {"type": "function", "function": {"name": func_name}}
+                    processed_request["tool_choice"] = {
+                        "type": "function",
+                        "function": {"name": func_name},
+                    }
                     processed_request["parallel_tool_calls"] = False
-                    logger.warning({
-                        "token_budget_recovery": True,
-                        "messages_tokens": messages_tokens,
-                        "model_limit": model_limit,
-                        "retry_max_tokens": new_max,
-                    })
+                    logger.warning(
+                        {
+                            "token_budget_recovery": True,
+                            "messages_tokens": messages_tokens,
+                            "model_limit": model_limit,
+                            "retry_max_tokens": new_max,
+                        }
+                    )
                     # Retry once with reduced budget
                     async with httpx.AsyncClient(timeout=timeout) as client2:
-                        r2 = await client2.post(url, json=processed_request, headers=headers)
+                        r2 = await client2.post(
+                            url, json=processed_request, headers=headers
+                        )
                         r2.raise_for_status()
                         return r2.json()
                 except Exception:
@@ -302,14 +335,17 @@
                     err = e.response.json()
                 except Exception:
                     err = {"error": "unprocessable", "detail": (text or "")[:200]}
-                logger.warning({
-                    "inference_422_recovered": True,
-                    "detail": err,
-                })
+                logger.warning(
+                    {
+                        "inference_422_recovered": True,
+                        "detail": err,
+                    }
+                )
             except Exception:
                 pass
             # Return a minimal OpenAI-compatible response with no tool_calls/content
             import time as _t
+
             return {
                 "id": f"cmpl-{int(_t.time())}",
                 "object": "chat.completion",
@@ -328,7 +364,7 @@
         except Exception as e:
             logger.error(f"Unexpected error calling {url}: {e}")
             raise
-
+
     async def check_health(
         self,
         base_url: Optional[str] = None,
@@ -336,17 +372,17 @@
     ) -> Dict[str, Any]:
         """
         Check if the inference service is healthy.
-
+
         Args:
             base_url: Override base URL for this request
             timeout_s: Override timeout for this request
-
+
         Returns:
             Health status dict with 'status' field
         """
         url = (base_url or self.base_url).rstrip("/") + "/health"
         timeout = timeout_s or 10.0
-
+
         try:
             async with httpx.AsyncClient(timeout=timeout) as client:
                 response = await client.get(url, headers=self.headers)
@@ -364,7 +400,7 @@
             return {"status": "unhealthy", "error": str(e)}
         except Exception as e:
             return {"status": "unhealthy", "error": str(e)}
-
+
     async def generate_with_retries(
         self,
         request: Dict[str, Any],
@@ -376,7 +412,7 @@
     ) -> Dict[str, Any]:
         """
         Generate with exponential backoff retries for transient errors.
-
+
         Args:
             request: OpenAI-compatible chat completion request
             base_url: Override base URL
@@ -384,13 +420,13 @@
             max_retries: Maximum number of retry attempts
             backoff_factor: Exponential backoff multiplier
             extra_headers: Additional headers to include (e.g., X-Policy-Name)
-
+
         Returns:
             OpenAI-compatible chat completion response
         """
         last_error = None
         wait_time = 1.0
-
+
         for attempt in range(max_retries + 1):
             try:
                 # Apply parameter fixes to the request
@@ -417,7 +453,9 @@
                         retry_after = response_data.get("retry_after", 1)
                         # Use the suggested retry_after time instead of exponential backoff for overload
                         wait_time = max(wait_time, float(retry_after))
-                        logger.warning(f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s...")
+                        logger.warning(
+                            f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
+                        )
                     else:
                         # This is a different type of 400 error, don't retry
                         try:
@@ -428,13 +466,15 @@
                                 redacted_headers["Authorization"] = "***REDACTED***"
                             except Exception:
                                 redacted_headers = {}
-                            logger.error({
-                                "non_overload_400": True,
-                                "target": (base_url or self.base_url),
-                                "payload": processed_request,
-                                "headers": redacted_headers,
-                                "body": e.response.text if e.response is not None else None,
-                            })
+                            logger.error(
+                                {
+                                    "non_overload_400": True,
+                                    "target": (base_url or self.base_url),
+                                    "payload": processed_request,
+                                    "headers": redacted_headers,
+                                    "body": e.response.text if e.response is not None else None,
+                                }
+                            )
                         except Exception:
                             pass
                         raise RuntimeError(
@@ -443,11 +483,13 @@
                     except Exception:
                         # If we can't parse the response, don't retry 400 errors
                         try:
-                            logger.error({
-                                "non_overload_400_unparsed": True,
-                                "target": (base_url or self.base_url),
-                                "payload": processed_request,
-                            })
+                            logger.error(
+                                {
+                                    "non_overload_400_unparsed": True,
+                                    "target": (base_url or self.base_url),
+                                    "payload": processed_request,
+                                }
+                            )
                         except Exception:
                             pass
                         raise RuntimeError(
@@ -472,7 +514,7 @@
                 )
             except httpx.TimeoutException as e:
                 last_error = e
-
+
             if attempt < max_retries:
                 logger.warning(
                     f"Inference request failed (attempt {attempt + 1}/{max_retries + 1}), "
@@ -480,7 +522,7 @@
                 )
                 await asyncio.sleep(wait_time)
                 wait_time *= backoff_factor
-
+
         raise last_error
 
 
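Taken together, the generate_with_retries hunks show a standard exponential-backoff loop with one twist: an "overloaded" 400 substitutes the server-suggested retry_after for the computed delay. A condensed sketch of the core control flow, assuming an async generate() callable standing in for the real request (the overload special case is omitted here):

import asyncio
from typing import Any, Awaitable, Callable, Dict, Optional

async def with_retries(
    generate: Callable[[Dict[str, Any]], Awaitable[Dict[str, Any]]],
    request: Dict[str, Any],
    max_retries: int = 3,
    backoff_factor: float = 2.0,
) -> Dict[str, Any]:
    last_error: Optional[Exception] = None
    wait_time = 1.0
    for attempt in range(max_retries + 1):
        try:
            return await generate(request)
        except Exception as e:  # the real loop narrows this to transient errors
            last_error = e
        if attempt < max_retries:
            await asyncio.sleep(wait_time)
            wait_time *= backoff_factor  # 1s, 2s, 4s, ...
    raise last_error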
@@ -490,11 +532,11 @@ def create_inference_client(
 ) -> OpenAIClient:
     """
     Create an inference client using TaskApp configuration.
-
+
     Args:
         task_app: TaskApp instance with vllm_base_url
         api_key: Optional API key for authentication
-
+
     Returns:
         Configured OpenAIClient instance
     """
@@ -502,6 +544,7 @@ def create_inference_client(
     if api_key is None:
         try:
             import os as _os  # local import to avoid module-level side effects
+
             api_key = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
         except Exception:
             api_key = None
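
For orientation, a hedged usage sketch of the factory plus the retry helper. The TaskApp stub, endpoint, and model name are placeholders; vllm_base_url is the one attribute the docstring above guarantees, and the request shape follows the OpenAI chat-completions convention:

import asyncio

class StubTaskApp:
    # Placeholder; a real TaskApp carries its vLLM endpoint here.
    vllm_base_url = "http://localhost:8000/v1"

async def main() -> None:
    client = create_inference_client(StubTaskApp(), api_key="sk-placeholder")
    print(await client.check_health())
    resp = await client.generate_with_retries(
        {"model": "my-model", "messages": [{"role": "user", "content": "hi"}]},
        max_retries=2,
    )
    print(resp.get("choices", [{}])[0].get("message"))

asyncio.run(main())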