synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. synth_ai/cli/__init__.py +66 -0
  2. synth_ai/cli/balance.py +205 -0
  3. synth_ai/cli/calc.py +70 -0
  4. synth_ai/cli/demo.py +74 -0
  5. synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
  6. synth_ai/cli/man.py +103 -0
  7. synth_ai/cli/recent.py +126 -0
  8. synth_ai/cli/root.py +184 -0
  9. synth_ai/cli/status.py +126 -0
  10. synth_ai/cli/traces.py +136 -0
  11. synth_ai/cli/watch.py +508 -0
  12. synth_ai/config/base_url.py +53 -0
  13. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
  14. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
  15. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
  16. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
  17. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
  25. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
  26. synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
  27. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
  28. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  29. synth_ai/environments/examples/crafter_custom/environment.py +1 -1
  30. synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
  31. synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
  32. synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
  33. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
  34. synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
  35. synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
  36. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
  37. synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
  38. synth_ai/environments/service/core_routes.py +1 -1
  39. synth_ai/experimental/synth_oss.py +446 -0
  40. synth_ai/learning/core.py +21 -0
  41. synth_ai/learning/gateway.py +4 -0
  42. synth_ai/learning/prompts/gepa.py +0 -0
  43. synth_ai/learning/prompts/mipro.py +8 -0
  44. synth_ai/lm/__init__.py +3 -0
  45. synth_ai/lm/core/main.py +4 -0
  46. synth_ai/lm/core/main_v3.py +238 -122
  47. synth_ai/lm/core/vendor_clients.py +4 -0
  48. synth_ai/lm/provider_support/openai.py +11 -2
  49. synth_ai/lm/vendors/base.py +7 -0
  50. synth_ai/lm/vendors/openai_standard.py +339 -4
  51. synth_ai/lm/vendors/openai_standard_responses.py +243 -0
  52. synth_ai/lm/vendors/synth_client.py +155 -5
  53. synth_ai/lm/warmup.py +54 -17
  54. synth_ai/tracing/__init__.py +18 -0
  55. synth_ai/tracing_v1/__init__.py +29 -14
  56. synth_ai/tracing_v3/__init__.py +2 -2
  57. synth_ai/tracing_v3/abstractions.py +62 -17
  58. synth_ai/tracing_v3/config.py +13 -7
  59. synth_ai/tracing_v3/db_config.py +6 -6
  60. synth_ai/tracing_v3/hooks.py +1 -1
  61. synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
  62. synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
  63. synth_ai/tracing_v3/session_tracer.py +5 -5
  64. synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
  65. synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
  66. synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
  67. synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
  68. synth_ai/tracing_v3/turso/manager.py +18 -11
  69. synth_ai/tracing_v3/turso/models.py +1 -0
  70. synth_ai/tui/__main__.py +13 -0
  71. synth_ai/tui/dashboard.py +329 -0
  72. synth_ai/v0/tracing/__init__.py +0 -0
  73. synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
  74. synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
  75. synth_ai/{tracing → v0/tracing}/context.py +1 -1
  76. synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
  77. synth_ai/v0/tracing/events/__init__.py +0 -0
  78. synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
  79. synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
  80. synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
  81. synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
  82. synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
  83. synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
  84. synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
  85. synth_ai/{tracing → v0/tracing}/upload.py +4 -4
  86. synth_ai/v0/tracing_v1/__init__.py +16 -0
  87. synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
  88. synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
  89. synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
  90. synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
  91. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  92. synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
  93. synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
  94. synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
  95. synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
  96. synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
  97. synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
  98. synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
  99. synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
  100. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +100 -5
  101. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +115 -75
  102. /synth_ai/{tracing/events/__init__.py → compound/cais.py} +0 -0
  103. /synth_ai/{tracing_v1/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
  104. /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
  105. /synth_ai/{tracing → v0/tracing}/config.py +0 -0
  106. /synth_ai/{tracing → v0/tracing}/local.py +0 -0
  107. /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
  108. /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
  109. /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
  110. /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
  111. /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
  112. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
  113. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  114. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  115. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
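The dominant theme in the file list is the relocation of the legacy tracing packages: everything under synth_ai/tracing and synth_ai/tracing_v1 moves under synth_ai/v0/, while tracing_v3 gains the new LLM call-record abstractions. A hedged sketch of what the move means for imports follows; the module path comes from the rename list above, and whether the old path keeps working through the updated synth_ai/tracing/__init__.py (+18 lines) is an assumption:

    # Hypothetical migration shim for callers of the legacy tracing package.
    # The rename list moves synth_ai/tracing/decorators.py to
    # synth_ai/v0/tracing/decorators.py, so pre-0.2.4 imports need the v0 prefix.
    try:
        from synth_ai.v0.tracing import decorators  # new location (0.2.4.dev2)
    except ImportError:
        from synth_ai.tracing import decorators  # old location (0.2.2.dev0)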
synth_ai/lm/vendors/openai_standard.py
@@ -1,7 +1,10 @@
 from typing import Any, Dict, List, Optional, Union
+import asyncio
+import time
 
 import groq
 import openai
+import os
 import pydantic_core
 from pydantic import BaseModel
 
@@ -12,6 +15,7 @@ from synth_ai.lm.tools.base import BaseTool
 from synth_ai.lm.vendors.base import BaseLMResponse, VendorBase
 from synth_ai.lm.constants import SPECIAL_BASE_TEMPS
 from synth_ai.lm.vendors.retries import MAX_BACKOFF
+from synth_ai.lm.vendors.openai_standard_responses import OpenAIResponsesAPIMixin
 import backoff
 
 DEFAULT_EXCEPTIONS_TO_RETRY = (
@@ -50,7 +54,7 @@ def _silent_backoff_handler(_details):
     pass
 
 
-class OpenAIStandard(VendorBase):
+class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
     """
     Standard OpenAI-compatible vendor implementation.
 
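Changing the base-class list is how the new Responses API methods are grafted onto the existing vendor without touching VendorBase. A minimal, self-contained sketch of the mixin pattern (class names here are illustrative stand-ins, not the synth_ai definitions):

    class Base:
        def complete(self) -> str:
            return "chat-completions path"

    class ResponsesMixin:
        # A mixin assumes the host class supplies the attributes it needs
        # (in the real OpenAIStandard: async_client, sync_client, ...).
        def complete_responses(self) -> str:
            return "responses path"

    class Vendor(Base, ResponsesMixin):
        pass

    v = Vendor()
    assert v.complete() == "chat-completions path"
    assert v.complete_responses() == "responses path"  # inherited from the mixin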
@@ -79,6 +83,16 @@ class OpenAIStandard(VendorBase):
         self.async_client = async_client
         self.used_for_structured_outputs = used_for_structured_outputs
         self.exceptions_to_retry = exceptions_to_retry
+
+        # Initialize Harmony support for OSS models
+        self.harmony_available = False
+        self.harmony_enc = None
+        try:
+            from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+            self.harmony_available = True
+            self.harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+        except ImportError:
+            pass
 
     @backoff.on_exception(
         backoff.expo,
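The Harmony probe treats openai_harmony as an optional dependency: a failed import just leaves a capability flag unset instead of breaking construction. A standalone sketch of the same pattern (package and encoding names from the diff; note the sketch sets the flag only after the encoding loads, avoiding the inline version's window where harmony_available is True while harmony_enc is still None):

    # Optional-dependency probe, as in OpenAIStandard.__init__ above.
    harmony_available = False
    harmony_enc = None
    try:
        from openai_harmony import HarmonyEncodingName, load_harmony_encoding
        harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
        harmony_available = True  # set last: never True with harmony_enc unset
    except ImportError:
        pass  # callers check harmony_available before touching harmony_enc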
@@ -99,6 +113,15 @@
         assert lm_config.get("response_model", None) is None, (
             "response_model is not supported for standard calls"
         )
+
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: _hit_api_async called with:")
+            print(f"  Model: {model}")
+            print(f"  Messages: {len(messages)} messages")
+            print(f"  Tools: {len(tools) if tools else 0} tools")
+            print(f"  LM config: {lm_config}")
+
         messages = special_orion_transform(model, messages)
         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
         lm_config["reasoning_effort"] = reasoning_effort
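These dumps are gated on an environment variable rather than the logging module, so they can be toggled per process without code changes. A minimal sketch of enabling the gate (the variable name comes straight from the hunk; the assert simply mirrors the check the vendor performs):

    import os

    # Only the exact string "1" enables the debug prints. Set it before
    # the call site evaluates os.getenv("SYNTH_OPENAI_DEBUG").
    os.environ["SYNTH_OPENAI_DEBUG"] = "1"
    assert os.getenv("SYNTH_OPENAI_DEBUG") == "1"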
@@ -106,7 +129,14 @@
             model, messages, lm_config=lm_config, tools=tools
         )
         if cache_result:
-            return cache_result
+            if DEBUG:
+                print(f"🔍 OPENAI DEBUG: Cache hit! Returning cached result")
+                print(f"  Cache result type: {type(cache_result)}")
+                print(f"🔍 OPENAI DEBUG: DISABLING CACHE FOR DEBUGGING - forcing API call")
+            # return cache_result  # Commented out to force API call
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Cache miss, making actual API call")
 
         # Common API call params
         api_params = {
@@ -126,12 +156,205 @@
                 "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
             )
 
+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+        # Forward GPU preference to backend (body + header)
+        if lm_config.get("gpu_preference") is not None:
+            api_params["gpu_preference"] = lm_config["gpu_preference"]
+            # Also set header so proxies that read headers can honor it
+            hdrs = api_params.get("extra_headers", {})
+            hdrs["X-GPU-Preference"] = lm_config["gpu_preference"]
+            api_params["extra_headers"] = hdrs
+        # Also mirror stop_after_tool_calls into a header for robustness
+        try:
+            satc_val = None
+            if isinstance(lm_config.get("extra_body"), dict):
+                satc_val = lm_config["extra_body"].get("stop_after_tool_calls")
+            if satc_val is not None:
+                hdrs = api_params.get("extra_headers", {})
+                hdrs["X-Stop-After-Tool-Calls"] = str(satc_val)
+                api_params["extra_headers"] = hdrs
+        except Exception:
+            pass
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+        # Forward arbitrary extra_body from lm_config if provided (merge)
+        if lm_config.get("extra_body") is not None:
+            # Shallow-merge top-level keys; nested keys (like chat_template_kwargs) should be provided whole
+            api_params["extra_body"] = {**api_params.get("extra_body", {}), **(lm_config.get("extra_body") or {})}
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             print("Reasoning effort:", reasoning_effort)
             api_params["reasoning_effort"] = reasoning_effort
 
-        output = await self.async_client.chat.completions.create(**api_params)
+        # Filter Synth-only params when calling external OpenAI-compatible providers
+        # External providers (e.g., OpenAI, Groq) reject unknown fields like
+        # extra_body.chat_template_kwargs or stop_after_tool_calls.
+        try:
+            base_url_obj = getattr(self.async_client, "base_url", None)
+            base_url_str = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str = ""
+
+        is_external_provider = (
+            "openai.com" in base_url_str or "api.groq.com" in base_url_str
+        )
+
+        if is_external_provider:
+            # Remove extra_body entirely; this is Synth-specific plumbing
+            if "extra_body" in api_params:
+                api_params.pop("extra_body", None)
+
+            # Also ensure we don't pass stray vendor-specific fields if present
+            # (defensive in case upstream added them at top-level later)
+            for k in ["chat_template_kwargs", "stop_after_tool_calls"]:
+                api_params.pop(k, None)
+
+        # GPT-5 models: parameter normalization
+        if model.startswith("gpt-5"):
+            # Require max_completion_tokens instead of max_tokens
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            # Only default temperature=1 supported; omit custom temperature
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
+        # Call API with better auth error reporting
+        #try:
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Making request with params:")
+            print(f"  Model: {api_params.get('model')}")
+            print(f"  Messages: {len(api_params.get('messages', []))} messages")
+            print(f"  Tools: {len(api_params.get('tools', []))} tools")
+            print(f"  Max tokens: {api_params.get('max_tokens', 'NOT SET')}")
+            print(f"  Temperature: {api_params.get('temperature', 'NOT SET')}")
+            if 'tools' in api_params:
+                print(f"  First tool: {api_params['tools'][0]}")
+            print(f"  FULL API PARAMS: {api_params}")
+
+        # Quiet targeted retry for OpenAI 400 tool_use_failed during tool-calling
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = await self.async_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                # Detect tool-use failure from various SDK surfaces
+                should_retry = False
+                # 1) Body dict
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                # 2) Response JSON
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                # 3) Fallback to string match
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    await asyncio.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Response received:")
+            print(f"  Type: {type(output)}")
+            print(f"  Choices: {len(output.choices) if hasattr(output, 'choices') else 'N/A'}")
+            if hasattr(output, 'choices') and output.choices:
+                choice = output.choices[0]
+                print(f"  Choice type: {type(choice)}")
+                if hasattr(choice, 'message'):
+                    message = choice.message
+                    print(f"  Message type: {type(message)}")
+                    print(f"  Has tool_calls: {hasattr(message, 'tool_calls')}")
+                    if hasattr(message, 'tool_calls'):
+                        print(f"  Tool calls: {message.tool_calls}")
+                    print(f"  Content: {message.content[:200] if hasattr(message, 'content') and message.content else 'None'}...")
+            # Show finish_reason and usage if available
+            try:
+                print(f"  finish_reason: {getattr(choice, 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f"  usage: prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: FULL RAW RESPONSE:")
+            if hasattr(output.choices[0].message, 'content') and output.choices[0].message.content:
+                print(f"  FULL CONTENT:\n{output.choices[0].message.content}")
+            print(f"  Raw choice: {choice}")
+            print(f"  Raw message: {message}")
+        # except Exception as e:
+        #     try:
+        #         from openai import AuthenticationError as _OpenAIAuthErr  # type: ignore
+        #     except ModuleNotFoundError:
+        #         _OpenAIAuthErr = type(e)
+        #     if isinstance(e, _OpenAIAuthErr):
+        #         key_preview = (os.getenv("OPENAI_API_KEY") or "")[:8]
+        #         # Create a more informative error message but preserve the original exception
+        #         enhanced_msg = f"Invalid API key format. Expected prefix 'sk-' or 'sk_live_'. Provided key begins with '{key_preview}'. Original error: {str(e)}"
+        #         # Re-raise the original exception with enhanced message if possible
+        #         if hasattr(e, 'response') and hasattr(e, 'body'):
+        #             raise _OpenAIAuthErr(enhanced_msg, response=e.response, body=e.body) from None
+        #         else:
+        #             # Fallback: just re-raise the original with a print for debugging
+        #             print(f"🔑 API Key Debug: {enhanced_msg}")
+        #             raise e from None
+        #     raise
         message = output.choices[0].message
 
         # Convert tool calls to dict format
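The retry loop above retries only the provider's tool_use_failed 400s and caps exponential backoff at 2.0 seconds; both knobs are environment-tunable. A standalone sketch reproducing the resulting sleep schedule (env names from the diff, defaults 5 attempts / 0.5 s initial; this is an illustration, not a synth_ai API):

    import os

    def tool_use_retry_schedule() -> list[float]:
        """Reproduce the sleep schedule of the tool_use_failed retry loop."""
        try:
            attempts = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
        except Exception:
            attempts = 5
        try:
            delay = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
        except Exception:
            delay = 0.5
        sleeps = []
        for _ in range(attempts - 1):  # the final attempt re-raises instead of sleeping
            sleeps.append(delay)
            delay = min(delay * 2.0, 2.0)  # doubled each time, capped at 2.0 s
        return sleeps

    print(tool_use_retry_schedule())  # defaults: [0.5, 1.0, 2.0, 2.0]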
@@ -149,10 +372,24 @@
                 for tc in message.tool_calls
             ]
 
+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
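With this change a successful call carries token accounting on the response object: BaseLMResponse grows a usage field (see synth_ai/lm/vendors/base.py, +7 in the file list). A hedged sketch of consuming it; the keys mirror usage_dict above, and the helper itself is illustrative, not part of the package:

    from typing import Any, Dict, Optional

    def summarize_usage(usage: Optional[Dict[str, Any]]) -> str:
        """Summarize the usage dict attached to BaseLMResponse.

        Each value may be None, and usage itself is None when the
        provider returns no token accounting.
        """
        u = usage or {}
        return (f"prompt={u.get('prompt_tokens')} "
                f"completion={u.get('completion_tokens')} "
                f"total={u.get('total_tokens')}")

    # e.g. print(summarize_usage(lm_response.usage)) for a returned BaseLMResponse
    print(summarize_usage({"prompt_tokens": 812, "completion_tokens": 64, "total_tokens": 876}))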
@@ -206,12 +443,84 @@
                 "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
             )
 
+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort
 
-        output = self.sync_client.chat.completions.create(**api_params)
+        # Sync path: apply the same targeted retry
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = self.sync_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                should_retry = False
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    time.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
         message = output.choices[0].message
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            try:
+                print(f"🔍 OPENAI DEBUG (sync): finish_reason={getattr(output.choices[0], 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f"🔍 OPENAI DEBUG (sync): usage prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass
 
         # Convert tool calls to dict format
         tool_calls = None
@@ -228,10 +537,24 @@
                 for tc in message.tool_calls
             ]
 
+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
        )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
@@ -342,6 +665,18 @@
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort
 
+        # Normalize for external OpenAI as well in sync path
+        try:
+            base_url_obj = getattr(self.sync_client, "base_url", None)
+            base_url_str_sync = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str_sync = ""
+        if ("openai.com" in base_url_str_sync or "api.groq.com" in base_url_str_sync) and model.startswith("gpt-5"):
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
         output = self.sync_client.chat.completions.create(**api_params)
 
         structured_output_api_result = response_model(**output.choices[0].message.content)
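Both paths now rewrite parameters for gpt-5* models: max_tokens becomes max_completion_tokens and any custom temperature is dropped. A minimal sketch of the same rewrite as a pure function, mirroring the unconditional async-path version above (an illustration, not a synth_ai API):

    from typing import Any, Dict

    def normalize_for_gpt5(model: str, api_params: Dict[str, Any]) -> Dict[str, Any]:
        """Mirror the inline gpt-5 parameter normalization from the diff."""
        params = dict(api_params)
        if model.startswith("gpt-5"):
            if "max_tokens" in params:
                params["max_completion_tokens"] = params.pop("max_tokens")
            params.pop("temperature", None)  # only the default temperature is supported
        return params

    print(normalize_for_gpt5("gpt-5-mini", {"max_tokens": 256, "temperature": 0.2}))
    # {'max_completion_tokens': 256}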
synth_ai/lm/vendors/openai_standard_responses.py (new file)
@@ -0,0 +1,243 @@
+"""
+OpenAI Responses API extensions for OpenAIStandard vendor.
+
+This module contains the Responses API and Harmony encoding methods
+that extend the OpenAIStandard class functionality.
+"""
+
+from typing import Any, Dict, List, Optional
+import uuid
+from pydantic import BaseModel
+
+from synth_ai.lm.tools.base import BaseTool
+from synth_ai.lm.vendors.base import BaseLMResponse
+from synth_ai.lm.vendors.retries import MAX_BACKOFF
+import backoff
+
+
+def _silent_backoff_handler(_details):
+    """No-op handler to keep stdout clean while still allowing visibility via logging if desired."""
+    pass
+
+
+DEFAULT_EXCEPTIONS_TO_RETRY = (
+    Exception,  # Will be more specific when imported
+)
+
+
+class OpenAIResponsesAPIMixin:
+    """Mixin class providing Responses API functionality for OpenAI vendors."""
+
+    async def _hit_api_async_responses(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        previous_response_id: Optional[str] = None,
+        use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        """Use OpenAI Responses API for supported models."""
+
+        print(f"🔍 RESPONSES API: Called for model {model}")
+        print(f"🔍 RESPONSES API: previous_response_id = {previous_response_id}")
+
+        # Check if the client has responses attribute
+        if not hasattr(self.async_client, 'responses'):
+            print("🔍 RESPONSES API: Client doesn't have responses attribute, using fallback")
+            # Fallback - use chat completions with simulated response_id
+            response = await self._hit_api_async(
+                model=model,
+                messages=messages,
+                lm_config=lm_config,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+                tools=tools,
+            )
+
+            # Add Responses API fields
+            if not response.response_id:
+                import uuid
+                response.response_id = str(uuid.uuid4())
+            response.api_type = "responses"
+            return response
+
+        # Use the official Responses API
+        try:
+            # Common API call params for Responses API
+            api_params = {
+                "model": model,
+            }
+
+            # For Responses API, we use 'input' parameter
+            if previous_response_id:
+                # Continue existing thread
+                api_params["previous_response_id"] = previous_response_id
+                # Only pass the new user input
+                if messages and len(messages) > 0:
+                    # Get the last user message content
+                    last_message = messages[-1]
+                    api_params["input"] = last_message.get("content", "")
+            else:
+                # Start new thread - combine system and user messages into input
+                if messages and len(messages) > 0:
+                    # Combine messages into a single input string
+                    input_parts = []
+                    for msg in messages:
+                        role = msg.get("role", "")
+                        content = msg.get("content", "")
+                        if role == "system":
+                            input_parts.append(f"System: {content}")
+                        elif role == "user":
+                            input_parts.append(f"User: {content}")
+                        elif role == "assistant":
+                            input_parts.append(f"Assistant: {content}")
+                    api_params["input"] = "\n".join(input_parts)
+
+            # Add tools if provided
+            if tools and all(isinstance(tool, BaseTool) for tool in tools):
+                api_params["tools"] = [tool.to_openai_tool() for tool in tools]
+            elif tools:
+                api_params["tools"] = tools
+
+            # Add other parameters from lm_config if needed
+            if "max_tokens" in lm_config:
+                api_params["max_tokens"] = lm_config["max_tokens"]
+
+            print(f"🔍 RESPONSES API: Calling with params: {list(api_params.keys())}")
+
+            # Call the Responses API
+            response = await self.async_client.responses.create(**api_params)
+
+            print(f"🔍 RESPONSES API: Response received, type: {type(response)}")
+
+            # Extract fields from response
+            output_text = getattr(response, 'output_text', getattr(response, 'content', ''))
+            reasoning_obj = getattr(response, 'reasoning', None)
+            response_id = getattr(response, 'id', None)
+
+            # Debug reasoning type (only first time)
+            if reasoning_obj and not hasattr(self, '_reasoning_logged'):
+                print(f"🔍 RESPONSES API: Reasoning type: {type(reasoning_obj)}")
+                print(f"🔍 RESPONSES API: Reasoning attributes: {[x for x in dir(reasoning_obj) if not x.startswith('_')]}")
+                self._reasoning_logged = True
+
+            # Handle reasoning - it might be an object or a string
+            reasoning = None
+            if reasoning_obj:
+                if isinstance(reasoning_obj, str):
+                    # Synth backend returns full reasoning as string
+                    reasoning = reasoning_obj
+                else:
+                    # OpenAI returns a Reasoning object
+                    # Try to get summary first, but preserve entire object if no summary
+                    if hasattr(reasoning_obj, 'summary') and reasoning_obj.summary:
+                        reasoning = reasoning_obj.summary
+                    else:
+                        # Preserve the full object structure as JSON
+                        # This includes effort level and any other fields
+                        if hasattr(reasoning_obj, 'model_dump_json'):
+                            reasoning = reasoning_obj.model_dump_json()
+                        elif hasattr(reasoning_obj, 'to_dict'):
+                            import json
+                            reasoning = json.dumps(reasoning_obj.to_dict())
+                        else:
+                            reasoning = str(reasoning_obj)
+
+            # Handle tool calls if present
+            tool_calls = None
+            if hasattr(response, 'tool_calls') and response.tool_calls:
+                tool_calls = [
+                    {
+                        "id": tc.id,
+                        "type": tc.type,
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        },
+                    }
+                    for tc in response.tool_calls
+                ]
+
+            print(f"🔍 RESPONSES API: Extracted response_id = {response_id}")
+
+            return BaseLMResponse(
+                raw_response=output_text,
+                response_id=response_id,
+                reasoning=reasoning,
+                api_type="responses",
+                tool_calls=tool_calls,
+            )
+
+        except (AttributeError, Exception) as e:
+            print(f"🔍 RESPONSES API: Error calling Responses API: {e}")
+            # No fallback - raise the error
+            raise
+
+    async def _hit_api_async_harmony(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        previous_response_id: Optional[str] = None,
+        use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        """Use Harmony encoding for OSS-GPT models."""
+        if not self.harmony_available:
+            raise ImportError("openai-harmony package required for OSS-GPT models. Install with: pip install openai-harmony")
+
+        from openai_harmony import Message, Role, Conversation
+
+        # Convert messages to Harmony format
+        harmony_messages = []
+        for msg in messages:
+            role = Role.SYSTEM if msg["role"] == "system" else (
+                Role.USER if msg["role"] == "user" else Role.ASSISTANT
+            )
+            content = msg["content"]
+            # Handle multimodal content
+            if isinstance(content, list):
+                # Extract text content for now
+                text_parts = [part.get("text", "") for part in content if part.get("type") == "text"]
+                content = " ".join(text_parts)
+            harmony_messages.append(Message.from_role_and_content(role, content))
+
+        conv = Conversation.from_messages(harmony_messages)
+        tokens = self.harmony_enc.render_conversation_for_completion(conv, Role.ASSISTANT)
+
+        # For now, we'll need to integrate with Synth GPU endpoint
+        # This would require the actual endpoint to be configured
+        # Placeholder for actual Synth GPU call
+        import aiohttp
+        import os
+
+        synth_gpu_endpoint = os.getenv("SYNTH_GPU_HARMONY_ENDPOINT")
+        if not synth_gpu_endpoint:
+            raise ValueError("SYNTH_GPU_HARMONY_ENDPOINT environment variable not set")
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{synth_gpu_endpoint}/v1/completions",
+                json={
+                    "model": model,
+                    "prompt": tokens,
+                    "max_tokens": lm_config.get("max_tokens", 4096),
+                    "temperature": lm_config.get("temperature", 0.8),
+                }
+            ) as resp:
+                result = await resp.json()
+
+        # Parse response using Harmony
+        response_tokens = result.get("choices", [{}])[0].get("text", "")
+        parsed = self.harmony_enc.parse_messages_from_completion_tokens(response_tokens, Role.ASSISTANT)
+
+        if parsed:
+            assistant_msg = parsed[-1].content_text() if hasattr(parsed[-1], 'content_text') else str(parsed[-1])
+        else:
+            assistant_msg = response_tokens
+
+        return BaseLMResponse(
+            raw_response=assistant_msg,
+            response_id=previous_response_id or str(uuid.uuid4()),
+            api_type="harmony",
+        )
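Taken together, the mixin lets callers thread multi-turn state through previous_response_id instead of resending the full message history. A hedged usage sketch follows; the method name, argument names, and response fields come from the code above, while the surrounding wiring (how the vendor instance is constructed and which model ids accept the Responses API) is an assumption:

    import asyncio

    async def two_turn_chat(vendor) -> None:
        # 'vendor' is assumed to be an OpenAIStandard instance whose
        # async_client targets a Responses-API-capable endpoint.
        first = await vendor._hit_api_async_responses(
            model="gpt-5-mini",  # illustrative model id
            messages=[{"role": "user", "content": "Summarize the Crafter env."}],
            lm_config={},
        )
        follow_up = await vendor._hit_api_async_responses(
            model="gpt-5-mini",
            messages=[{"role": "user", "content": "Shorter, please."}],
            lm_config={},
            previous_response_id=first.response_id,  # only the new input is sent
        )
        print(follow_up.api_type, follow_up.raw_response)  # api_type == "responses"

    # asyncio.run(two_turn_chat(vendor))  # given a configured vendor instance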