synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- synth_ai/cli/__init__.py +66 -0
- synth_ai/cli/balance.py +205 -0
- synth_ai/cli/calc.py +70 -0
- synth_ai/cli/demo.py +74 -0
- synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
- synth_ai/cli/man.py +103 -0
- synth_ai/cli/recent.py +126 -0
- synth_ai/cli/root.py +184 -0
- synth_ai/cli/status.py +126 -0
- synth_ai/cli/traces.py +136 -0
- synth_ai/cli/watch.py +508 -0
- synth_ai/config/base_url.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/crafter_custom/environment.py +1 -1
- synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
- synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
- synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
- synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
- synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
- synth_ai/environments/service/core_routes.py +1 -1
- synth_ai/experimental/synth_oss.py +446 -0
- synth_ai/learning/core.py +21 -0
- synth_ai/learning/gateway.py +4 -0
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/mipro.py +8 -0
- synth_ai/lm/__init__.py +3 -0
- synth_ai/lm/core/main.py +4 -0
- synth_ai/lm/core/main_v3.py +238 -122
- synth_ai/lm/core/vendor_clients.py +4 -0
- synth_ai/lm/provider_support/openai.py +11 -2
- synth_ai/lm/vendors/base.py +7 -0
- synth_ai/lm/vendors/openai_standard.py +339 -4
- synth_ai/lm/vendors/openai_standard_responses.py +243 -0
- synth_ai/lm/vendors/synth_client.py +155 -5
- synth_ai/lm/warmup.py +54 -17
- synth_ai/tracing/__init__.py +18 -0
- synth_ai/tracing_v1/__init__.py +29 -14
- synth_ai/tracing_v3/__init__.py +2 -2
- synth_ai/tracing_v3/abstractions.py +62 -17
- synth_ai/tracing_v3/config.py +13 -7
- synth_ai/tracing_v3/db_config.py +6 -6
- synth_ai/tracing_v3/hooks.py +1 -1
- synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
- synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
- synth_ai/tracing_v3/session_tracer.py +5 -5
- synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
- synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
- synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
- synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
- synth_ai/tracing_v3/turso/manager.py +18 -11
- synth_ai/tracing_v3/turso/models.py +1 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/dashboard.py +329 -0
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
- synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
- synth_ai/{tracing → v0/tracing}/context.py +1 -1
- synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
- synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
- synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
- synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
- synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
- synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
- synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
- synth_ai/{tracing → v0/tracing}/upload.py +4 -4
- synth_ai/v0/tracing_v1/__init__.py +16 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +100 -5
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +115 -75
- /synth_ai/{tracing/events/__init__.py → compound/cais.py} +0 -0
- /synth_ai/{tracing_v1/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
- /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
- /synth_ai/{tracing → v0/tracing}/config.py +0 -0
- /synth_ai/{tracing → v0/tracing}/local.py +0 -0
- /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
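The legacy tracing packages move under `synth_ai/v0/`. Assuming the renames above are plain relocations (the diff does not show whether the old top-level modules keep compatibility shims), downstream imports follow the files to their new homes:

```python
# Hypothetical caller code, not part of this diff: the module paths below are
# taken from the rename list above; everything else is illustrative.
# synth-ai 0.2.2.dev0:
#   from synth_ai.tracing import decorators
# synth-ai 0.2.4.dev2:
from synth_ai.v0.tracing import decorators                      # moved from synth_ai/tracing/
from synth_ai.v0.tracing_v1 import decorators as decorators_v1  # moved from synth_ai/tracing_v1/
```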
--- a/synth_ai/lm/vendors/openai_standard.py
+++ b/synth_ai/lm/vendors/openai_standard.py
@@ -1,7 +1,10 @@
 from typing import Any, Dict, List, Optional, Union
+import asyncio
+import time

 import groq
 import openai
+import os
 import pydantic_core
 from pydantic import BaseModel

@@ -12,6 +15,7 @@ from synth_ai.lm.tools.base import BaseTool
 from synth_ai.lm.vendors.base import BaseLMResponse, VendorBase
 from synth_ai.lm.constants import SPECIAL_BASE_TEMPS
 from synth_ai.lm.vendors.retries import MAX_BACKOFF
+from synth_ai.lm.vendors.openai_standard_responses import OpenAIResponsesAPIMixin
 import backoff

 DEFAULT_EXCEPTIONS_TO_RETRY = (
@@ -50,7 +54,7 @@ def _silent_backoff_handler(_details):
     pass


-class OpenAIStandard(VendorBase):
+class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
     """
     Standard OpenAI-compatible vendor implementation.

@@ -79,6 +83,16 @@ class OpenAIStandard(VendorBase):
         self.async_client = async_client
         self.used_for_structured_outputs = used_for_structured_outputs
         self.exceptions_to_retry = exceptions_to_retry
+
+        # Initialize Harmony support for OSS models
+        self.harmony_available = False
+        self.harmony_enc = None
+        try:
+            from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+            self.harmony_available = True
+            self.harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+        except ImportError:
+            pass

     @backoff.on_exception(
         backoff.expo,
@@ -99,6 +113,15 @@ class OpenAIStandard(VendorBase):
         assert lm_config.get("response_model", None) is None, (
             "response_model is not supported for standard calls"
         )
+
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: _hit_api_async called with:")
+            print(f" Model: {model}")
+            print(f" Messages: {len(messages)} messages")
+            print(f" Tools: {len(tools) if tools else 0} tools")
+            print(f" LM config: {lm_config}")
+
         messages = special_orion_transform(model, messages)
         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
         lm_config["reasoning_effort"] = reasoning_effort
@@ -106,7 +129,14 @@ class OpenAIStandard(VendorBase):
             model, messages, lm_config=lm_config, tools=tools
         )
         if cache_result:
-            return cache_result
+            if DEBUG:
+                print(f"🔍 OPENAI DEBUG: Cache hit! Returning cached result")
+                print(f" Cache result type: {type(cache_result)}")
+                print(f"🔍 OPENAI DEBUG: DISABLING CACHE FOR DEBUGGING - forcing API call")
+            # return cache_result  # Commented out to force API call
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Cache miss, making actual API call")

         # Common API call params
         api_params = {
@@ -126,12 +156,205 @@ class OpenAIStandard(VendorBase):
             "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
         )

+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+        # Forward GPU preference to backend (body + header)
+        if lm_config.get("gpu_preference") is not None:
+            api_params["gpu_preference"] = lm_config["gpu_preference"]
+            # Also set header so proxies that read headers can honor it
+            hdrs = api_params.get("extra_headers", {})
+            hdrs["X-GPU-Preference"] = lm_config["gpu_preference"]
+            api_params["extra_headers"] = hdrs
+        # Also mirror stop_after_tool_calls into a header for robustness
+        try:
+            satc_val = None
+            if isinstance(lm_config.get("extra_body"), dict):
+                satc_val = lm_config["extra_body"].get("stop_after_tool_calls")
+            if satc_val is not None:
+                hdrs = api_params.get("extra_headers", {})
+                hdrs["X-Stop-After-Tool-Calls"] = str(satc_val)
+                api_params["extra_headers"] = hdrs
+        except Exception:
+            pass
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+        # Forward arbitrary extra_body from lm_config if provided (merge)
+        if lm_config.get("extra_body") is not None:
+            # Shallow-merge top-level keys; nested keys (like chat_template_kwargs) should be provided whole
+            api_params["extra_body"] = {**api_params.get("extra_body", {}), **(lm_config.get("extra_body") or {})}
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             print("Reasoning effort:", reasoning_effort)
             api_params["reasoning_effort"] = reasoning_effort

-        output = await self.async_client.chat.completions.create(**api_params)
+        # Filter Synth-only params when calling external OpenAI-compatible providers
+        # External providers (e.g., OpenAI, Groq) reject unknown fields like
+        # extra_body.chat_template_kwargs or stop_after_tool_calls.
+        try:
+            base_url_obj = getattr(self.async_client, "base_url", None)
+            base_url_str = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str = ""
+
+        is_external_provider = (
+            "openai.com" in base_url_str or "api.groq.com" in base_url_str
+        )
+
+        if is_external_provider:
+            # Remove extra_body entirely; this is Synth-specific plumbing
+            if "extra_body" in api_params:
+                api_params.pop("extra_body", None)
+
+            # Also ensure we don't pass stray vendor-specific fields if present
+            # (defensive in case upstream added them at top-level later)
+            for k in ["chat_template_kwargs", "stop_after_tool_calls"]:
+                api_params.pop(k, None)
+
+        # GPT-5 models: parameter normalization
+        if model.startswith("gpt-5"):
+            # Require max_completion_tokens instead of max_tokens
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            # Only default temperature=1 supported; omit custom temperature
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
+        # Call API with better auth error reporting
+        # try:
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Making request with params:")
+            print(f" Model: {api_params.get('model')}")
+            print(f" Messages: {len(api_params.get('messages', []))} messages")
+            print(f" Tools: {len(api_params.get('tools', []))} tools")
+            print(f" Max tokens: {api_params.get('max_tokens', 'NOT SET')}")
+            print(f" Temperature: {api_params.get('temperature', 'NOT SET')}")
+            if 'tools' in api_params:
+                print(f" First tool: {api_params['tools'][0]}")
+            print(f" FULL API PARAMS: {api_params}")
+
+        # Quiet targeted retry for OpenAI 400 tool_use_failed during tool-calling
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = await self.async_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                # Detect tool-use failure from various SDK surfaces
+                should_retry = False
+                # 1) Body dict
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                # 2) Response JSON
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                # 3) Fallback to string match
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    await asyncio.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Response received:")
+            print(f" Type: {type(output)}")
+            print(f" Choices: {len(output.choices) if hasattr(output, 'choices') else 'N/A'}")
+            if hasattr(output, 'choices') and output.choices:
+                choice = output.choices[0]
+                print(f" Choice type: {type(choice)}")
+                if hasattr(choice, 'message'):
+                    message = choice.message
+                    print(f" Message type: {type(message)}")
+                    print(f" Has tool_calls: {hasattr(message, 'tool_calls')}")
+                    if hasattr(message, 'tool_calls'):
+                        print(f" Tool calls: {message.tool_calls}")
+                    print(f" Content: {message.content[:200] if hasattr(message, 'content') and message.content else 'None'}...")
+            # Show finish_reason and usage if available
+            try:
+                print(f" finish_reason: {getattr(choice, 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f" usage: prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: FULL RAW RESPONSE:")
+            if hasattr(output.choices[0].message, 'content') and output.choices[0].message.content:
+                print(f" FULL CONTENT:\n{output.choices[0].message.content}")
+            print(f" Raw choice: {choice}")
+            print(f" Raw message: {message}")
+        # except Exception as e:
+        #     try:
+        #         from openai import AuthenticationError as _OpenAIAuthErr  # type: ignore
+        #     except ModuleNotFoundError:
+        #         _OpenAIAuthErr = type(e)
+        #     if isinstance(e, _OpenAIAuthErr):
+        #         key_preview = (os.getenv("OPENAI_API_KEY") or "")[:8]
+        #         # Create a more informative error message but preserve the original exception
+        #         enhanced_msg = f"Invalid API key format. Expected prefix 'sk-' or 'sk_live_'. Provided key begins with '{key_preview}'. Original error: {str(e)}"
+        #         # Re-raise the original exception with enhanced message if possible
+        #         if hasattr(e, 'response') and hasattr(e, 'body'):
+        #             raise _OpenAIAuthErr(enhanced_msg, response=e.response, body=e.body) from None
+        #         else:
+        #             # Fallback: just re-raise the original with a print for debugging
+        #             print(f"🔑 API Key Debug: {enhanced_msg}")
+        #             raise e from None
+        #     raise
         message = output.choices[0].message

         # Convert tool calls to dict format
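For illustration, the targeted retry added in the hunk above can be read as the standalone sketch below. The wrapper function is an assumption for demonstration only; the environment variables, the error matching, and the 2-second backoff cap mirror the diff.

```python
# Minimal sketch of the targeted retry added above (illustrative wrapper,
# not an API of synth_ai): retry chat.completions.create only when the
# provider returns a 400 "tool_use_failed" error, with a capped backoff.
import asyncio
import os

import openai


async def create_with_tool_use_retry(client: openai.AsyncOpenAI, **api_params):
    max_attempts = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
    delay = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
    attempt = 0
    while True:
        try:
            return await client.chat.completions.create(**api_params)
        except openai.BadRequestError as err:
            text = str(err)
            retryable = "tool_use_failed" in text or "Failed to call a function" in text
            if retryable and attempt + 1 < max_attempts:
                await asyncio.sleep(delay)
                delay = min(delay * 2.0, 2.0)  # exponential backoff capped at 2 s
                attempt += 1
                continue
            raise
```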
@@ -149,10 +372,24 @@ class OpenAIStandard(VendorBase):
                 for tc in message.tool_calls
             ]

+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
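With this hunk the vendor attaches a plain token-usage dict to the `BaseLMResponse` it returns (the field itself is presumably among the `+7` lines added to `synth_ai/lm/vendors/base.py`). A minimal sketch of what a caller can now read, using the same constructor keywords as the hunk; the numbers are placeholders:

```python
# Illustrative only: constructing a BaseLMResponse the way the updated vendor
# does and reading the new usage field. Values are made up for the example.
from synth_ai.lm.vendors.base import BaseLMResponse

resp = BaseLMResponse(
    raw_response="Hello!",
    structured_output=None,
    tool_calls=None,
    usage={"prompt_tokens": 12, "completion_tokens": 3, "total_tokens": 15},
)
if resp.usage is not None:
    # Any key may be None when the provider omits usage information.
    print(resp.usage["total_tokens"])  # 15
```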
@@ -206,12 +443,84 @@ class OpenAIStandard(VendorBase):
             "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
         )

+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort

-        output = self.sync_client.chat.completions.create(**api_params)
+        # Sync path: apply the same targeted retry
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = self.sync_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                should_retry = False
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    time.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
         message = output.choices[0].message
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            try:
+                print(f"🔍 OPENAI DEBUG (sync): finish_reason={getattr(output.choices[0], 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f"🔍 OPENAI DEBUG (sync): usage prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass

         # Convert tool calls to dict format
         tool_calls = None
@@ -228,10 +537,24 @@ class OpenAIStandard(VendorBase):
                 for tc in message.tool_calls
             ]

+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
@@ -342,6 +665,18 @@ class OpenAIStandard(VendorBase):
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort

+        # Normalize for external OpenAI as well in sync path
+        try:
+            base_url_obj = getattr(self.sync_client, "base_url", None)
+            base_url_str_sync = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str_sync = ""
+        if ("openai.com" in base_url_str_sync or "api.groq.com" in base_url_str_sync) and model.startswith("gpt-5"):
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
         output = self.sync_client.chat.completions.create(**api_params)

         structured_output_api_result = response_model(**output.choices[0].message.content)
--- /dev/null
+++ b/synth_ai/lm/vendors/openai_standard_responses.py
@@ -0,0 +1,243 @@
+"""
+OpenAI Responses API extensions for OpenAIStandard vendor.
+
+This module contains the Responses API and Harmony encoding methods
+that extend the OpenAIStandard class functionality.
+"""
+
+from typing import Any, Dict, List, Optional
+import uuid
+from pydantic import BaseModel
+
+from synth_ai.lm.tools.base import BaseTool
+from synth_ai.lm.vendors.base import BaseLMResponse
+from synth_ai.lm.vendors.retries import MAX_BACKOFF
+import backoff
+
+
+def _silent_backoff_handler(_details):
+    """No-op handler to keep stdout clean while still allowing visibility via logging if desired."""
+    pass
+
+
+DEFAULT_EXCEPTIONS_TO_RETRY = (
+    Exception,  # Will be more specific when imported
+)
+
+
+class OpenAIResponsesAPIMixin:
+    """Mixin class providing Responses API functionality for OpenAI vendors."""
+
+    async def _hit_api_async_responses(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        previous_response_id: Optional[str] = None,
+        use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        """Use OpenAI Responses API for supported models."""
+
+        print(f"🔍 RESPONSES API: Called for model {model}")
+        print(f"🔍 RESPONSES API: previous_response_id = {previous_response_id}")
+
+        # Check if the client has responses attribute
+        if not hasattr(self.async_client, 'responses'):
+            print("🔍 RESPONSES API: Client doesn't have responses attribute, using fallback")
+            # Fallback - use chat completions with simulated response_id
+            response = await self._hit_api_async(
+                model=model,
+                messages=messages,
+                lm_config=lm_config,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+                tools=tools,
+            )
+
+            # Add Responses API fields
+            if not response.response_id:
+                import uuid
+                response.response_id = str(uuid.uuid4())
+            response.api_type = "responses"
+            return response
+
+        # Use the official Responses API
+        try:
+            # Common API call params for Responses API
+            api_params = {
+                "model": model,
+            }
+
+            # For Responses API, we use 'input' parameter
+            if previous_response_id:
+                # Continue existing thread
+                api_params["previous_response_id"] = previous_response_id
+                # Only pass the new user input
+                if messages and len(messages) > 0:
+                    # Get the last user message content
+                    last_message = messages[-1]
+                    api_params["input"] = last_message.get("content", "")
+            else:
+                # Start new thread - combine system and user messages into input
+                if messages and len(messages) > 0:
+                    # Combine messages into a single input string
+                    input_parts = []
+                    for msg in messages:
+                        role = msg.get("role", "")
+                        content = msg.get("content", "")
+                        if role == "system":
+                            input_parts.append(f"System: {content}")
+                        elif role == "user":
+                            input_parts.append(f"User: {content}")
+                        elif role == "assistant":
+                            input_parts.append(f"Assistant: {content}")
+                    api_params["input"] = "\n".join(input_parts)
+
+            # Add tools if provided
+            if tools and all(isinstance(tool, BaseTool) for tool in tools):
+                api_params["tools"] = [tool.to_openai_tool() for tool in tools]
+            elif tools:
+                api_params["tools"] = tools
+
+            # Add other parameters from lm_config if needed
+            if "max_tokens" in lm_config:
+                api_params["max_tokens"] = lm_config["max_tokens"]
+
+            print(f"🔍 RESPONSES API: Calling with params: {list(api_params.keys())}")
+
+            # Call the Responses API
+            response = await self.async_client.responses.create(**api_params)
+
+            print(f"🔍 RESPONSES API: Response received, type: {type(response)}")
+
+            # Extract fields from response
+            output_text = getattr(response, 'output_text', getattr(response, 'content', ''))
+            reasoning_obj = getattr(response, 'reasoning', None)
+            response_id = getattr(response, 'id', None)
+
+            # Debug reasoning type (only first time)
+            if reasoning_obj and not hasattr(self, '_reasoning_logged'):
+                print(f"🔍 RESPONSES API: Reasoning type: {type(reasoning_obj)}")
+                print(f"🔍 RESPONSES API: Reasoning attributes: {[x for x in dir(reasoning_obj) if not x.startswith('_')]}")
+                self._reasoning_logged = True
+
+            # Handle reasoning - it might be an object or a string
+            reasoning = None
+            if reasoning_obj:
+                if isinstance(reasoning_obj, str):
+                    # Synth backend returns full reasoning as string
+                    reasoning = reasoning_obj
+                else:
+                    # OpenAI returns a Reasoning object
+                    # Try to get summary first, but preserve entire object if no summary
+                    if hasattr(reasoning_obj, 'summary') and reasoning_obj.summary:
+                        reasoning = reasoning_obj.summary
+                    else:
+                        # Preserve the full object structure as JSON
+                        # This includes effort level and any other fields
+                        if hasattr(reasoning_obj, 'model_dump_json'):
+                            reasoning = reasoning_obj.model_dump_json()
+                        elif hasattr(reasoning_obj, 'to_dict'):
+                            import json
+                            reasoning = json.dumps(reasoning_obj.to_dict())
+                        else:
+                            reasoning = str(reasoning_obj)
+
+            # Handle tool calls if present
+            tool_calls = None
+            if hasattr(response, 'tool_calls') and response.tool_calls:
+                tool_calls = [
+                    {
+                        "id": tc.id,
+                        "type": tc.type,
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        },
+                    }
+                    for tc in response.tool_calls
+                ]
+
+            print(f"🔍 RESPONSES API: Extracted response_id = {response_id}")
+
+            return BaseLMResponse(
+                raw_response=output_text,
+                response_id=response_id,
+                reasoning=reasoning,
+                api_type="responses",
+                tool_calls=tool_calls,
+            )
+
+        except (AttributeError, Exception) as e:
+            print(f"🔍 RESPONSES API: Error calling Responses API: {e}")
+            # No fallback - raise the error
+            raise
+
+    async def _hit_api_async_harmony(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        previous_response_id: Optional[str] = None,
+        use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        """Use Harmony encoding for OSS-GPT models."""
+        if not self.harmony_available:
+            raise ImportError("openai-harmony package required for OSS-GPT models. Install with: pip install openai-harmony")
+
+        from openai_harmony import Message, Role, Conversation
+
+        # Convert messages to Harmony format
+        harmony_messages = []
+        for msg in messages:
+            role = Role.SYSTEM if msg["role"] == "system" else (
+                Role.USER if msg["role"] == "user" else Role.ASSISTANT
+            )
+            content = msg["content"]
+            # Handle multimodal content
+            if isinstance(content, list):
+                # Extract text content for now
+                text_parts = [part.get("text", "") for part in content if part.get("type") == "text"]
+                content = " ".join(text_parts)
+            harmony_messages.append(Message.from_role_and_content(role, content))
+
+        conv = Conversation.from_messages(harmony_messages)
+        tokens = self.harmony_enc.render_conversation_for_completion(conv, Role.ASSISTANT)
+
+        # For now, we'll need to integrate with Synth GPU endpoint
+        # This would require the actual endpoint to be configured
+        # Placeholder for actual Synth GPU call
+        import aiohttp
+        import os
+
+        synth_gpu_endpoint = os.getenv("SYNTH_GPU_HARMONY_ENDPOINT")
+        if not synth_gpu_endpoint:
+            raise ValueError("SYNTH_GPU_HARMONY_ENDPOINT environment variable not set")
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{synth_gpu_endpoint}/v1/completions",
+                json={
+                    "model": model,
+                    "prompt": tokens,
+                    "max_tokens": lm_config.get("max_tokens", 4096),
+                    "temperature": lm_config.get("temperature", 0.8),
+                }
+            ) as resp:
+                result = await resp.json()
+
+        # Parse response using Harmony
+        response_tokens = result.get("choices", [{}])[0].get("text", "")
+        parsed = self.harmony_enc.parse_messages_from_completion_tokens(response_tokens, Role.ASSISTANT)
+
+        if parsed:
+            assistant_msg = parsed[-1].content_text() if hasattr(parsed[-1], 'content_text') else str(parsed[-1])
+        else:
+            assistant_msg = response_tokens
+
+        return BaseLMResponse(
+            raw_response=assistant_msg,
+            response_id=previous_response_id or str(uuid.uuid4()),
+            api_type="harmony",
+        )