synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/cli/__init__.py +66 -0
- synth_ai/cli/balance.py +205 -0
- synth_ai/cli/calc.py +70 -0
- synth_ai/cli/demo.py +74 -0
- synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
- synth_ai/cli/man.py +103 -0
- synth_ai/cli/recent.py +126 -0
- synth_ai/cli/root.py +184 -0
- synth_ai/cli/status.py +126 -0
- synth_ai/cli/traces.py +136 -0
- synth_ai/cli/watch.py +508 -0
- synth_ai/config/base_url.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +646 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
- synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
- synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
- synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
- synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
- synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
- synth_ai/experimental/synth_oss.py +446 -0
- synth_ai/learning/core.py +21 -0
- synth_ai/learning/gateway.py +4 -0
- synth_ai/learning/prompts/mipro.py +0 -0
- synth_ai/lm/__init__.py +3 -0
- synth_ai/lm/core/main.py +4 -0
- synth_ai/lm/core/main_v3.py +68 -13
- synth_ai/lm/core/vendor_clients.py +4 -0
- synth_ai/lm/provider_support/openai.py +11 -2
- synth_ai/lm/vendors/base.py +7 -0
- synth_ai/lm/vendors/openai_standard.py +339 -4
- synth_ai/lm/vendors/openai_standard_responses.py +243 -0
- synth_ai/lm/vendors/synth_client.py +155 -5
- synth_ai/lm/warmup.py +54 -17
- synth_ai/tracing/__init__.py +18 -0
- synth_ai/tracing_v1/__init__.py +29 -14
- synth_ai/tracing_v3/config.py +13 -7
- synth_ai/tracing_v3/db_config.py +6 -6
- synth_ai/tracing_v3/turso/manager.py +8 -8
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/dashboard.py +329 -0
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
- synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
- synth_ai/{tracing → v0/tracing}/context.py +1 -1
- synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
- synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
- synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
- synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
- synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
- synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
- synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
- synth_ai/{tracing → v0/tracing}/upload.py +4 -4
- synth_ai/v0/tracing_v1/__init__.py +16 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/METADATA +98 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/RECORD +98 -62
- /synth_ai/{tracing/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
- /synth_ai/{tracing_v1/events/__init__.py → learning/prompts/gepa.py} +0 -0
- /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
- /synth_ai/{tracing → v0/tracing}/config.py +0 -0
- /synth_ai/{tracing → v0/tracing}/local.py +0 -0
- /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/top_level.txt +0 -0
synth_ai/lm/vendors/openai_standard_responses.py ADDED
@@ -0,0 +1,243 @@
```python
"""
OpenAI Responses API extensions for OpenAIStandard vendor.

This module contains the Responses API and Harmony encoding methods
that extend the OpenAIStandard class functionality.
"""

from typing import Any, Dict, List, Optional
import uuid
from pydantic import BaseModel

from synth_ai.lm.tools.base import BaseTool
from synth_ai.lm.vendors.base import BaseLMResponse
from synth_ai.lm.vendors.retries import MAX_BACKOFF
import backoff


def _silent_backoff_handler(_details):
    """No-op handler to keep stdout clean while still allowing visibility via logging if desired."""
    pass


DEFAULT_EXCEPTIONS_TO_RETRY = (
    Exception,  # Will be more specific when imported
)


class OpenAIResponsesAPIMixin:
    """Mixin class providing Responses API functionality for OpenAI vendors."""

    async def _hit_api_async_responses(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        lm_config: Dict[str, Any],
        previous_response_id: Optional[str] = None,
        use_ephemeral_cache_only: bool = False,
        tools: Optional[List[BaseTool]] = None,
    ) -> BaseLMResponse:
        """Use OpenAI Responses API for supported models."""

        print(f"🔍 RESPONSES API: Called for model {model}")
        print(f"🔍 RESPONSES API: previous_response_id = {previous_response_id}")

        # Check if the client has responses attribute
        if not hasattr(self.async_client, 'responses'):
            print("🔍 RESPONSES API: Client doesn't have responses attribute, using fallback")
            # Fallback - use chat completions with simulated response_id
            response = await self._hit_api_async(
                model=model,
                messages=messages,
                lm_config=lm_config,
                use_ephemeral_cache_only=use_ephemeral_cache_only,
                tools=tools,
            )

            # Add Responses API fields
            if not response.response_id:
                import uuid
                response.response_id = str(uuid.uuid4())
            response.api_type = "responses"
            return response

        # Use the official Responses API
        try:
            # Common API call params for Responses API
            api_params = {
                "model": model,
            }

            # For Responses API, we use 'input' parameter
            if previous_response_id:
                # Continue existing thread
                api_params["previous_response_id"] = previous_response_id
                # Only pass the new user input
                if messages and len(messages) > 0:
                    # Get the last user message content
                    last_message = messages[-1]
                    api_params["input"] = last_message.get("content", "")
            else:
                # Start new thread - combine system and user messages into input
                if messages and len(messages) > 0:
                    # Combine messages into a single input string
                    input_parts = []
                    for msg in messages:
                        role = msg.get("role", "")
                        content = msg.get("content", "")
                        if role == "system":
                            input_parts.append(f"System: {content}")
                        elif role == "user":
                            input_parts.append(f"User: {content}")
                        elif role == "assistant":
                            input_parts.append(f"Assistant: {content}")
                    api_params["input"] = "\n".join(input_parts)

            # Add tools if provided
            if tools and all(isinstance(tool, BaseTool) for tool in tools):
                api_params["tools"] = [tool.to_openai_tool() for tool in tools]
            elif tools:
                api_params["tools"] = tools

            # Add other parameters from lm_config if needed
            if "max_tokens" in lm_config:
                api_params["max_tokens"] = lm_config["max_tokens"]

            print(f"🔍 RESPONSES API: Calling with params: {list(api_params.keys())}")

            # Call the Responses API
            response = await self.async_client.responses.create(**api_params)

            print(f"🔍 RESPONSES API: Response received, type: {type(response)}")

            # Extract fields from response
            output_text = getattr(response, 'output_text', getattr(response, 'content', ''))
            reasoning_obj = getattr(response, 'reasoning', None)
            response_id = getattr(response, 'id', None)

            # Debug reasoning type (only first time)
            if reasoning_obj and not hasattr(self, '_reasoning_logged'):
                print(f"🔍 RESPONSES API: Reasoning type: {type(reasoning_obj)}")
                print(f"🔍 RESPONSES API: Reasoning attributes: {[x for x in dir(reasoning_obj) if not x.startswith('_')]}")
                self._reasoning_logged = True

            # Handle reasoning - it might be an object or a string
            reasoning = None
            if reasoning_obj:
                if isinstance(reasoning_obj, str):
                    # Synth backend returns full reasoning as string
                    reasoning = reasoning_obj
                else:
                    # OpenAI returns a Reasoning object
                    # Try to get summary first, but preserve entire object if no summary
                    if hasattr(reasoning_obj, 'summary') and reasoning_obj.summary:
                        reasoning = reasoning_obj.summary
                    else:
                        # Preserve the full object structure as JSON
                        # This includes effort level and any other fields
                        if hasattr(reasoning_obj, 'model_dump_json'):
                            reasoning = reasoning_obj.model_dump_json()
                        elif hasattr(reasoning_obj, 'to_dict'):
                            import json
                            reasoning = json.dumps(reasoning_obj.to_dict())
                        else:
                            reasoning = str(reasoning_obj)

            # Handle tool calls if present
            tool_calls = None
            if hasattr(response, 'tool_calls') and response.tool_calls:
                tool_calls = [
                    {
                        "id": tc.id,
                        "type": tc.type,
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments,
                        },
                    }
                    for tc in response.tool_calls
                ]

            print(f"🔍 RESPONSES API: Extracted response_id = {response_id}")

            return BaseLMResponse(
                raw_response=output_text,
                response_id=response_id,
                reasoning=reasoning,
                api_type="responses",
                tool_calls=tool_calls,
            )

        except (AttributeError, Exception) as e:
            print(f"🔍 RESPONSES API: Error calling Responses API: {e}")
            # No fallback - raise the error
            raise

    async def _hit_api_async_harmony(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        lm_config: Dict[str, Any],
        previous_response_id: Optional[str] = None,
        use_ephemeral_cache_only: bool = False,
        tools: Optional[List[BaseTool]] = None,
    ) -> BaseLMResponse:
        """Use Harmony encoding for OSS-GPT models."""
        if not self.harmony_available:
            raise ImportError("openai-harmony package required for OSS-GPT models. Install with: pip install openai-harmony")

        from openai_harmony import Message, Role, Conversation

        # Convert messages to Harmony format
        harmony_messages = []
        for msg in messages:
            role = Role.SYSTEM if msg["role"] == "system" else (
                Role.USER if msg["role"] == "user" else Role.ASSISTANT
            )
            content = msg["content"]
            # Handle multimodal content
            if isinstance(content, list):
                # Extract text content for now
                text_parts = [part.get("text", "") for part in content if part.get("type") == "text"]
                content = " ".join(text_parts)
            harmony_messages.append(Message.from_role_and_content(role, content))

        conv = Conversation.from_messages(harmony_messages)
        tokens = self.harmony_enc.render_conversation_for_completion(conv, Role.ASSISTANT)

        # For now, we'll need to integrate with Synth GPU endpoint
        # This would require the actual endpoint to be configured
        # Placeholder for actual Synth GPU call
        import aiohttp
        import os

        synth_gpu_endpoint = os.getenv("SYNTH_GPU_HARMONY_ENDPOINT")
        if not synth_gpu_endpoint:
            raise ValueError("SYNTH_GPU_HARMONY_ENDPOINT environment variable not set")

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{synth_gpu_endpoint}/v1/completions",
                json={
                    "model": model,
                    "prompt": tokens,
                    "max_tokens": lm_config.get("max_tokens", 4096),
                    "temperature": lm_config.get("temperature", 0.8),
                }
            ) as resp:
                result = await resp.json()

        # Parse response using Harmony
        response_tokens = result.get("choices", [{}])[0].get("text", "")
        parsed = self.harmony_enc.parse_messages_from_completion_tokens(response_tokens, Role.ASSISTANT)

        if parsed:
            assistant_msg = parsed[-1].content_text() if hasattr(parsed[-1], 'content_text') else str(parsed[-1])
        else:
            assistant_msg = response_tokens

        return BaseLMResponse(
            raw_response=assistant_msg,
            response_id=previous_response_id or str(uuid.uuid4()),
            api_type="harmony",
        )
```
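For orientation, here is a minimal usage sketch of the mixin above. The `OpenAIStandard` import path and the subclass composition are assumptions based on the module docstring ("extend the OpenAIStandard class functionality"), not code shipped in this release; the model name is a placeholder.

```python
# Sketch only: composing the mixin with the OpenAIStandard vendor class.
from synth_ai.lm.vendors.openai_standard import OpenAIStandard  # assumed path
from synth_ai.lm.vendors.openai_standard_responses import OpenAIResponsesAPIMixin


class ResponsesCapableVendor(OpenAIResponsesAPIMixin, OpenAIStandard):
    """Inherits chat-completions plumbing; gains the Responses/Harmony methods."""


async def continue_thread(vendor: ResponsesCapableVendor, prev_id: str):
    # Per the mixin, a continued thread sends only the latest user turn as
    # `input`; previous_response_id carries the server-side conversation state.
    return await vendor._hit_api_async_responses(
        model="gpt-responses-model",  # placeholder model name
        messages=[{"role": "user", "content": "And the edge cases?"}],
        lm_config={"max_tokens": 512},
        previous_response_id=prev_id,
    )
```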
synth_ai/lm/vendors/synth_client.py CHANGED
```diff
@@ -47,6 +47,73 @@ class AsyncSynthClient:
             },
         )
 
+    async def responses_create(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        previous_response_id: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto",
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Create response using Synth Responses API.
+
+        Args:
+            model: Model identifier
+            messages: List of message dicts with 'role' and 'content'
+            previous_response_id: Optional ID of previous response for thread management
+            tools: List of available tools
+            tool_choice: How to choose tools
+            **kwargs: Additional parameters
+
+        Returns:
+            Responses API-compatible response dict
+        """
+        await self._ensure_client()
+
+        # Build payload for Responses API
+        payload = {
+            "model": model,
+            "messages": messages,
+        }
+
+        # Add optional parameters
+        if previous_response_id is not None:
+            payload["previous_response_id"] = previous_response_id
+        if tools is not None:
+            payload["tools"] = tools
+            payload["tool_choice"] = tool_choice
+
+        # Add any additional kwargs
+        payload.update(kwargs)
+
+        # Retry logic
+        for attempt in range(self.config.max_retries):
+            try:
+                url = f"{self.config.get_base_url_without_v1()}/v1/responses"
+                response = await self._client.post(url, json=payload)
+
+                if response.status_code == 200:
+                    return response.json()
+
+                # Handle rate limits with exponential backoff
+                if response.status_code == 429:
+                    wait_time = 2**attempt
+                    await asyncio.sleep(wait_time)
+                    continue
+
+                # Other errors
+                response.raise_for_status()
+
+            except Exception as e:
+                if attempt == self.config.max_retries - 1:
+                    logger.error(f"Failed after {self.config.max_retries} attempts: {e}")
+                    raise
+                await asyncio.sleep(2**attempt)
+
+        raise Exception(f"Failed to create response after {self.config.max_retries} attempts")
+
     async def chat_completions_create(
         self,
         model: str,
@@ -118,12 +185,32 @@ class AsyncSynthClient:
         # Retry logic
         for attempt in range(self.config.max_retries):
             try:
-
-
-                )
-
+                url = f"{self.config.get_base_url_without_v1()}/v1/chat/completions"
+                print(f"🔍 SYNTH DEBUG: Making request to URL: {url}")
+                print(f"🔍 SYNTH DEBUG: Payload keys: {list(payload.keys())}")
+                if 'tools' in payload:
+                    print(f"🔍 SYNTH DEBUG: Tools in payload: {len(payload['tools'])} tools")
+                    print(f"🔍 SYNTH DEBUG: First tool: {json.dumps(payload['tools'][0], indent=2)}")
+
+                response = await self._client.post(url, json=payload)
+
+                print(f"🔍 SYNTH DEBUG: Response status: {response.status_code}")
+
                 if response.status_code == 200:
-
+                    result = response.json()
+                    print(f"🔍 SYNTH DEBUG: Response keys: {list(result.keys())}")
+                    if 'choices' in result and result['choices']:
+                        choice = result['choices'][0]
+                        print(f"🔍 SYNTH DEBUG: Choice keys: {list(choice.keys())}")
+                        if 'message' in choice:
+                            message = choice['message']
+                            print(f"🔍 SYNTH DEBUG: Message keys: {list(message.keys())}")
+                            if 'tool_calls' in message:
+                                print(f"🔍 SYNTH DEBUG: Tool calls: {message['tool_calls']}")
+                            else:
+                                print(f"🔍 SYNTH DEBUG: No tool_calls in message")
+                            print(f"🔍 SYNTH DEBUG: Message content: {message.get('content', 'N/A')[:200]}...")
+                    return result
 
                 # Handle rate limits with exponential backoff
                 if response.status_code == 429:
@@ -191,6 +278,69 @@ class SyncSynthClient:
             },
         )
 
+    def responses_create(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        previous_response_id: Optional[str] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto",
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """
+        Create response using Synth Responses API (sync version).
+
+        See AsyncSynthClient.responses_create for full parameter documentation.
+        """
+        self._ensure_client()
+
+        # Build payload for Responses API
+        payload = {
+            "model": model,
+            "messages": messages,
+        }
+
+        # Add optional parameters
+        if previous_response_id is not None:
+            payload["previous_response_id"] = previous_response_id
+        if tools is not None:
+            payload["tools"] = tools
+            payload["tool_choice"] = tool_choice
+
+        # Add any additional kwargs
+        payload.update(kwargs)
+
+        # Retry logic
+        for attempt in range(self.config.max_retries):
+            try:
+                response = self._client.post(
+                    f"{self.config.get_base_url_without_v1()}/v1/responses", json=payload
+                )
+
+                if response.status_code == 200:
+                    return response.json()
+
+                # Handle rate limits
+                if response.status_code == 429:
+                    wait_time = 2**attempt
+                    logger.warning(f"Rate limited, waiting {wait_time}s...")
+                    import time
+                    time.sleep(wait_time)
+                    continue
+
+                # Other errors
+                error_msg = f"API error {response.status_code}: {response.text}"
+                logger.error(error_msg)
+                raise Exception(error_msg)
+
+            except httpx.TimeoutException:
+                if attempt < self.config.max_retries - 1:
+                    logger.warning(f"Timeout on attempt {attempt + 1}, retrying...")
+                    continue
+                raise
+
+        raise Exception(f"Failed after {self.config.max_retries} attempts")
+
     def chat_completions_create(
         self, model: str, messages: List[Dict[str, Any]], **kwargs
     ) -> Dict[str, Any]:
```
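A hypothetical calling pattern for the new endpoint; the default `AsyncSynthClient()` construction and the `"id"` key on the returned dict are assumptions (the diff shows only the method bodies, which reference `self.config` and post to `/v1/responses`):

```python
# Sketch: threading two turns through the Synth Responses API.
import asyncio

from synth_ai.lm.vendors.synth_client import AsyncSynthClient


async def main() -> None:
    client = AsyncSynthClient()  # assumption: default construction reads config from env
    first = await client.responses_create(
        model="my-finetuned-model",  # placeholder model id
        messages=[{"role": "user", "content": "Hello"}],
    )
    # Follow-up turn: previous_response_id threads server-side state, mirroring
    # the async/sync method pair added above.
    await client.responses_create(
        model="my-finetuned-model",
        messages=[{"role": "user", "content": "Say more."}],
        previous_response_id=first.get("id"),  # assumed response shape
    )


asyncio.run(main())
```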
synth_ai/lm/warmup.py CHANGED
```diff
@@ -6,6 +6,8 @@ Handles model preloading and warmup polling.
 import httpx
 import asyncio
 import logging
+import sys
+import time
 from typing import Optional, Dict, Any
 from datetime import datetime, timedelta
 from .config import SynthConfig
@@ -46,9 +48,10 @@
 async def warmup_synth_model(
     model_name: str,
     config: Optional[SynthConfig] = None,
-    max_attempts: int =
+    max_attempts: Optional[int] = None,
     force: bool = False,
     verbose: bool = True,
+    gpu_preference: Optional[str] = None,
 ) -> bool:
     """
     Warm up a model on the Synth backend using fire-and-forget approach.
@@ -73,6 +76,8 @@ async def warmup_synth_model(
 
     async with httpx.AsyncClient() as client:
         headers = {"Authorization": f"Bearer {config.api_key}"}
+        if gpu_preference:
+            headers["X-GPU-Preference"] = gpu_preference
 
         # Step 1: Start warmup (fire and forget)
         try:
@@ -84,8 +89,11 @@
 
             if response.status_code == 200:
                 response_data = response.json()
-                if response_data.get("status") in ["warming", "already_warming"
+                if response_data.get("status") in ["warming", "already_warming"]:
                     pass
+                elif response_data.get("status") == "already_warmed":
+                    _warmup_status.mark_warm(model_name)
+                    return True
                 else:
                     logger.warning(f"Unexpected warmup response: {response_data}")
             else:
@@ -98,8 +106,13 @@
             logger.warning(f"Warmup start failed: {e}")
             return False
 
-        # Step 2: Poll status until ready
-
+        # Step 2: Poll status until ready (indefinite by default)
+        spinner = "|/-\\"
+        spin_idx = 0
+        start_time = time.time()
+        attempt = 0
+        while True:
+            attempt += 1
             try:
                 response = await client.get(
                     f"{config.get_base_url_without_v1()}/warmup/status/{model_name}",
@@ -113,34 +126,58 @@
 
                 if status == "warmed":
                     _warmup_status.mark_warm(model_name)
+                    # Final spinner line as success
+                    elapsed = int(time.time() - start_time)
+                    sys.stdout.write(f"\r✅ Warmed {model_name} in {elapsed}s \n")
+                    sys.stdout.flush()
                     return True
                 elif status == "failed":
                     error = status_data.get("error", "Unknown error")
                     logger.error(f"❌ Warmup failed for {model_name}: {error}")
+                    sys.stdout.write(f"\r❌ Warmup failed: {error} \n")
+                    sys.stdout.flush()
                     return False
-                elif status == "warming":
-                    # Still warming up, continue polling
-                    pass
-                elif status == "not_started":
-                    # Warmup hasn't started yet, continue polling
-                    pass
                 else:
-
+                    # Treat unknown statuses (e.g., "cold") as still warming
+                    elapsed = int(time.time() - start_time)
+                    wheel = spinner[spin_idx % len(spinner)]
+                    spin_idx += 1
+                    label = status or "pending"
+                    sys.stdout.write(
+                        f"\r⏳ Warming {model_name} [{wheel}] status={label} elapsed={elapsed}s"
+                    )
+                    sys.stdout.flush()
 
                 # Short sleep between status checks
                 await asyncio.sleep(2.0)
 
             except httpx.TimeoutException:
-
-
+                # Continue polling; update spinner line
+                elapsed = int(time.time() - start_time)
+                wheel = spinner[spin_idx % len(spinner)]
+                spin_idx += 1
+                sys.stdout.write(
+                    f"\r⏳ Warming {model_name} [{wheel}] status=timeout elapsed={elapsed}s"
+                )
+                sys.stdout.flush()
                 await asyncio.sleep(1.0)
             except Exception as e:
-
-
+                # Continue polling; update spinner line with error label
+                elapsed = int(time.time() - start_time)
+                wheel = spinner[spin_idx % len(spinner)]
+                spin_idx += 1
+                sys.stdout.write(
+                    f"\r⏳ Warming {model_name} [{wheel}] status=error elapsed={elapsed}s"
+                )
+                sys.stdout.flush()
                 await asyncio.sleep(1.0)
 
-
-
+            # Optional max_attempts for callers who want a cap
+            if max_attempts is not None and attempt >= max_attempts:
+                logger.error(f"Failed to warm up {model_name} after {max_attempts} status checks")
+                sys.stdout.write("\n")
+                sys.stdout.flush()
+                return False
 
 
 def get_warmup_status() -> WarmupStatus:
```
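Usage sketch for the reworked entry point; the model name and GPU label below are placeholders, and the ~2s poll interval comes from the loop above:

```python
# Sketch: warming a model with the new signature (max_attempts now optional).
import asyncio

from synth_ai.lm.warmup import warmup_synth_model


async def main() -> None:
    # Default: poll indefinitely with a spinner until the backend reports "warmed".
    ok = await warmup_synth_model("my-model", gpu_preference="A100")
    # Or cap polling at 30 status checks (roughly a minute at the 2s interval):
    ok = await warmup_synth_model("my-model", max_attempts=30)
    print("warm" if ok else "failed")


asyncio.run(main())
```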
synth_ai/tracing/__init__.py CHANGED
@@ -0,0 +1,18 @@
```python
import sys as _sys
import importlib as _importlib

_pkg = _importlib.import_module('synth_ai.v0.tracing')
_sys.modules[__name__] = _pkg

_SUBMODULES = [
    'abstractions', 'base_client', 'client_manager', 'config', 'context',
    'decorators', 'immediate_client', 'local', 'log_client_base', 'retry_queue',
    'trackers', 'upload', 'utils'
]
for _m in _SUBMODULES:
    _sys.modules[f'{__name__}.{_m}'] = _importlib.import_module(f'synth_ai.v0.tracing.{_m}')

_events_pkg = _importlib.import_module('synth_ai.v0.tracing.events')
_sys.modules[f'{__name__}.events'] = _events_pkg
for _m in ['manage', 'scope', 'store']:
    _sys.modules[f'{__name__}.events.{_m}'] = _importlib.import_module(f'synth_ai.v0.tracing.events.{_m}')
```
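The shim registers every v0 module under the legacy name in `sys.modules`, so both import paths resolve to identical module objects and existing imports keep working after the move to `synth_ai.v0`. Illustrated:

```python
# Illustrative check of the aliasing the shim sets up.
import synth_ai.tracing as legacy
import synth_ai.v0.tracing as v0

assert legacy is v0  # same module object under both names

import synth_ai.tracing.events.store as legacy_store
import synth_ai.v0.tracing.events.store as v0_store

assert legacy_store is v0_store  # nested submodules are forwarded too
```

The `tracing_v1` shim below follows the same pattern.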
synth_ai/tracing_v1/__init__.py CHANGED
```diff
@@ -1,16 +1,31 @@
-import
+import sys as _sys
+import importlib as _importlib
 
-#
-
-
-    "Please use synth_ai.tracing_v2 instead. "
-    "Backend upload functionality is no longer supported in v1.",
-    DeprecationWarning,
-    stacklevel=2,
-)
+# Forward top-level package
+_pkg = _importlib.import_module('synth_ai.v0.tracing_v1')
+_sys.modules[__name__] = _pkg
 
-#
-
-
-
-
+# Explicitly forward submodules so `synth_ai.tracing_v1.X` works
+_SUBMODULES = [
+    'abstractions',
+    'base_client',
+    'client_manager',
+    'config',
+    'context',
+    'decorators',
+    'immediate_client',
+    'local',
+    'log_client_base',
+    'retry_queue',
+    'trackers',
+    'upload',
+    'utils',
+]
+for _m in _SUBMODULES:
+    _sys.modules[f'{__name__}.{_m}'] = _importlib.import_module(f'synth_ai.v0.tracing_v1.{_m}')
+
+# Forward events package and its submodules
+_events_pkg = _importlib.import_module('synth_ai.v0.tracing_v1.events')
+_sys.modules[f'{__name__}.events'] = _events_pkg
+for _m in ['manage', 'scope', 'store']:
+    _sys.modules[f'{__name__}.events.{_m}'] = _importlib.import_module(f'synth_ai.v0.tracing_v1.events.{_m}')
```
synth_ai/tracing_v3/config.py CHANGED
```diff
@@ -9,15 +9,21 @@ class TursoConfig:
     """Configuration for Turso/sqld connection."""
 
     # Default values matching serve.sh
-    DEFAULT_DB_FILE = "synth_ai.db"
+    DEFAULT_DB_FILE = "traces/v3/synth_ai.db"
     DEFAULT_HTTP_PORT = 8080
 
     # Local embedded database for async SQLAlchemy
-    #
-
-    "
-
-
+    # Resolve to the actual SQLite file used by sqld if the base path is a directory
+    def _resolve_sqlite_db_url() -> str:  # type: ignore[no-redef]
+        base_path = os.path.abspath(os.getenv("SQLD_DB_PATH", "traces/v3/synth_ai.db"))
+        # If sqld is managing this DB, the real SQLite file lives under dbs/default/data
+        candidate = os.path.join(base_path, "dbs", "default", "data")
+        if os.path.isdir(base_path) and os.path.exists(candidate):
+            return f"sqlite+aiosqlite:///{candidate}"
+        return f"sqlite+aiosqlite:///{base_path}"
+
+    # Use env override if provided; otherwise resolve based on SQLD layout
+    db_url: str = os.getenv("TURSO_LOCAL_DB_URL", _resolve_sqlite_db_url())
 
     # Remote database sync configuration
     sync_url: str = os.getenv("TURSO_DATABASE_URL", "")
@@ -40,7 +46,7 @@ class TursoConfig:
 
     # Daemon settings (for local sqld) - match serve.sh defaults
     sqld_binary: str = os.getenv("SQLD_BINARY", "sqld")
-    sqld_db_path: str = os.getenv("SQLD_DB_PATH", "synth_ai.db")
+    sqld_db_path: str = os.getenv("SQLD_DB_PATH", "traces/v3/synth_ai.db")
     sqld_http_port: int = int(os.getenv("SQLD_HTTP_PORT", "8080"))
     sqld_idle_shutdown: int = int(os.getenv("SQLD_IDLE_SHUTDOWN", "0"))  # 0 = no idle shutdown
```
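Resolution order sketch for `db_url` under the new layout: an explicit `TURSO_LOCAL_DB_URL` wins outright; otherwise `SQLD_DB_PATH` is probed for the sqld-managed file at `<path>/dbs/default/data`. Note the defaults are evaluated at import time, so env vars must be set first; that `TursoConfig()` is constructible with all defaults is an assumption.

```python
# Illustrative: env vars go first, then import the config module.
import os

os.environ.pop("TURSO_LOCAL_DB_URL", None)            # no explicit override
os.environ.setdefault("SQLD_DB_PATH", "traces/v3/synth_ai.db")

from synth_ai.tracing_v3.config import TursoConfig

cfg = TursoConfig()  # assumption: defaults suffice
# sqld-managed directory -> sqlite+aiosqlite:///.../traces/v3/synth_ai.db/dbs/default/data
# plain SQLite file      -> sqlite+aiosqlite:///.../traces/v3/synth_ai.db
print(cfg.db_url)
```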