synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/cli/__init__.py +66 -0
- synth_ai/cli/balance.py +205 -0
- synth_ai/cli/calc.py +70 -0
- synth_ai/cli/demo.py +74 -0
- synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
- synth_ai/cli/man.py +103 -0
- synth_ai/cli/recent.py +126 -0
- synth_ai/cli/root.py +184 -0
- synth_ai/cli/status.py +126 -0
- synth_ai/cli/traces.py +136 -0
- synth_ai/cli/watch.py +508 -0
- synth_ai/config/base_url.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +646 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
- synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
- synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
- synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
- synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
- synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
- synth_ai/experimental/synth_oss.py +446 -0
- synth_ai/learning/core.py +21 -0
- synth_ai/learning/gateway.py +4 -0
- synth_ai/learning/prompts/mipro.py +0 -0
- synth_ai/lm/__init__.py +3 -0
- synth_ai/lm/core/main.py +4 -0
- synth_ai/lm/core/main_v3.py +68 -13
- synth_ai/lm/core/vendor_clients.py +4 -0
- synth_ai/lm/provider_support/openai.py +11 -2
- synth_ai/lm/vendors/base.py +7 -0
- synth_ai/lm/vendors/openai_standard.py +339 -4
- synth_ai/lm/vendors/openai_standard_responses.py +243 -0
- synth_ai/lm/vendors/synth_client.py +155 -5
- synth_ai/lm/warmup.py +54 -17
- synth_ai/tracing/__init__.py +18 -0
- synth_ai/tracing_v1/__init__.py +29 -14
- synth_ai/tracing_v3/config.py +13 -7
- synth_ai/tracing_v3/db_config.py +6 -6
- synth_ai/tracing_v3/turso/manager.py +8 -8
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/dashboard.py +329 -0
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
- synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
- synth_ai/{tracing → v0/tracing}/context.py +1 -1
- synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
- synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
- synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
- synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
- synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
- synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
- synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
- synth_ai/{tracing → v0/tracing}/upload.py +4 -4
- synth_ai/v0/tracing_v1/__init__.py +16 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/METADATA +98 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/RECORD +98 -62
- /synth_ai/{tracing/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
- /synth_ai/{tracing_v1/events/__init__.py → learning/prompts/gepa.py} +0 -0
- /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
- /synth_ai/{tracing → v0/tracing}/config.py +0 -0
- /synth_ai/{tracing → v0/tracing}/local.py +0 -0
- /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/top_level.txt +0 -0
synth_ai/experimental/synth_oss.py
ADDED
@@ -0,0 +1,446 @@
+
+"""
+Synth OSS Integration Module
+
+This module provides integration with Synth's open-source inference and training APIs
+from the monorepo learning_v2 service. All APIs are OpenAI-compatible.
+
+Learning V2 APIs available for integration via lm/:
+"""
+
+# API Configuration
+SYNTH_BACKEND_URL = ""
+
+# Learning V2 Modal Service URLs
+LEARNING_V2_URLS = {
+    "dev": "https://synth-laboratories-dev--learning-v2-service-dev-fastapi-app.modal.run",
+    "prod": "https://synth-laboratories-prod--learning-v2-service-prod-fastapi-app.modal.run",
+    "main": "https://synth-laboratories--learning-v2-service-fastapi-app.modal.run"
+}
+
+# ============================================================================
+# HEALTH & STATUS APIS
+# ============================================================================
+
+HEALTH_APIS = {
+    "basic_health": {
+        "method": "GET",
+        "endpoint": "/health",
+        "description": "Basic health check",
+        "response": {"status": "healthy"}
+    },
+    "detailed_health": {
+        "method": "GET",
+        "endpoint": "/learning/health",
+        "description": "Detailed health check including GPU function availability",
+        "response": {"status": "healthy", "components": {...}}
+    }
+}
+
+# ============================================================================
+# FILE MANAGEMENT APIS
+# ============================================================================
+
+FILE_MANAGEMENT_APIS = {
+    "upload_file": {
+        "method": "POST",
+        "endpoint": "/files",
+        "description": "Upload a file for fine-tuning (JSONL format)",
+        "request": "multipart/form-data with 'file' and 'purpose'='fine-tune'",
+        "response": {
+            "id": "file-abc123",
+            "object": "file",
+            "bytes": 1234,
+            "created_at": 1638360000,
+            "filename": "data.jsonl",
+            "purpose": "fine-tune"
+        }
+    },
+    "list_files": {
+        "method": "GET",
+        "endpoint": "/files",
+        "description": "List all uploaded files",
+        "params": {"limit": "optional"},
+        "response": {"object": "list", "data": ["file_objects"]}
+    },
+    "get_file": {
+        "method": "GET",
+        "endpoint": "/files/{file_id}",
+        "description": "Get file metadata by ID",
+        "response": "Single file object with metadata"
+    },
+    "delete_file": {
+        "method": "DELETE",
+        "endpoint": "/files/{file_id}",
+        "description": "Delete a file",
+        "response": {"id": "file-abc123", "object": "file", "deleted": True}
+    },
+    "get_file_content": {
+        "method": "GET",
+        "endpoint": "/files/{file_id}/content",
+        "description": "Download raw file content",
+        "response": "Raw file content stream"
+    }
+}
+
+# ============================================================================
+# TRAINING/FINE-TUNING APIS
+# ============================================================================
+
+TRAINING_APIS = {
+    "create_training_job": {
+        "method": "POST",
+        "endpoint": "/fine_tuning/jobs",
+        "description": "Create a fine-tuning job",
+        "request": {
+            "model": "Qwen/Qwen3-0.5B",
+            "training_file": "file-abc123",
+            "training_type": "sft",  # or "dpo"
+            "hyperparameters": {...},
+            "suffix": "optional"
+        },
+        "response": {
+            "object": "fine_tuning.job",
+            "id": "ftjob-xyz789",
+            "model": "...",
+            "status": "validating_files",
+            "training_file": "file-abc123",
+            "hyperparameters": {...}
+        }
+    },
+    "list_training_jobs": {
+        "method": "GET",
+        "endpoint": "/fine_tuning/jobs",
+        "description": "List all training jobs",
+        "response": {"object": "list", "data": ["job_objects"]}
+    },
+    "get_training_job": {
+        "method": "GET",
+        "endpoint": "/fine_tuning/jobs/{job_id}",
+        "description": "Get training job status",
+        "response": {
+            "object": "fine_tuning.job",
+            "id": "ftjob-xyz789",
+            "status": "running",  # or "completed", "failed", "cancelled"
+            "fine_tuned_model": "ft:model:suffix"  # when completed
+        }
+    },
+    "cancel_training_job": {
+        "method": "POST",
+        "endpoint": "/fine_tuning/jobs/{job_id}/cancel",
+        "description": "Cancel a running training job",
+        "response": {"object": "fine_tuning.job", "id": "...", "status": "cancelled"}
+    },
+    "get_training_events": {
+        "method": "GET",
+        "endpoint": "/fine_tuning/jobs/{job_id}/events",
+        "description": "Get training logs/events",
+        "response": {
+            "object": "list",
+            "data": [{
+                "object": "fine_tuning.job.event",
+                "level": "info",
+                "message": "Training started",
+                "created_at": 1638360000
+            }]
+        }
+    }
+}
+
+# ============================================================================
+# INFERENCE APIS
+# ============================================================================
+
+INFERENCE_APIS = {
+    "chat_completions": {
+        "method": "POST",
+        "endpoint": "/chat/completions",
+        "description": "OpenAI-compatible chat completions for base and fine-tuned models",
+        "request": {
+            "model": "Qwen/Qwen3-0.5B",  # or "ft:Qwen/Qwen3-0.5B:suffix"
+            "messages": [{"role": "user", "content": "Hello"}],
+            "temperature": 0.7,
+            "max_tokens": 100,
+            "top_p": 1.0,
+            "stream": False,  # Set to True for streaming
+            "tools": [],  # For tool calling
+            "tool_choice": "auto"
+        },
+        "response": {
+            "id": "chatcmpl-123",
+            "object": "chat.completion",
+            "created": 1638360000,
+            "model": "Qwen/Qwen3-0.5B",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "Hello! How can I help you?",
+                    "tool_calls": []  # If tools were used
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 20,
+                "total_tokens": 30
+            }
+        },
+        "streaming": "Server-sent events with data: {...} format when stream=True"
+    }
+}
+
+# ============================================================================
+# MODEL MANAGEMENT APIS
+# ============================================================================
+
+MODEL_APIS = {
+    "list_models": {
+        "method": "GET",
+        "endpoint": "/models",
+        "description": "List all available models (base and fine-tuned)",
+        "response": {
+            "object": "list",
+            "data": [{
+                "id": "Qwen/Qwen3-0.5B",
+                "object": "model",
+                "created": 1638360000,
+                "owned_by": "learning_v2"
+            }]
+        }
+    },
+    "delete_model": {
+        "method": "DELETE",
+        "endpoint": "/models/{model_id}",
+        "description": "Delete a fine-tuned model",
+        "response": {"id": "ft:model:suffix", "object": "model", "deleted": True}
+    }
+}
+
+# ============================================================================
+# SUPPORTED MODELS
+# ============================================================================
+
+SUPPORTED_MODELS = {
+    "base_models": [
+        # Qwen 3 family
+        "Qwen/Qwen3-0.6B",
+        "Qwen/Qwen3-1.8B",
+        "Qwen/Qwen3-8B",
+        "Qwen/Qwen3-14B",
+        "Qwen/Qwen3-32B",
+        # Qwen 2.5 family
+        "Qwen/Qwen2.5-0.5B-Instruct",
+        "Qwen/Qwen2.5-1.5B-Instruct",
+        "Qwen/Qwen2.5-3B-Instruct",
+        "Qwen/Qwen2.5-7B-Instruct",
+        "Qwen/Qwen2.5-14B-Instruct",
+        "Qwen/Qwen2.5-32B-Instruct",
+        "Qwen/Qwen2.5-72B-Instruct",
+        # OLMo 2 family
+        "allenai/OLMo-2-0425-1B-Instruct",
+        "allenai/OLMo-2-1124-7B-Instruct",
+        "allenai/OLMo-2-1124-13B-Instruct"
+    ],
+    "training_types": ["sft", "dpo"],
+    "gpu_types": ["A10G", "L40S", "A100", "H100"],
+    "features": [
+        "Tool calling",
+        "Streaming responses",
+        "Fine-tuning",
+        "Multi-GPU training",
+        "JSONL data format",
+        "OpenAI compatibility"
+    ]
+}
+
+# ============================================================================
+# INTEGRATION PLAN – Synth OSS
+# ==========================================================================
+"""
+GPU & Resource Selection
+------------------------
+Synth OSS decides the GPU based on the `ModelFamily` definition:
+• Each `ModelConfig` lists `inference_gpus` and `training_gpus`.
+• The API’s `InferenceRouter` calls `_select_gpu_for_model`, which chooses the **first recommended GPU** returned by `get_model_gpu_recommendations` (usually the `default_inference_gpu`).
+• By default the server picks the first recommended GPU, **but** we can request
+  another GPU type via a custom header that the server *can* opt to honor:
+
+      X-GPU-Preference: L40S   # or A10G, A100, H100
+
+The current dev deployment already forwards this header to `InferenceRouter`,
+so adding it makes the GPU configurable without breaking existing behaviour.
+
+`InferenceConfig` therefore gets a new optional field:
+
+```python
+class InferenceConfig(BaseModel):
+    stream: bool = False
+    gpu_preference: Optional[str] = None  # "A10G", "L40S", "A100", "H100"
+    # ...future knobs (temperature, max_tokens, etc.)
+```
+
+LM will include `gpu_preference` as that header when `backend="synth"`. If the
+header is omitted or the value is not valid for the chosen model, the server
+falls back to its default selection. This keeps the API forward-compatible and
+provides explicit GPU control when supported.
+
+Only two parts of synth-ai need to change for Synth OSS inference:
+
+1. LM() class (synth_ai.lm)
+2. The async respond(...) coroutine on that class
+
+Extend LM with backend="synth"; when selected, issue POST requests to
+`${LEARNING_V2_URL}/chat/completions`, supporting both streaming and
+non-streaming modes and returning the same dict structure as today.
+
+Everything else (file upload, fine-tuning, model listing) lives in the
+`synth_ai.learning` package and does NOT affect LM:
+
+    synth_ai/learning/
+    ├─ files.py
+    ├─ training.py
+    ├─ models.py
+    ├─ client.py
+    └─ types.py
+
+Warm-up flow
+~~~~~~~~~~~~
+`learning_v2` exposes `POST /warmup/{model_id}` and `GET /warmup/status/{model_id}`
+(via the Render proxy). We can exploit that to reduce first-token latency.
+
+LM API addition:
+
+```python
+async def warmup(self, model: str | None = None, gpu_preference: str | None = None) -> dict:
+    """Pre-spin the container & load weights for *model* on the requested GPU.
+    Returns the JSON response from /warmup. If *model* is None we warm-up
+    `self.model`.
+    """
+```
+
+Implementation sketch (backend == "synth")
+------------------------------------------
+1. Determine `model_id = model or self.model`.
+2. Build headers:
+   ```python
+   headers = {}
+   if gpu_preference:
+       headers["X-GPU-Preference"] = gpu_preference
+   ```
+3. `POST f"{url}/warmup/{model_id}"`.
+4. Optionally call `GET /warmup/status/{model_id}` in a loop until
+   `status == "ready"` (exponential backoff) – or expose a separate
+   `await LM.warmup_status(model)` helper.
+
+The method is a *no-op* for the default (OpenAI) backend so existing code keeps
+working.
+
+"""
+
+
+"""
+LEARNING_v2 server-side changes required to honor `X-GPU-Preference`
+====================================================================
+1. **Add header extraction in API layer**
+   • File: `backend/app/services/learning_v2/modal_service/api_openai_complete.py`
+   • In `@app.post("/chat/completions")` add:
+     ```python
+     gpu_pref = req.headers.get("X-GPU-Preference")
+     ```
+   • Pass `gpu_pref` to `inference_router.route_completion(...)` and
+     `route_streaming_completion(...)`.
+
+2. **Propagate preference through `InferenceRouter`**
+   • Update signatures of `route_completion`, `route_streaming_completion` and
+     `_select_gpu_for_model` to accept `gpu_preference: Optional[str] = None`.
+   • Inside `_select_gpu_for_model`:
+     ```python
+     if gpu_preference and gpu_preference in valid_gpu_types_for_model:
+         return gpu_preference
+     ```
+     falling back to current auto-selection logic otherwise.
+
+3. **Validate against model config**
+   • Retrieve `model_cfg.inference_gpus` and refuse unsupported choices with
+     `HTTP 400` ("GPU type not supported for this model").
+
+4. **Expose preference in usage / logging**
+   • Include the chosen GPU in the response’s `system_fingerprint` or add a
+     field `selected_gpu` so clients can confirm the assignment.
+   • Log header + selected GPU for observability.
+
+5. **(Optional) Accept preference in JSON body**
+   • If we want parity with LM’s `InferenceConfig`, extend
+     `ChatCompletionRequest` with an optional `gpu_preference` field and treat
+     it as a fallback if the header is missing.
+
+6. **Update automated tests**
+   • Modify `tests/dev/learning_v2/test_multi_gpu_inference_v2.py` to set the
+     header and assert that the correct GPU function (`*_L40S_generate`, etc.)
+     was invoked (can mock `get_gpu_function`).
+
+7. **Documentation**
+   • Add the header to `learning_v2_api_structure.txt` and README so external
+     callers know how to request a GPU.
+
+These changes are backwards-compatible: calls without the header behave exactly
+as before.
+"""
+
+"""
+Ran tool
+
+Ran tool
+
+Read file: /Users/joshuapurtell/Documents/GitHub/monorepo/backend/app/services/learning/unified_ft_service/api.py
+
+Ran tool
+Read file: /Users/joshuapurtell/Documents/GitHub/monorepo/backend/app/services/learning/unified_ft_service/api.py
+
+Ran tool
+
+Warm-up today:
+
+• `POST /warmup/{model_name}` in `unified_ft_service/api.py` kicks off an async worker that:
+  1. Calls `get_model_routing(model_name)` → this falls back to `choose_gpu(model_id, req_gpu=None)`
+     so no GPU can be requested.
+  2. Fires the routed Modal function once (“dummy request”) to cache weights.
+
+Because the handler never accepts a GPU argument and never reads `X-GPU-Preference`, the server always warms the model with its default GPU.
+
+What has to change
+==================
+
+Server-side (learning_v2)
+1. Add an optional header or query field
+   `X-GPU-Preference` **or** `?gpu=A100` to `POST /warmup/{model_id}`.
+2. Pass the value into `choose_gpu(model_id, req_gpu)` inside
+   `warmup_worker()`. The helper already validates the choice, so no other
+   change is required.
+3. Expose the selected GPU in the JSON response for visibility.
+
+Client-side (synth-ai LM)
+```python
+async def warmup(
+    self,
+    model: str | None = None,
+    gpu_preference: str | None = None,
+    wait_until_ready: bool = False,
+) -> dict:
+    mdl = model or self.model
+    headers = {}
+    if gpu_preference:
+        headers["X-GPU-Preference"] = gpu_preference
+    resp = await _client.post(f"{url}/warmup/{mdl}", headers=headers)
+    if wait_until_ready:
+        while resp.json()["status"] != "warmed":
+            await asyncio.sleep(2)
+            resp = await _client.get(f"{url}/warmup/status/{mdl}")
+    return resp.json()
+```
+
+So: **the existing endpoint does not yet support GPU selection; we need to add
+the small change above on the `learning_v2` side and then LM.warmup can request
+specific GPUs.**
+"""
@@ -0,0 +1,21 @@
+"""
+Gateway for Offline Training Runs
+Accept:
+    PolicyInformation
+        Policy according to System scheme
+    SynthDataset
+        (Trace, Impetus, Intent) triplets
+        System scheme
+    MethodConfig
+    MethodDataset
+        e.g. message triplets, preference pairs, etc
+
+Always supports either SynthDataset or MethodDataset
+
+SFT (Synth, Gemini, OpenAI)
+DPO (Synth, OpenAI)
+Progress Reward Model (Synth)
+
+
+class TrainingRun
+"""
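The new module above is a design note rather than executable code. A purely illustrative sketch of the shapes it names follows; only the names (Trace, Impetus, Intent), SynthDataset, MethodConfig/MethodDataset, and TrainingRun come from the docstring, and every field layout here is an assumption.

```python
# Illustrative only: field names beyond those in the docstring are assumptions.
from dataclasses import dataclass, field
from typing import Any, List

@dataclass
class TripletExample:
    trace: Any      # Trace
    impetus: str    # Impetus
    intent: str     # Intent

@dataclass
class SynthDataset:
    examples: List[TripletExample] = field(default_factory=list)

@dataclass
class TrainingRun:
    method: str                                # e.g. "sft", "dpo", or a progress reward model
    dataset: SynthDataset                      # or a MethodDataset (message triplets, preference pairs, ...)
    method_config: dict = field(default_factory=dict)
```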
File without changes
synth_ai/lm/__init__.py
CHANGED
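The hunks below re-export the v3 `LM` class at the package root, so callers no longer need the deep module path. A minimal sketch of the new import (nothing else about the constructor is assumed here):

```python
# New top-level import path introduced by the change below.
from synth_ai.lm import LM  # previously: from synth_ai.lm.core.main_v3 import LM
```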
@@ -21,6 +21,7 @@ from .vendors.synth_client import (
     create_chat_completion_async,
     create_chat_completion_sync,
 )
+from .core.main_v3 import LM
 
 __all__ = [
     # Configuration
@@ -42,6 +43,8 @@ __all__ = [
     "create_sync_client",
     "create_chat_completion_async",
     "create_chat_completion_sync",
+    # Core LM class
+    "LM",
 ]
 
 # Version info
synth_ai/lm/core/main.py
CHANGED
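The hunks below add an optional `enable_thinking` flag to the v2 `LM` constructor and store it in `lm_config` when set. A usage sketch, assuming the remaining constructor arguments keep their existing defaults; only `model_name`, `temperature`, and `enable_thinking` appear in this diff.

```python
# Sketch only: enable_thinking is forwarded into lm_config when it is not None.
from synth_ai.lm.core.main import LM

lm = LM(
    model_name="Qwen/Qwen3-14B",
    temperature=0.7,
    enable_thinking=True,  # new in 0.2.3; providers that support it (e.g. Synth + Qwen3) receive it
    # ...remaining constructor arguments unchanged
)
# lm.lm_config -> {"temperature": 0.7, "enable_thinking": True}
```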
@@ -129,6 +129,7 @@ class LM:
                 str,
             ]
         ] = None,
+        enable_thinking: Optional[bool] = None,
     ):
         # print("Structured output mode", structured_output_mode)
         # Check for environment variable if provider is not specified
@@ -162,6 +163,9 @@ class LM:
         # Override temperature to 1 for reasoning models
         effective_temperature = 1.0 if model_name in reasoning_models else temperature
         self.lm_config = {"temperature": effective_temperature}
+        if enable_thinking is not None:
+            # For providers that support it (e.g., Synth + Qwen3), this will be forwarded
+            self.lm_config["enable_thinking"] = enable_thinking
         self.model_name = model_name
 
     def respond_sync(
synth_ai/lm/core/main_v3.py
CHANGED
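The hunks below teach the v3 `LM` class to route between the standard chat completions API, the Responses API, and Harmony encoding for OSS models, and to chain calls through `previous_response_id`. A hedged usage sketch: only `use_responses_api`, `auto_store_responses`, and `previous_response_id` come from this diff, while the `model=` and `user_message=` arguments are assumptions about the existing signature.

```python
# Sketch only: response threads chain automatically when auto_store_responses=True;
# previous_response_id overrides the stored id for manual control.
import asyncio
from synth_ai.lm import LM

async def demo():
    lm = LM(model="o3-mini", use_responses_api=True, auto_store_responses=True)

    first = await lm.respond_async(user_message="Summarize the warmup flow.")
    # Subsequent calls reuse lm._last_response_id unless overridden:
    second = await lm.respond_async(
        user_message="Now list the supported GPU types.",
        previous_response_id=getattr(first, "response_id", None),
    )
    return second

# asyncio.run(demo())
```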
@@ -102,6 +102,9 @@ class LM:
         system_id: Optional[str] = None,
         enable_v3_tracing: bool = True,
         enable_v2_tracing: Optional[bool] = None,  # v2 compatibility
+        # Responses API parameters
+        auto_store_responses: bool = True,
+        use_responses_api: Optional[bool] = None,
         **additional_params,
     ):
         # Handle v2 compatibility parameters
@@ -160,6 +163,11 @@ class LM:
         self.system_id = system_id or f"lm_{self.vendor or 'unknown'}_{self.model or 'unknown'}"
         self.enable_v3_tracing = enable_v3_tracing
         self.additional_params = additional_params
+
+        # Responses API thread management
+        self.auto_store_responses = auto_store_responses
+        self.use_responses_api = use_responses_api
+        self._last_response_id: Optional[str] = None
 
         # Set structured output handler if needed
         if self.response_format:
@@ -180,6 +188,25 @@ class LM:
             self._vendor_wrapper = get_client(self.model, provider=self.vendor)
         return self._vendor_wrapper
 
+    def _should_use_responses_api(self) -> bool:
+        """Determine if Responses API should be used."""
+        if self.use_responses_api is not None:
+            return self.use_responses_api
+
+        # Auto-detect based on model
+        RESPONSES_MODELS = {
+            "o4-mini", "o3", "o3-mini",  # Supported Synth-hosted models
+            "gpt-oss-120b", "gpt-oss-20b"  # OSS models via Synth
+        }
+        return self.model in RESPONSES_MODELS or (self.model and self.model in reasoning_models)
+
+    def _should_use_harmony(self) -> bool:
+        """Determine if Harmony encoding should be used for OSS models."""
+        # Only use Harmony for OSS models when NOT using OpenAI vendor
+        # OpenAI hosts these models directly via Responses API
+        HARMONY_MODELS = {"gpt-oss-120b", "gpt-oss-20b"}
+        return self.model in HARMONY_MODELS and self.vendor != "openai"
+
     async def respond_async(
         self,
         system_message: Optional[str] = None,
@@ -190,6 +217,7 @@ class LM:
         response_model: Optional[BaseModel] = None,  # v2 compatibility
         tools: Optional[List[BaseTool]] = None,
         turn_number: Optional[int] = None,
+        previous_response_id: Optional[str] = None,  # Responses API thread management
         **kwargs,
     ) -> BaseLMResponse:
         """Async method to get LM response with v3 tracing."""
@@ -229,6 +257,17 @@ class LM:
 
         # Get vendor wrapper
         vendor_wrapper = self.get_vendor_wrapper()
+
+        # Determine API type to use
+        use_responses = self._should_use_responses_api()
+        use_harmony = self._should_use_harmony()
+
+        # Decide response ID to use for thread management
+        response_id_to_use = None
+        if previous_response_id:
+            response_id_to_use = previous_response_id  # Manual override
+        elif self.auto_store_responses and self._last_response_id:
+            response_id_to_use = self._last_response_id  # Auto-chain
 
         # Prepare parameters based on vendor type
         if hasattr(vendor_wrapper, "_hit_api_async"):
@@ -256,21 +295,36 @@ class LM:
         if self.json_mode:
             params["response_format"] = {"type": "json_object"}
 
-        # Call vendor
+        # Call vendor with appropriate API type
         try:
-            #
-            if hasattr(vendor_wrapper, "
-
-
-
-
-
-            loop = asyncio.get_event_loop()
-            response = await loop.run_in_executor(None, vendor_wrapper.respond, params)
+            # Route to appropriate API
+            if use_harmony and hasattr(vendor_wrapper, "_hit_api_async_harmony"):
+                params["previous_response_id"] = response_id_to_use
+                response = await vendor_wrapper._hit_api_async_harmony(**params)
+            elif use_responses and hasattr(vendor_wrapper, "_hit_api_async_responses"):
+                params["previous_response_id"] = response_id_to_use
+                response = await vendor_wrapper._hit_api_async_responses(**params)
             else:
-
-
-
+                # Standard chat completions API
+                if hasattr(vendor_wrapper, "_hit_api_async"):
+                    response = await vendor_wrapper._hit_api_async(**params)
+                elif hasattr(vendor_wrapper, "respond_async"):
+                    response = await vendor_wrapper.respond_async(**params)
+                elif hasattr(vendor_wrapper, "respond"):
+                    # Fallback to sync in executor
+                    loop = asyncio.get_event_loop()
+                    response = await loop.run_in_executor(None, vendor_wrapper.respond, params)
+                else:
+                    raise AttributeError(
+                        f"Vendor wrapper {type(vendor_wrapper).__name__} has no suitable response method"
+                    )
+            if not hasattr(response, 'api_type'):
+                response.api_type = "chat"
+
+            # Update stored response ID if auto-storing
+            if self.auto_store_responses and hasattr(response, 'response_id') and response.response_id:
+                self._last_response_id = response.response_id
+
         except Exception as e:
             print(f"Error calling vendor: {e}")
             raise
@@ -370,6 +424,7 @@ class LM:
         images_as_bytes: Optional[List[bytes]] = None,  # v2 compatibility
         response_model: Optional[BaseModel] = None,  # v2 compatibility
         tools: Optional[List[BaseTool]] = None,
+        previous_response_id: Optional[str] = None,  # Responses API thread management
         turn_number: Optional[int] = None,
         **kwargs,
     ) -> BaseLMResponse:
synth_ai/lm/core/vendor_clients.py
CHANGED
@@ -68,6 +68,10 @@ grok_naming_regexes: List[Pattern] = [
 ]
 
 
+openrouter_naming_regexes: List[Pattern] = [
+    re.compile(r"^openrouter/.*$"),  # openrouter/model-name pattern
+]
+
 openrouter_naming_regexes: List[Pattern] = [
     re.compile(r"^openrouter/.*$"),  # openrouter/model-name pattern
 ]
synth_ai/lm/provider_support/openai.py
CHANGED
@@ -103,7 +103,7 @@ OPENAI_METHODS_V1 = [
         sync=False,
     ),
     OpenAiDefinition(
-        module="openai.resources.
+        module="openai.resources.chat.completions",
         object="Completions",
         method="parse",
         type="chat",
@@ -111,7 +111,7 @@ OPENAI_METHODS_V1 = [
         min_version="1.50.0",
     ),
     OpenAiDefinition(
-        module="openai.resources.
+        module="openai.resources.chat.completions",
         object="AsyncCompletions",
         method="parse",
         type="chat",
@@ -776,6 +776,15 @@ class OpenAILangfuse:
             ):
                 continue
 
+            # Check if the method actually exists before trying to wrap it
+            try:
+                module = __import__(resource.module, fromlist=[resource.object])
+                obj = getattr(module, resource.object, None)
+                if obj and not hasattr(obj, resource.method):
+                    continue  # Skip if method doesn't exist
+            except (ImportError, AttributeError):
+                continue  # Skip if module or object doesn't exist
+
             wrap_function_wrapper(
                 resource.module,
                 f"{resource.object}.{resource.method}",