synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. synth_ai/cli/__init__.py +66 -0
  2. synth_ai/cli/balance.py +205 -0
  3. synth_ai/cli/calc.py +70 -0
  4. synth_ai/cli/demo.py +74 -0
  5. synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
  6. synth_ai/cli/man.py +103 -0
  7. synth_ai/cli/recent.py +126 -0
  8. synth_ai/cli/root.py +184 -0
  9. synth_ai/cli/status.py +126 -0
  10. synth_ai/cli/traces.py +136 -0
  11. synth_ai/cli/watch.py +508 -0
  12. synth_ai/config/base_url.py +53 -0
  13. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
  14. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
  15. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +646 -0
  16. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
  17. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
  24. synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
  25. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
  26. synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
  27. synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
  28. synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
  29. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
  30. synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
  31. synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
  32. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
  33. synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
  34. synth_ai/experimental/synth_oss.py +446 -0
  35. synth_ai/learning/core.py +21 -0
  36. synth_ai/learning/gateway.py +4 -0
  37. synth_ai/learning/prompts/mipro.py +0 -0
  38. synth_ai/lm/__init__.py +3 -0
  39. synth_ai/lm/core/main.py +4 -0
  40. synth_ai/lm/core/main_v3.py +68 -13
  41. synth_ai/lm/core/vendor_clients.py +4 -0
  42. synth_ai/lm/provider_support/openai.py +11 -2
  43. synth_ai/lm/vendors/base.py +7 -0
  44. synth_ai/lm/vendors/openai_standard.py +339 -4
  45. synth_ai/lm/vendors/openai_standard_responses.py +243 -0
  46. synth_ai/lm/vendors/synth_client.py +155 -5
  47. synth_ai/lm/warmup.py +54 -17
  48. synth_ai/tracing/__init__.py +18 -0
  49. synth_ai/tracing_v1/__init__.py +29 -14
  50. synth_ai/tracing_v3/config.py +13 -7
  51. synth_ai/tracing_v3/db_config.py +6 -6
  52. synth_ai/tracing_v3/turso/manager.py +8 -8
  53. synth_ai/tui/__main__.py +13 -0
  54. synth_ai/tui/dashboard.py +329 -0
  55. synth_ai/v0/tracing/__init__.py +0 -0
  56. synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
  57. synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
  58. synth_ai/{tracing → v0/tracing}/context.py +1 -1
  59. synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
  60. synth_ai/v0/tracing/events/__init__.py +0 -0
  61. synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
  62. synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
  63. synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
  64. synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
  65. synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
  66. synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
  67. synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
  68. synth_ai/{tracing → v0/tracing}/upload.py +4 -4
  69. synth_ai/v0/tracing_v1/__init__.py +16 -0
  70. synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
  71. synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
  72. synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
  73. synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
  74. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  75. synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
  76. synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
  77. synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
  78. synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
  79. synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
  80. synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
  81. synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
  82. synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
  83. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/METADATA +98 -4
  84. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/RECORD +98 -62
  85. /synth_ai/{tracing/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
  86. /synth_ai/{tracing_v1/events/__init__.py → learning/prompts/gepa.py} +0 -0
  87. /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
  88. /synth_ai/{tracing → v0/tracing}/config.py +0 -0
  89. /synth_ai/{tracing → v0/tracing}/local.py +0 -0
  90. /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
  91. /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
  92. /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
  93. /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
  94. /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
  95. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/WHEEL +0 -0
  96. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/entry_points.txt +0 -0
  97. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/licenses/LICENSE +0 -0
  98. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/top_level.txt +0 -0
synth_ai/experimental/synth_oss.py ADDED
@@ -0,0 +1,446 @@
+
+ """
+ Synth OSS Integration Module
+
+ This module provides integration with Synth's open-source inference and training APIs
+ from the monorepo learning_v2 service. All APIs are OpenAI-compatible.
+
+ Learning V2 APIs available for integration via lm/:
+ """
+
+ # API Configuration
+ SYNTH_BACKEND_URL = ""
+
+ # Learning V2 Modal Service URLs
+ LEARNING_V2_URLS = {
+     "dev": "https://synth-laboratories-dev--learning-v2-service-dev-fastapi-app.modal.run",
+     "prod": "https://synth-laboratories-prod--learning-v2-service-prod-fastapi-app.modal.run",
+     "main": "https://synth-laboratories--learning-v2-service-fastapi-app.modal.run"
+ }
+
+ # ============================================================================
+ # HEALTH & STATUS APIS
+ # ============================================================================
+
+ HEALTH_APIS = {
+     "basic_health": {
+         "method": "GET",
+         "endpoint": "/health",
+         "description": "Basic health check",
+         "response": {"status": "healthy"}
+     },
+     "detailed_health": {
+         "method": "GET",
+         "endpoint": "/learning/health",
+         "description": "Detailed health check including GPU function availability",
+         "response": {"status": "healthy", "components": {...}}
+     }
+ }
+
+ # ============================================================================
+ # FILE MANAGEMENT APIS
+ # ============================================================================
+
+ FILE_MANAGEMENT_APIS = {
+     "upload_file": {
+         "method": "POST",
+         "endpoint": "/files",
+         "description": "Upload a file for fine-tuning (JSONL format)",
+         "request": "multipart/form-data with 'file' and 'purpose'='fine-tune'",
+         "response": {
+             "id": "file-abc123",
+             "object": "file",
+             "bytes": 1234,
+             "created_at": 1638360000,
+             "filename": "data.jsonl",
+             "purpose": "fine-tune"
+         }
+     },
+     "list_files": {
+         "method": "GET",
+         "endpoint": "/files",
+         "description": "List all uploaded files",
+         "params": {"limit": "optional"},
+         "response": {"object": "list", "data": ["file_objects"]}
+     },
+     "get_file": {
+         "method": "GET",
+         "endpoint": "/files/{file_id}",
+         "description": "Get file metadata by ID",
+         "response": "Single file object with metadata"
+     },
+     "delete_file": {
+         "method": "DELETE",
+         "endpoint": "/files/{file_id}",
+         "description": "Delete a file",
+         "response": {"id": "file-abc123", "object": "file", "deleted": True}
+     },
+     "get_file_content": {
+         "method": "GET",
+         "endpoint": "/files/{file_id}/content",
+         "description": "Download raw file content",
+         "response": "Raw file content stream"
+     }
+ }
+
+ # ============================================================================
+ # TRAINING/FINE-TUNING APIS
+ # ============================================================================
+
+ TRAINING_APIS = {
+     "create_training_job": {
+         "method": "POST",
+         "endpoint": "/fine_tuning/jobs",
+         "description": "Create a fine-tuning job",
+         "request": {
+             "model": "Qwen/Qwen3-0.5B",
+             "training_file": "file-abc123",
+             "training_type": "sft",  # or "dpo"
+             "hyperparameters": {...},
+             "suffix": "optional"
+         },
+         "response": {
+             "object": "fine_tuning.job",
+             "id": "ftjob-xyz789",
+             "model": "...",
+             "status": "validating_files",
+             "training_file": "file-abc123",
+             "hyperparameters": {...}
+         }
+     },
+     "list_training_jobs": {
+         "method": "GET",
+         "endpoint": "/fine_tuning/jobs",
+         "description": "List all training jobs",
+         "response": {"object": "list", "data": ["job_objects"]}
+     },
+     "get_training_job": {
+         "method": "GET",
+         "endpoint": "/fine_tuning/jobs/{job_id}",
+         "description": "Get training job status",
+         "response": {
+             "object": "fine_tuning.job",
+             "id": "ftjob-xyz789",
+             "status": "running",  # or "completed", "failed", "cancelled"
+             "fine_tuned_model": "ft:model:suffix"  # when completed
+         }
+     },
+     "cancel_training_job": {
+         "method": "POST",
+         "endpoint": "/fine_tuning/jobs/{job_id}/cancel",
+         "description": "Cancel a running training job",
+         "response": {"object": "fine_tuning.job", "id": "...", "status": "cancelled"}
+     },
+     "get_training_events": {
+         "method": "GET",
+         "endpoint": "/fine_tuning/jobs/{job_id}/events",
+         "description": "Get training logs/events",
+         "response": {
+             "object": "list",
+             "data": [{
+                 "object": "fine_tuning.job.event",
+                 "level": "info",
+                 "message": "Training started",
+                 "created_at": 1638360000
+             }]
+         }
+     }
+ }
+
+ # ============================================================================
+ # INFERENCE APIS
+ # ============================================================================
+
+ INFERENCE_APIS = {
+     "chat_completions": {
+         "method": "POST",
+         "endpoint": "/chat/completions",
+         "description": "OpenAI-compatible chat completions for base and fine-tuned models",
+         "request": {
+             "model": "Qwen/Qwen3-0.5B",  # or "ft:Qwen/Qwen3-0.5B:suffix"
+             "messages": [{"role": "user", "content": "Hello"}],
+             "temperature": 0.7,
+             "max_tokens": 100,
+             "top_p": 1.0,
+             "stream": False,  # Set to True for streaming
+             "tools": [],  # For tool calling
+             "tool_choice": "auto"
+         },
+         "response": {
+             "id": "chatcmpl-123",
+             "object": "chat.completion",
+             "created": 1638360000,
+             "model": "Qwen/Qwen3-0.5B",
+             "choices": [{
+                 "index": 0,
+                 "message": {
+                     "role": "assistant",
+                     "content": "Hello! How can I help you?",
+                     "tool_calls": []  # If tools were used
+                 },
+                 "finish_reason": "stop"
+             }],
+             "usage": {
+                 "prompt_tokens": 10,
+                 "completion_tokens": 20,
+                 "total_tokens": 30
+             }
+         },
+         "streaming": "Server-sent events with data: {...} format when stream=True"
+     }
+ }
+
+ # ============================================================================
+ # MODEL MANAGEMENT APIS
+ # ============================================================================
+
+ MODEL_APIS = {
+     "list_models": {
+         "method": "GET",
+         "endpoint": "/models",
+         "description": "List all available models (base and fine-tuned)",
+         "response": {
+             "object": "list",
+             "data": [{
+                 "id": "Qwen/Qwen3-0.5B",
+                 "object": "model",
+                 "created": 1638360000,
+                 "owned_by": "learning_v2"
+             }]
+         }
+     },
+     "delete_model": {
+         "method": "DELETE",
+         "endpoint": "/models/{model_id}",
+         "description": "Delete a fine-tuned model",
+         "response": {"id": "ft:model:suffix", "object": "model", "deleted": True}
+     }
+ }
+
+ # ============================================================================
+ # SUPPORTED MODELS
+ # ============================================================================
+
+ SUPPORTED_MODELS = {
+     "base_models": [
+         # Qwen 3 family
+         "Qwen/Qwen3-0.6B",
+         "Qwen/Qwen3-1.8B",
+         "Qwen/Qwen3-8B",
+         "Qwen/Qwen3-14B",
+         "Qwen/Qwen3-32B",
+         # Qwen 2.5 family
+         "Qwen/Qwen2.5-0.5B-Instruct",
+         "Qwen/Qwen2.5-1.5B-Instruct",
+         "Qwen/Qwen2.5-3B-Instruct",
+         "Qwen/Qwen2.5-7B-Instruct",
+         "Qwen/Qwen2.5-14B-Instruct",
+         "Qwen/Qwen2.5-32B-Instruct",
+         "Qwen/Qwen2.5-72B-Instruct",
+         # OLMo 2 family
+         "allenai/OLMo-2-0425-1B-Instruct",
+         "allenai/OLMo-2-1124-7B-Instruct",
+         "allenai/OLMo-2-1124-13B-Instruct"
+     ],
+     "training_types": ["sft", "dpo"],
+     "gpu_types": ["A10G", "L40S", "A100", "H100"],
+     "features": [
+         "Tool calling",
+         "Streaming responses",
+         "Fine-tuning",
+         "Multi-GPU training",
+         "JSONL data format",
+         "OpenAI compatibility"
+     ]
+ }
+
+ # ============================================================================
+ # INTEGRATION PLAN – Synth OSS
+ # ============================================================================
+ """
+ GPU & Resource Selection
+ ------------------------
+ Synth OSS decides the GPU based on the `ModelFamily` definition:
+ • Each `ModelConfig` lists `inference_gpus` and `training_gpus`.
+ • The API's `InferenceRouter` calls `_select_gpu_for_model`, which chooses the **first recommended GPU** returned by `get_model_gpu_recommendations` (usually the `default_inference_gpu`).
+ • By default the server picks the first recommended GPU, **but** we can request
+   another GPU type via a custom header that the server *can* opt to honor:
+
+       X-GPU-Preference: L40S   # or A10G, A100, H100
+
+ The current dev deployment already forwards this header to `InferenceRouter`,
+ so adding it makes the GPU configurable without breaking existing behaviour.
+
+ `InferenceConfig` therefore gets a new optional field:
+
+ ```python
+ class InferenceConfig(BaseModel):
+     stream: bool = False
+     gpu_preference: Optional[str] = None  # "A10G", "L40S", "A100", "H100"
+     # ...future knobs (temperature, max_tokens, etc.)
+ ```
+
+ LM will include `gpu_preference` as that header when `backend="synth"`. If the
+ header is omitted or the value is not valid for the chosen model, the server
+ falls back to its default selection. This keeps the API forward-compatible and
+ provides explicit GPU control when supported.
+
+ Only two parts of synth-ai need to change for Synth OSS inference:
+
+ 1. LM() class (synth_ai.lm)
+ 2. The async respond(...) coroutine on that class
+
+ Extend LM with backend="synth"; when selected, issue POST requests to
+ `${LEARNING_V2_URL}/chat/completions`, supporting both streaming and
+ non-streaming modes and returning the same dict structure as today.
+
+ Everything else (file upload, fine-tuning, model listing) lives in the
+ `synth_ai.learning` package and does NOT affect LM:
+
+ synth_ai/learning/
+ ├─ files.py
+ ├─ training.py
+ ├─ models.py
+ ├─ client.py
+ └─ types.py
+
+ Warm-up flow
+ ~~~~~~~~~~~~
+ `learning_v2` exposes `POST /warmup/{model_id}` and `GET /warmup/status/{model_id}`
+ (via the Render proxy). We can exploit that to reduce first-token latency.
+
+ LM API addition:
+
+ ```python
+ async def warmup(self, model: str | None = None, gpu_preference: str | None = None) -> dict:
+     """Pre-spin the container & load weights for *model* on the requested GPU.
+     Returns the JSON response from /warmup. If *model* is None we warm-up
+     `self.model`.
+     """
+ ```
+
+ Implementation sketch (backend == "synth")
+ ------------------------------------------
+ 1. Determine `model_id = model or self.model`.
+ 2. Build headers:
+    ```python
+    headers = {}
+    if gpu_preference:
+        headers["X-GPU-Preference"] = gpu_preference
+    ```
+ 3. `POST f"{url}/warmup/{model_id}"`.
+ 4. Optionally call `GET /warmup/status/{model_id}` in a loop until
+    `status == "ready"` (exponential backoff) – or expose a separate
+    `await LM.warmup_status(model)` helper.
+
+ The method is a *no-op* for the default (OpenAI) backend so existing code keeps
+ working.
+
+ """
+
+
+ """
+ LEARNING_v2 server-side changes required to honor `X-GPU-Preference`
+ ====================================================================
+ 1. **Add header extraction in API layer**
+    • File: `backend/app/services/learning_v2/modal_service/api_openai_complete.py`
+    • In `@app.post("/chat/completions")` add:
+      ```python
+      gpu_pref = req.headers.get("X-GPU-Preference")
+      ```
+    • Pass `gpu_pref` to `inference_router.route_completion(...)` and
+      `route_streaming_completion(...)`.
+
+ 2. **Propagate preference through `InferenceRouter`**
+    • Update signatures of `route_completion`, `route_streaming_completion` and
+      `_select_gpu_for_model` to accept `gpu_preference: Optional[str] = None`.
+    • Inside `_select_gpu_for_model`:
+      ```python
+      if gpu_preference and gpu_preference in valid_gpu_types_for_model:
+          return gpu_preference
+      ```
+      falling back to current auto-selection logic otherwise.
+
+ 3. **Validate against model config**
+    • Retrieve `model_cfg.inference_gpus` and refuse unsupported choices with
+      `HTTP 400` ("GPU type not supported for this model").
+
+ 4. **Expose preference in usage / logging**
+    • Include the chosen GPU in the response's `system_fingerprint` or add a
+      field `selected_gpu` so clients can confirm the assignment.
+    • Log header + selected GPU for observability.
+
+ 5. **(Optional) Accept preference in JSON body**
+    • If we want parity with LM's `InferenceConfig`, extend
+      `ChatCompletionRequest` with an optional `gpu_preference` field and treat
+      it as a fallback if the header is missing.
+
+ 6. **Update automated tests**
+    • Modify `tests/dev/learning_v2/test_multi_gpu_inference_v2.py` to set the
+      header and assert that the correct GPU function (`*_L40S_generate`, etc.)
+      was invoked (can mock `get_gpu_function`).
+
+ 7. **Documentation**
+    • Add the header to `learning_v2_api_structure.txt` and README so external
+      callers know how to request a GPU.
+
+ These changes are backwards-compatible: calls without the header behave exactly
+ as before.
+ """
+
+ """
+ Ran tool
+
+ Ran tool
+
+ Read file: /Users/joshuapurtell/Documents/GitHub/monorepo/backend/app/services/learning/unified_ft_service/api.py
+
+ Ran tool
+ Read file: /Users/joshuapurtell/Documents/GitHub/monorepo/backend/app/services/learning/unified_ft_service/api.py
+
+ Ran tool
+
+ Warm-up today:
+
+ • `POST /warmup/{model_name}` in `unified_ft_service/api.py` kicks off an async worker that:
+   1. Calls `get_model_routing(model_name)` → this falls back to `choose_gpu(model_id, req_gpu=None)`
+      so no GPU can be requested.
+   2. Fires the routed Modal function once ("dummy request") to cache weights.
+
+ Because the handler never accepts a GPU argument and never reads `X-GPU-Preference`, the server always warms the model with its default GPU.
+
+ What has to change
+ ==================
+
+ Server-side (learning_v2)
+ 1. Add an optional header or query field
+    `X-GPU-Preference` **or** `?gpu=A100` to `POST /warmup/{model_id}`.
+ 2. Pass the value into `choose_gpu(model_id, req_gpu)` inside
+    `warmup_worker()`. The helper already validates the choice, so no other
+    change is required.
+ 3. Expose the selected GPU in the JSON response for visibility.
+
+ Client-side (synth-ai LM)
+ ```python
+ async def warmup(
+     self,
+     model: str | None = None,
+     gpu_preference: str | None = None,
+     wait_until_ready: bool = False,
+ ) -> dict:
+     mdl = model or self.model
+     headers = {}
+     if gpu_preference:
+         headers["X-GPU-Preference"] = gpu_preference
+     resp = await _client.post(f"{url}/warmup/{mdl}", headers=headers)
+     if wait_until_ready:
+         while resp.json()["status"] != "warmed":
+             await asyncio.sleep(2)
+             resp = await _client.get(f"{url}/warmup/status/{mdl}")
+     return resp.json()
+ ```
+
+ So: **the existing endpoint does not yet support GPU selection; we need to add
+ the small change above on the `learning_v2` side and then LM.warmup can request
+ specific GPUs.**
+ """
synth_ai/learning/core.py ADDED
@@ -0,0 +1,21 @@
+ """
+ Gateway for Offline Training Runs
+ Accept:
+     PolicyInformation
+         Policy according to System scheme
+     SynthDataset
+         (Trace, Impetus, Intent) triplets
+         System scheme
+     MethodConfig
+     MethodDataset
+         e.g. message triplets, preference pairs, etc
+
+ Always supports either SynthDataset or MethodDataset
+
+ SFT (Synth, Gemini, OpenAI)
+ DPO (Synth, OpenAI)
+ Progress Reward Model (Synth)
+
+
+ class TrainingRun
+ """
synth_ai/learning/gateway.py ADDED
@@ -0,0 +1,4 @@
+
+
+ class OfflineGateway:
+     pass
synth_ai/learning/prompts/mipro.py: File without changes
synth_ai/lm/__init__.py CHANGED
@@ -21,6 +21,7 @@ from .vendors.synth_client import (
      create_chat_completion_async,
      create_chat_completion_sync,
  )
+ from .core.main_v3 import LM
 
  __all__ = [
      # Configuration
@@ -42,6 +43,8 @@ __all__ = [
      "create_sync_client",
      "create_chat_completion_async",
      "create_chat_completion_sync",
+     # Core LM class
+     "LM",
  ]
 
  # Version info
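With this change `LM` can be imported from the package root of `synth_ai.lm`. A usage sketch; the keyword names are inferred from the `main_v3.py` hunks below, and the released signature may differ:

```python
from synth_ai.lm import LM

# Keyword names follow the main_v3 diff (auto_store_responses, use_responses_api);
# required/positional arguments are not shown in this diff and are assumptions here.
lm = LM(model="gpt-4o-mini", auto_store_responses=True, use_responses_api=False)
```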
synth_ai/lm/core/main.py CHANGED
@@ -129,6 +129,7 @@ class LM:
              str,
          ]
      ] = None,
+     enable_thinking: Optional[bool] = None,
  ):
      # print("Structured output mode", structured_output_mode)
      # Check for environment variable if provider is not specified
@@ -162,6 +163,9 @@ class LM:
      # Override temperature to 1 for reasoning models
      effective_temperature = 1.0 if model_name in reasoning_models else temperature
      self.lm_config = {"temperature": effective_temperature}
+     if enable_thinking is not None:
+         # For providers that support it (e.g., Synth + Qwen3), this will be forwarded
+         self.lm_config["enable_thinking"] = enable_thinking
      self.model_name = model_name
 
  def respond_sync(
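The new keyword only threads a flag into `lm_config`. Restated outside the class as a stand-alone helper (illustrative only, same semantics as the hunk above):

```python
from typing import Optional


def build_lm_config(temperature: float, enable_thinking: Optional[bool] = None) -> dict:
    # Only set the key when the caller passed a value, so providers that do not
    # understand it (anything other than e.g. Synth + Qwen3) never see it.
    lm_config = {"temperature": temperature}
    if enable_thinking is not None:
        lm_config["enable_thinking"] = enable_thinking
    return lm_config


assert build_lm_config(0.7) == {"temperature": 0.7}
assert build_lm_config(1.0, enable_thinking=True)["enable_thinking"] is True
```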
synth_ai/lm/core/main_v3.py CHANGED
@@ -102,6 +102,9 @@ class LM:
          system_id: Optional[str] = None,
          enable_v3_tracing: bool = True,
          enable_v2_tracing: Optional[bool] = None,  # v2 compatibility
+         # Responses API parameters
+         auto_store_responses: bool = True,
+         use_responses_api: Optional[bool] = None,
          **additional_params,
      ):
          # Handle v2 compatibility parameters
@@ -160,6 +163,11 @@ class LM:
          self.system_id = system_id or f"lm_{self.vendor or 'unknown'}_{self.model or 'unknown'}"
          self.enable_v3_tracing = enable_v3_tracing
          self.additional_params = additional_params
+
+         # Responses API thread management
+         self.auto_store_responses = auto_store_responses
+         self.use_responses_api = use_responses_api
+         self._last_response_id: Optional[str] = None
 
          # Set structured output handler if needed
          if self.response_format:
@@ -180,6 +188,25 @@ class LM:
              self._vendor_wrapper = get_client(self.model, provider=self.vendor)
          return self._vendor_wrapper
 
+     def _should_use_responses_api(self) -> bool:
+         """Determine if Responses API should be used."""
+         if self.use_responses_api is not None:
+             return self.use_responses_api
+
+         # Auto-detect based on model
+         RESPONSES_MODELS = {
+             "o4-mini", "o3", "o3-mini",  # Supported Synth-hosted models
+             "gpt-oss-120b", "gpt-oss-20b"  # OSS models via Synth
+         }
+         return self.model in RESPONSES_MODELS or (self.model and self.model in reasoning_models)
+
+     def _should_use_harmony(self) -> bool:
+         """Determine if Harmony encoding should be used for OSS models."""
+         # Only use Harmony for OSS models when NOT using OpenAI vendor
+         # OpenAI hosts these models directly via Responses API
+         HARMONY_MODELS = {"gpt-oss-120b", "gpt-oss-20b"}
+         return self.model in HARMONY_MODELS and self.vendor != "openai"
+
      async def respond_async(
          self,
          system_message: Optional[str] = None,
@@ -190,6 +217,7 @@ class LM:
          response_model: Optional[BaseModel] = None,  # v2 compatibility
          tools: Optional[List[BaseTool]] = None,
          turn_number: Optional[int] = None,
+         previous_response_id: Optional[str] = None,  # Responses API thread management
          **kwargs,
      ) -> BaseLMResponse:
          """Async method to get LM response with v3 tracing."""
@@ -229,6 +257,17 @@ class LM:
 
          # Get vendor wrapper
          vendor_wrapper = self.get_vendor_wrapper()
+
+         # Determine API type to use
+         use_responses = self._should_use_responses_api()
+         use_harmony = self._should_use_harmony()
+
+         # Decide response ID to use for thread management
+         response_id_to_use = None
+         if previous_response_id:
+             response_id_to_use = previous_response_id  # Manual override
+         elif self.auto_store_responses and self._last_response_id:
+             response_id_to_use = self._last_response_id  # Auto-chain
 
          # Prepare parameters based on vendor type
          if hasattr(vendor_wrapper, "_hit_api_async"):
@@ -256,21 +295,36 @@ class LM:
          if self.json_mode:
              params["response_format"] = {"type": "json_object"}
 
-         # Call vendor
+         # Call vendor with appropriate API type
          try:
-             # Try the standard method names
-             if hasattr(vendor_wrapper, "_hit_api_async"):
-                 response = await vendor_wrapper._hit_api_async(**params)
-             elif hasattr(vendor_wrapper, "respond_async"):
-                 response = await vendor_wrapper.respond_async(**params)
-             elif hasattr(vendor_wrapper, "respond"):
-                 # Fallback to sync in executor
-                 loop = asyncio.get_event_loop()
-                 response = await loop.run_in_executor(None, vendor_wrapper.respond, params)
+             # Route to appropriate API
+             if use_harmony and hasattr(vendor_wrapper, "_hit_api_async_harmony"):
+                 params["previous_response_id"] = response_id_to_use
+                 response = await vendor_wrapper._hit_api_async_harmony(**params)
+             elif use_responses and hasattr(vendor_wrapper, "_hit_api_async_responses"):
+                 params["previous_response_id"] = response_id_to_use
+                 response = await vendor_wrapper._hit_api_async_responses(**params)
              else:
-                 raise AttributeError(
-                     f"Vendor wrapper {type(vendor_wrapper).__name__} has no suitable response method"
-                 )
+                 # Standard chat completions API
+                 if hasattr(vendor_wrapper, "_hit_api_async"):
+                     response = await vendor_wrapper._hit_api_async(**params)
+                 elif hasattr(vendor_wrapper, "respond_async"):
+                     response = await vendor_wrapper.respond_async(**params)
+                 elif hasattr(vendor_wrapper, "respond"):
+                     # Fallback to sync in executor
+                     loop = asyncio.get_event_loop()
+                     response = await loop.run_in_executor(None, vendor_wrapper.respond, params)
+                 else:
+                     raise AttributeError(
+                         f"Vendor wrapper {type(vendor_wrapper).__name__} has no suitable response method"
+                     )
+             if not hasattr(response, 'api_type'):
+                 response.api_type = "chat"
+
+             # Update stored response ID if auto-storing
+             if self.auto_store_responses and hasattr(response, 'response_id') and response.response_id:
+                 self._last_response_id = response.response_id
+
          except Exception as e:
              print(f"Error calling vendor: {e}")
              raise
@@ -370,6 +424,7 @@ class LM:
          images_as_bytes: Optional[List[bytes]] = None,  # v2 compatibility
          response_model: Optional[BaseModel] = None,  # v2 compatibility
          tools: Optional[List[BaseTool]] = None,
+         previous_response_id: Optional[str] = None,  # Responses API thread management
          turn_number: Optional[int] = None,
          **kwargs,
      ) -> BaseLMResponse:
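The thread-management rule added to `respond_async` reads: an explicit `previous_response_id` wins, otherwise the last stored response ID is chained whenever `auto_store_responses` is on. A stand-alone restatement of just that rule (illustrative, not the wheel's code):

```python
from typing import Optional


def pick_response_id(
    previous_response_id: Optional[str],
    auto_store_responses: bool,
    last_response_id: Optional[str],
) -> Optional[str]:
    if previous_response_id:
        return previous_response_id      # manual override
    if auto_store_responses and last_response_id:
        return last_response_id          # auto-chain onto the previous turn
    return None


assert pick_response_id(None, True, "resp_123") == "resp_123"
assert pick_response_id("resp_456", True, "resp_123") == "resp_456"
assert pick_response_id(None, False, "resp_123") is None
```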
synth_ai/lm/core/vendor_clients.py CHANGED
@@ -68,6 +68,10 @@ grok_naming_regexes: List[Pattern] = [
  ]
 
 
+ openrouter_naming_regexes: List[Pattern] = [
+     re.compile(r"^openrouter/.*$"),  # openrouter/model-name pattern
+ ]
+
  openrouter_naming_regexes: List[Pattern] = [
      re.compile(r"^openrouter/.*$"),  # openrouter/model-name pattern
  ]
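The added pattern routes any model string namespaced under `openrouter/`; for example (the model slug below is made up for illustration):

```python
import re

openrouter_pattern = re.compile(r"^openrouter/.*$")

assert openrouter_pattern.match("openrouter/meta-llama/llama-3.1-8b-instruct")
assert openrouter_pattern.match("gpt-4o-mini") is None
```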
synth_ai/lm/provider_support/openai.py CHANGED
@@ -103,7 +103,7 @@ OPENAI_METHODS_V1 = [
          sync=False,
      ),
      OpenAiDefinition(
-         module="openai.resources.beta.chat.completions",
+         module="openai.resources.chat.completions",
          object="Completions",
          method="parse",
          type="chat",
@@ -111,7 +111,7 @@ OPENAI_METHODS_V1 = [
          min_version="1.50.0",
      ),
      OpenAiDefinition(
-         module="openai.resources.beta.chat.completions",
+         module="openai.resources.chat.completions",
          object="AsyncCompletions",
          method="parse",
          type="chat",
@@ -776,6 +776,15 @@ class OpenAILangfuse:
              ):
                  continue
 
+             # Check if the method actually exists before trying to wrap it
+             try:
+                 module = __import__(resource.module, fromlist=[resource.object])
+                 obj = getattr(module, resource.object, None)
+                 if obj and not hasattr(obj, resource.method):
+                     continue  # Skip if method doesn't exist
+             except (ImportError, AttributeError):
+                 continue  # Skip if module or object doesn't exist
+
              wrap_function_wrapper(
                  resource.module,
                  f"{resource.object}.{resource.method}",