synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (48)
  1. examples/agora_ex/README_MoE.md +224 -0
  2. examples/agora_ex/__init__.py +7 -0
  3. examples/agora_ex/agora_ex.py +65 -0
  4. examples/agora_ex/agora_ex_task_app.py +590 -0
  5. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
  6. examples/agora_ex/reward_fn_grpo-human.py +129 -0
  7. examples/agora_ex/system_prompt_CURRENT.md +63 -0
  8. examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
  9. examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
  10. examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
  11. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  12. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
  13. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  14. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  15. examples/multi_step/crafter_rl_lora.md +51 -10
  16. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  17. examples/multi_step/task_app_config_notes.md +7 -1
  18. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  19. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  20. examples/warming_up_to_rl/run_eval.py +127 -18
  21. examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
  22. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  23. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
  24. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +232 -193
  25. synth_ai/__init__.py +41 -1
  26. synth_ai/api/train/builders.py +49 -19
  27. synth_ai/api/train/configs/__init__.py +44 -0
  28. synth_ai/api/train/configs/rl.py +133 -0
  29. synth_ai/api/train/configs/sft.py +94 -0
  30. synth_ai/api/train/configs/shared.py +24 -0
  31. synth_ai/cli/demo.py +38 -39
  32. synth_ai/cli/rl_demo.py +81 -102
  33. synth_ai/cli/task_apps.py +3 -0
  34. synth_ai/demos/core/cli.py +121 -159
  35. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  36. synth_ai/evals/__init__.py +15 -0
  37. synth_ai/evals/client.py +85 -0
  38. synth_ai/evals/types.py +42 -0
  39. synth_ai/judge_schemas.py +127 -0
  40. synth_ai/rubrics/__init__.py +22 -0
  41. synth_ai/rubrics/validators.py +126 -0
  42. synth_ai/tracing_v3/serialization.py +130 -0
  43. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +1 -1
  44. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +48 -22
  45. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
  46. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
  47. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
  48. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py:

@@ -2,7 +2,10 @@ from __future__ import annotations
 
 import asyncio
 import contextlib
+import json
 import logging
+import os
+import time
 from typing import Any
 
 import httpx

@@ -23,9 +26,15 @@ class OpenAIClient:
         self.api_key = api_key
         self.timeout_s = timeout_s
         self.headers = {}
-
-        if api_key:
-            self.headers["Authorization"] = f"Bearer {api_key}"
+        # If we're calling back into our own task app proxy (e.g., /proxy/groq),
+        # the FastAPI app still enforces X-API-Key. Include it when available so
+        # intra-app proxy calls authenticate correctly.
+        try:
+            env_key = os.getenv("ENVIRONMENT_API_KEY")
+            if env_key and isinstance(env_key, str):
+                self.headers.setdefault("X-API-Key", env_key)
+        except Exception:
+            pass
 
     def _fix_model_parameters(
         self, request: dict[str, Any], target_url: str | None = None
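
For context on the new X-API-Key seeding: the task app's FastAPI layer authenticates callers against ENVIRONMENT_API_KEY, so requests that loop back through the app's own proxy must carry that header. A minimal sketch of such a guard, assuming a plain header-equality check (the function name and details here are illustrative, not synth-ai's actual implementation):

import os

from fastapi import Header, HTTPException

def require_env_api_key(x_api_key: str | None = Header(default=None)) -> None:
    # Reject intra-app proxy calls that lack the expected X-API-Key header.
    expected = os.getenv("ENVIRONMENT_API_KEY")
    if not expected or x_api_key != expected:
        raise HTTPException(status_code=401, detail="missing or invalid X-API-Key")
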
@@ -52,6 +61,8 @@ class OpenAIClient:
                 or ("azure" in low and ".openai." in low)
                 or ("groq.com" in low)
                 or ("/openai" in low)
+                or ("/proxy/groq" in low)
+                or ("/proxy/openai" in low)
             )
         except Exception:
             is_openai = False
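
The two added clauses widen the endpoint heuristic so traffic routed through the task app's own proxy paths also receives the OpenAI-style parameter fixes. An abbreviated, standalone restatement of the predicate (omitting the Azure clause; not the library's exact code):

def looks_openai_compatible(url: str) -> bool:
    # Substring heuristic mirroring the predicate in the hunk above.
    low = url.lower()
    return (
        "groq.com" in low
        or "/openai" in low
        or "/proxy/groq" in low
        or "/proxy/openai" in low
    )

assert looks_openai_compatible("https://task-app.example/proxy/groq")
assert not looks_openai_compatible("https://example.com/v1/other")
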
@@ -137,13 +148,53 @@ class OpenAIClient:
         Returns:
             OpenAI-compatible chat completion response
         """
-        url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
+        base = (base_url or self.base_url).rstrip("/")
+        url = base + "/v1/chat/completions"
         timeout = timeout_s or self.timeout_s
 
         # Merge headers
         headers = self.headers.copy()
         if extra_headers:
             headers.update(extra_headers)
+        # Always include X-API-Key for intra-app requests
+        try:
+            envk = os.getenv("ENVIRONMENT_API_KEY")
+            if envk and isinstance(envk, str):
+                headers["X-API-Key"] = envk
+        except Exception:
+            pass
+
+        # If target is our in-app Groq proxy, force Authorization to use GROQ_API_KEY
+        try:
+            low_url = (url or "").lower()
+            if "/proxy/groq" in low_url or "groq" in low_url:
+                gk = os.getenv("GROQ_API_KEY")
+                if gk and isinstance(gk, str):
+                    headers["Authorization"] = f"Bearer {gk}"
+        except Exception:
+            pass
+
+        # In-process proxy path: avoid HTTP round-trip and auth dependency
+        try:
+            if base.endswith("/proxy/groq") or base.endswith("/proxy/groq/"):
+                from synth_ai.task.server import prepare_for_groq, inject_system_hint
+                # Prepare payload similar to server-side proxy
+                model = request.get("model") if isinstance(request.get("model"), str) else None
+                payload = prepare_for_groq(model, request)
+                payload = inject_system_hint(payload, "")
+                # Call vendor directly
+                gk = os.getenv("GROQ_API_KEY") or ""
+                async with httpx.AsyncClient(timeout=timeout) as client:
+                    resp = await client.post(
+                        "https://api.groq.com/openai/v1/chat/completions",
+                        json=payload,
+                        headers={"Authorization": f"Bearer {gk}"},
+                    )
+                    resp.raise_for_status()
+                    return resp.json()
+        except Exception as _local_proxy_err:
+            # Do NOT fall back silently; surface the error so callers fail fast
+            raise
 
         # Fix parameter compatibility for newer models
         processed_request = self._fix_model_parameters(request, target_url=url)
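
The in-process branch deserves a note: when the configured base URL is the app's own /proxy/groq route, the client now posts straight to api.groq.com instead of making an HTTP hop back into the task app, and any failure is re-raised so callers fail fast rather than silently falling through to the generic path. The routing decision in isolation (illustrative sketch only):

def routes_directly_to_groq(base_url: str) -> bool:
    # A base URL ending in /proxy/groq is short-circuited to a direct Groq call;
    # the trailing-slash variant is handled because callers may not strip it.
    base = base_url.rstrip("/")
    return base.endswith("/proxy/groq")

assert routes_directly_to_groq("https://task-app.example/proxy/groq/")
assert not routes_directly_to_groq("https://api.groq.com/openai")
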
@@ -227,11 +278,7 @@ class OpenAIClient:
                 logger.info(
                     f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
                 )
-                if body_text:
-                    preview_len = min(800, len(body_text))
-                    logger.info(
-                        f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
-                    )
+                # Do not log prompt or full response body
 
                 result = response.json()
                 logger.info(f"Inference response parsed_type={type(result).__name__}")
@@ -243,34 +290,10 @@ class OpenAIClient:
         except httpx.HTTPStatusError as e:
             status = e.response.status_code if e.response is not None else None
             text = e.response.text if e.response is not None else str(e)
-            # Log full body for debugging remote failures
-            try:
-                logger.error(
-                    {
-                        "openai_http_error": True,
-                        "status": status,
-                        "url": url,
-                        "body": text,
-                    }
-                )
-            except Exception:
-                logger.error(f"HTTP error from {url}: {status} - {text}")
+            # Log minimal error info only
+            logger.error({"openai_http_error": True, "status": status})
             # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
-            try:
-                redacted_headers = dict(headers)
-                if "Authorization" in redacted_headers:
-                    redacted_headers["Authorization"] = "***REDACTED***"
-                logger.error(
-                    {
-                        "request_debug": True,
-                        "status": status,
-                        "target": url,
-                        "headers": redacted_headers,
-                        "payload": processed_request,
-                    }
-                )
-            except Exception:
-                pass
+            # Suppress prompt/payload logging entirely
             # Special case: token budget exceeded (OpenAI-compatible error schema)
             try:
                 if status == 400 and e.response is not None:
@@ -324,8 +347,6 @@ class OpenAIClient:
                             logger.warning(
                                 {
                                     "token_budget_recovery": True,
-                                    "messages_tokens": messages_tokens,
-                                    "model_limit": model_limit,
                                     "retry_max_tokens": new_max,
                                 }
                             )
@@ -348,13 +369,8 @@ class OpenAIClient:
                 try:
                     err = e.response.json()
                 except Exception:
-                    err = {"error": "unprocessable", "detail": (text or "")[:200]}
-                logger.warning(
-                    {
-                        "inference_422_recovered": True,
-                        "detail": err,
-                    }
-                )
+                    err = {"error": "unprocessable"}
+                logger.warning({"inference_422_recovered": True})
             except Exception:
                 pass
             # Return a minimal OpenAI-compatible response with no tool_calls/content
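
For reference, the "minimal OpenAI-compatible response" the 422 recovery path returns would look roughly like the following; only the shape (no tool_calls, empty content) is implied by the diff, and the field values shown are assumptions:

empty_completion = {
    "id": "chatcmpl-recovered-422",
    "object": "chat.completion",
    "choices": [
        {
            "index": 0,
            # No tool_calls and empty content, so callers see a no-op turn.
            "message": {"role": "assistant", "content": ""},
            "finish_reason": "stop",
        }
    ],
}
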
@@ -471,6 +487,54 @@ class OpenAIClient:
                         f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
                     )
                 else:
+                    error_block = response_data.get("error")
+                    error_code = ""
+                    if isinstance(error_block, dict):
+                        error_code = str(
+                            error_block.get("code") or error_block.get("type") or ""
+                        ).lower()
+                    if error_code in {"tool_use_failed", "tool_call_failed"}:
+                        logger.warning(
+                            {
+                                "tool_use_failed": True,
+                                "target": (base_url or self.base_url),
+                                "message": error_block.get("message") if isinstance(error_block, dict) else None,
+                            }
+                        )
+                        fallback_actions = ["move_right", "move_up", "do"]
+                        fallback_response = {
+                            "id": f"fallback-{int(time.time() * 1000)}",
+                            "object": "chat.completion",
+                            "created": int(time.time()),
+                            "model": processed_request.get("model"),
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "message": {
+                                        "role": "assistant",
+                                        "content": "",
+                                        "tool_calls": [
+                                            {
+                                                "id": f"call_fallback_{int(time.time() * 1000)}",
+                                                "type": "function",
+                                                "function": {
+                                                    "name": "interact_many",
+                                                    "arguments": json.dumps(
+                                                        {"actions": fallback_actions}
+                                                    ),
+                                                },
+                                            }
+                                        ],
+                                    },
+                                    "finish_reason": "tool_calls",
+                                }
+                            ],
+                        }
+                        if isinstance(response_data.get("usage"), dict):
+                            fallback_response["usage"] = response_data["usage"]
+                        if isinstance(error_block, dict):
+                            fallback_response["error"] = error_block
+                        return fallback_response
                     # This is a different type of 400 error, don't retry
                     try:
                         redacted_headers = {}
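
Because the fallback payload is shaped like an ordinary chat completion carrying a well-formed interact_many tool call, downstream rollout code can keep stepping without special-casing the vendor error. A hypothetical consumer-side extraction (not part of the diff):

import json

def extract_actions(response: dict) -> list[str]:
    # Pull the action list back out of the (possibly fallback) tool call.
    call = response["choices"][0]["message"]["tool_calls"][0]
    args = json.loads(call["function"]["arguments"])
    return args.get("actions", [])
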
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py:

@@ -9,6 +9,8 @@ from typing import Any
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 
+from synth_ai.task.auth import allowed_environment_api_keys, normalize_environment_api_key
+
 from .envs.crafter.policy import CrafterPolicy
 from .inference.openai_client import create_inference_client
 from .registry import registry
@@ -435,34 +437,33 @@ async def step_policy(
                 elif role == "user":
                     user_prompt_records.append(record)
 
+            last_user_chars = (
+                len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0
+            )
             logger.info(
-                "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d",
+                "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d (content suppressed)",
                 len(system_prompt_records),
                 len(user_prompt_records),
-                len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0,
+                last_user_chars,
             )
 
-            if system_prompt_records:
-                logger.info("PROMPT_DUMP_SYSTEM_BEGIN")
-                for idx, rec in enumerate(system_prompt_records):
-                    smsg = rec.get("text", "")
-                    logger.info(f"SYSTEM[{idx}]\n{smsg}")
-                logger.info("PROMPT_DUMP_SYSTEM_END")
-
-            if user_prompt_records:
-                logger.info("PROMPT_DUMP_USER_BEGIN")
-                for idx, rec in enumerate(user_prompt_records):
-                    umsg = rec.get("text", "")
-                    logger.info(f"USER[{idx}]\n{umsg}")
-                logger.info("PROMPT_DUMP_USER_END")
-            # Print concise preview for visibility in standard logs
-            with contextlib.suppress(Exception):
-                last_user = (
-                    user_prompt_records[-1].get("text", "")
-                    if user_prompt_records
-                    else ""
-                )
-                print(f"[task:crafter] user prompt: {last_user}", flush=True)
+            log_prompt_details = (
+                os.getenv("CRAFT_LOG_PROMPTS", "").strip().lower()
+                in {"1", "true", "yes", "debug"}
+            )
+            if log_prompt_details:
+                if system_prompt_records:
+                    logger.info("PROMPT_DETAILS_SYSTEM_BEGIN")
+                    for idx, rec in enumerate(system_prompt_records):
+                        smsg = rec.get("text", "")
+                        logger.info("SYSTEM[%d]: %s", idx, smsg)
+                    logger.info("PROMPT_DETAILS_SYSTEM_END")
+                if user_prompt_records:
+                    logger.info("PROMPT_DETAILS_USER_BEGIN")
+                    for idx, rec in enumerate(user_prompt_records):
+                        umsg = rec.get("text", "")
+                        logger.info("USER[%d]: %s", idx, umsg)
+                    logger.info("PROMPT_DETAILS_USER_END")
         except Exception as e:
             logger.warning(f"PROMPT_DUMP_FAILED: {e}")
 
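Prompt bodies are now opt-in via the CRAFT_LOG_PROMPTS flag (name and accepted truthy values "1", "true", "yes", "debug" taken from the diff). For a local debugging session it could be enabled before the task app starts, for example:

import os

# Re-enable verbose prompt logging for a local debug run only; leave unset in
# production so prompt contents stay out of the logs.
os.environ["CRAFT_LOG_PROMPTS"] = "1"
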
@@ -524,15 +525,29 @@ async def step_policy(
             masked = "<masked>"
             logger.debug(f"INFERENCE_AUTH: Using bearer key {masked}")
         else:
-            logger.warning(
-                "INFERENCE_AUTH: No API key resolved for inference request; downstream may 401"
+            logger.debug(
+                "INFERENCE_AUTH: No bearer key resolved for inference request (expected when using in-app proxy)"
             )
 
         client = create_inference_client(task_app, api_key=api_key_override)
 
-        # Add policy identification header for observability
+        # Add policy identification header and task auth for proxy fallback
         policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
         extra_headers = {"X-Policy-Name": policy_name}
+        try:
+            env_key = normalize_environment_api_key()
+            if not env_key:
+                allowed_keys = allowed_environment_api_keys()
+                if allowed_keys:
+                    env_key = next(iter(sorted(allowed_keys)))
+            if isinstance(env_key, str) and env_key:
+                extra_headers["X-API-Key"] = env_key
+            else:
+                logger.warning(
+                    "INFERENCE_AUTH: Failed to resolve ENVIRONMENT_API_KEY for proxy request headers"
+                )
+        except Exception as exc:
+            logger.warning(f"INFERENCE_AUTH: Error resolving ENVIRONMENT_API_KEY: {exc}")
 
         # Apply input truncation to avoid 422 from inference server
         try:
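
The resolution order added here prefers the normalized ENVIRONMENT_API_KEY and only then falls back to the first of the allowed keys, sorted so the pick is deterministic across processes. Distilled into a standalone sketch (the helper names come from the diff's import; the rest is illustrative):

def resolve_proxy_key(normalized: str | None, allowed: set[str]) -> str | None:
    # Mirror of the fallback order above: explicit key first, then a
    # deterministic pick from the allowed set, else None (logged as a warning).
    if normalized:
        return normalized
    if allowed:
        return next(iter(sorted(allowed)))
    return None
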
@@ -761,26 +776,7 @@ async def step_policy(
         }
 
         # Emit the exact prompt/messages and tools before calling the LLM (bounded preview)
-        with contextlib.suppress(Exception):
-            req_dump = meta.get("inference_request", {})
-            msgs = req_dump.get("messages")
-            tools_dump = req_dump.get("tools")
-            if isinstance(msgs, list):
-                # Print compact messages structure and tool schema with bounded length
-                import json as _json
-
-                msgs_compact = _json.dumps(msgs)[:20000]
-                tools_compact = (
-                    _json.dumps(tools_dump)[:8000] if tools_dump is not None else None
-                )
-                print(
-                    {
-                        "llm.call": True,
-                        "policy": str(policy_name),
-                        "messages_preview": msgs_compact,
-                        "tools_preview": tools_compact,
-                    }
-                )
+        # Do not print prompts; only log response content later
 
         # Normalize request for non-OpenAI endpoints (strict schemas)
         with contextlib.suppress(Exception):