synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (169)
  1. examples/baseline/banking77_baseline.py +204 -0
  2. examples/baseline/crafter_baseline.py +407 -0
  3. examples/baseline/pokemon_red_baseline.py +326 -0
  4. examples/baseline/simple_baseline.py +56 -0
  5. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  6. examples/blog_posts/gepa/README.md +355 -0
  7. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  9. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  10. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  13. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  15. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  16. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  18. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  19. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  20. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  21. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  22. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  23. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  24. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  25. examples/blog_posts/gepa/task_apps.py +105 -0
  26. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  27. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  28. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  29. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
  30. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
  31. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  32. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  33. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  34. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  35. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  36. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  37. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  38. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  39. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  40. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  41. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  42. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  43. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
  44. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  45. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
  46. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
  47. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  48. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  49. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  50. examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
  51. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
  52. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
  53. examples/rl/configs/rl_from_base_qwen17.toml +1 -0
  54. examples/swe/task_app/hosted/inference/openai_client.py +0 -34
  55. examples/swe/task_app/hosted/policy_routes.py +17 -0
  56. examples/swe/task_app/hosted/rollout.py +4 -2
  57. examples/task_apps/banking77/__init__.py +6 -0
  58. examples/task_apps/banking77/banking77_task_app.py +841 -0
  59. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  60. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  61. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  62. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  63. examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
  64. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  65. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  69. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  70. examples/task_apps/gepa_benchmarks/common.py +260 -0
  71. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  72. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  73. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  74. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  75. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  76. examples/task_apps/pokemon_red/task_app.py +254 -36
  77. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
  78. examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
  85. synth_ai/api/train/builders.py +90 -1
  86. synth_ai/api/train/cli.py +396 -21
  87. synth_ai/api/train/config_finder.py +13 -2
  88. synth_ai/api/train/configs/__init__.py +15 -1
  89. synth_ai/api/train/configs/prompt_learning.py +442 -0
  90. synth_ai/api/train/configs/rl.py +29 -0
  91. synth_ai/api/train/task_app.py +1 -1
  92. synth_ai/api/train/validators.py +277 -0
  93. synth_ai/baseline/__init__.py +25 -0
  94. synth_ai/baseline/config.py +209 -0
  95. synth_ai/baseline/discovery.py +214 -0
  96. synth_ai/baseline/execution.py +146 -0
  97. synth_ai/cli/__init__.py +85 -17
  98. synth_ai/cli/__main__.py +0 -0
  99. synth_ai/cli/claude.py +70 -0
  100. synth_ai/cli/codex.py +84 -0
  101. synth_ai/cli/commands/__init__.py +1 -0
  102. synth_ai/cli/commands/baseline/__init__.py +12 -0
  103. synth_ai/cli/commands/baseline/core.py +637 -0
  104. synth_ai/cli/commands/baseline/list.py +93 -0
  105. synth_ai/cli/commands/eval/core.py +13 -10
  106. synth_ai/cli/commands/filter/core.py +53 -17
  107. synth_ai/cli/commands/help/core.py +0 -1
  108. synth_ai/cli/commands/smoke/__init__.py +7 -0
  109. synth_ai/cli/commands/smoke/core.py +1436 -0
  110. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  111. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  112. synth_ai/cli/commands/train/judge_schemas.py +1 -0
  113. synth_ai/cli/commands/train/judge_validation.py +1 -0
  114. synth_ai/cli/commands/train/validation.py +0 -57
  115. synth_ai/cli/demo.py +35 -3
  116. synth_ai/cli/deploy/__init__.py +40 -25
  117. synth_ai/cli/deploy.py +162 -0
  118. synth_ai/cli/legacy_root_backup.py +14 -8
  119. synth_ai/cli/opencode.py +107 -0
  120. synth_ai/cli/root.py +9 -5
  121. synth_ai/cli/task_app_deploy.py +1 -1
  122. synth_ai/cli/task_apps.py +53 -53
  123. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  124. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  125. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  126. synth_ai/judge_schemas.py +1 -0
  127. synth_ai/learning/__init__.py +10 -0
  128. synth_ai/learning/prompt_learning_client.py +276 -0
  129. synth_ai/learning/prompt_learning_types.py +184 -0
  130. synth_ai/pricing/__init__.py +2 -0
  131. synth_ai/pricing/model_pricing.py +57 -0
  132. synth_ai/streaming/handlers.py +53 -4
  133. synth_ai/streaming/streamer.py +19 -0
  134. synth_ai/task/apps/__init__.py +1 -0
  135. synth_ai/task/config.py +2 -0
  136. synth_ai/task/tracing_utils.py +25 -25
  137. synth_ai/task/validators.py +44 -8
  138. synth_ai/task_app_cfgs.py +21 -0
  139. synth_ai/tracing_v3/config.py +162 -19
  140. synth_ai/tracing_v3/constants.py +1 -1
  141. synth_ai/tracing_v3/db_config.py +24 -38
  142. synth_ai/tracing_v3/storage/config.py +47 -13
  143. synth_ai/tracing_v3/storage/factory.py +3 -3
  144. synth_ai/tracing_v3/turso/daemon.py +113 -11
  145. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  146. synth_ai/types.py +8 -0
  147. synth_ai/urls.py +11 -0
  148. synth_ai/utils/__init__.py +30 -1
  149. synth_ai/utils/agents.py +74 -0
  150. synth_ai/utils/bin.py +39 -0
  151. synth_ai/utils/cli.py +149 -5
  152. synth_ai/utils/env.py +17 -17
  153. synth_ai/utils/json.py +72 -0
  154. synth_ai/utils/modal.py +283 -1
  155. synth_ai/utils/paths.py +48 -0
  156. synth_ai/utils/uvicorn.py +113 -0
  157. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
  158. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
  159. synth_ai/cli/commands/deploy/__init__.py +0 -23
  160. synth_ai/cli/commands/deploy/core.py +0 -614
  161. synth_ai/cli/commands/deploy/errors.py +0 -72
  162. synth_ai/cli/commands/deploy/validation.py +0 -11
  163. synth_ai/cli/deploy/core.py +0 -5
  164. synth_ai/cli/deploy/errors.py +0 -23
  165. synth_ai/cli/deploy/validation.py +0 -5
  166. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  167. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  168. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  169. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
synth_ai/cli/opencode.py ADDED
@@ -0,0 +1,107 @@
1
+ import subprocess
2
+ from pathlib import Path
3
+
4
+ import click
5
+ from synth_ai.types import MODEL_NAMES, ModelName
6
+ from synth_ai.urls import BACKEND_URL_SYNTH_RESEARCH_BASE
7
+ from synth_ai.utils import (
8
+ create_and_write_json,
9
+ find_bin_path,
10
+ install_bin,
11
+ load_json_to_dict,
12
+ resolve_env_var,
13
+ verify_bin,
14
+ write_agents_md,
15
+ )
16
+
17
+ CONFIG_PATH = Path.home() / ".config" / "opencode" / "opencode.json"
18
+ AUTH_PATH = Path.home() / ".local" / "share" / "opencode" / "auth.json"
19
+ SYNTH_PROVIDER_ID = "synth"
20
+
21
+
22
+ @click.command("opencode")
23
+ @click.option(
24
+ "--model",
25
+ "model_name",
26
+ type=str,
27
+ default=None
28
+ )
29
+ @click.option(
30
+ "--force",
31
+ is_flag=True,
32
+ help="Prompt for API keys even if cached values exist."
33
+ )
34
+ @click.option(
35
+ "--url",
36
+ "override_url",
37
+ type=str,
38
+ default=None,
39
+ )
40
+ def opencode_cmd(
41
+ model_name: ModelName | None = None,
42
+ force: bool = False,
43
+ override_url: str | None = None
44
+ ) -> None:
45
+
46
+ while True:
47
+ bin_path = find_bin_path("opencode")
48
+ if bin_path:
49
+ break
50
+ if not install_bin(
51
+ "OpenCode",
52
+ [
53
+ "brew install opencode",
54
+ "bun add -g opencode-ai",
55
+ "curl -fsSL https://opencode.ai/install | bash",
56
+ "npm i -g opencode-ai",
57
+ "paru -S opencode"
58
+ ]
59
+ ):
60
+ print("Failed to find your installed OpenCode")
61
+ print("Please install from: https://opencode.ai")
62
+ return
63
+ print(f"Using OpenCode at {bin_path}")
64
+
65
+ if not verify_bin(bin_path):
66
+ print("Failed to verify OpenCode is runnable")
67
+ return
68
+
69
+ write_agents_md()
70
+
71
+ if model_name is not None:
72
+ if model_name not in MODEL_NAMES:
73
+ raise ValueError(
74
+ f"model_name={model_name} is invalid. Valid values for model_name: {MODEL_NAMES}"
75
+ )
76
+ synth_api_key = resolve_env_var("SYNTH_API_KEY", override_process_env=force)
77
+ data = load_json_to_dict(AUTH_PATH)
78
+ good_entry = {
79
+ "type": "api",
80
+ "key": synth_api_key,
81
+ }
82
+ if data.get(SYNTH_PROVIDER_ID) != good_entry:
83
+ data[SYNTH_PROVIDER_ID] = good_entry
84
+ create_and_write_json(AUTH_PATH, data)
85
+ config = load_json_to_dict(CONFIG_PATH)
86
+ config.setdefault("$schema", "https://opencode.ai/config.json")
87
+ if override_url:
88
+ url = override_url
89
+ print("Using override URL:", url)
90
+ else:
91
+ url = BACKEND_URL_SYNTH_RESEARCH_BASE
92
+ provider_section = config.setdefault("provider", {})
93
+ synth_provider = provider_section.setdefault(SYNTH_PROVIDER_ID, {})
94
+ synth_provider["npm"] = "@ai-sdk/openai-compatible"
95
+ synth_provider.setdefault("name", "Synth")
96
+ models = synth_provider.setdefault("models", {})
97
+ models.setdefault(model_name, {})
98
+ options = synth_provider.setdefault("options", {})
99
+ options["baseURL"] = url
100
+ full_model_name = f"{SYNTH_PROVIDER_ID}/{model_name}"
101
+ config["model"] = full_model_name
102
+ create_and_write_json(CONFIG_PATH, config)
103
+
104
+ try:
105
+ subprocess.run([str(bin_path)], check=True)
106
+ except subprocess.CalledProcessError:
107
+ print("Failed to launch OpenCode")
synth_ai/cli/root.py CHANGED
@@ -158,7 +158,7 @@ def cli():
158
158
 
159
159
  @cli.command()
160
160
  @click.option("--db-file", default="traces/v3/synth_ai.db", help="Database file path")
161
- @click.option("--sqld-port", default=8080, type=int, help="Port for sqld HTTP interface")
161
+ @click.option("--sqld-port", default=8080, type=int, help="Port for sqld Hrana WebSocket interface (HTTP API will be port+1)")
162
162
  @click.option("--env-port", default=8901, type=int, help="Port for environment service")
163
163
  @click.option("--no-sqld", is_flag=True, help="Skip starting sqld daemon")
164
164
  @click.option("--no-env", is_flag=True, help="Skip starting environment service")
@@ -204,21 +204,25 @@ def serve_deprecated(
204
204
 
205
205
  if not no_sqld:
206
206
  try:
207
+ hrana_port = sqld_port
208
+ http_port = sqld_port + 1
207
209
  result = subprocess.run(
208
- ["pgrep", "-f", f"sqld.*--http-listen-addr.*:{sqld_port}"],
210
+ ["pgrep", "-f", f"sqld.*(--hrana-listen-addr.*:{hrana_port}|--http-listen-addr.*:{http_port})"],
209
211
  capture_output=True,
210
212
  text=True,
211
213
  )
212
214
  if result.returncode != 0:
213
215
  sqld_bin = find_sqld_binary() or install_sqld()
214
- click.echo(f"🗄️ Starting sqld (local only) on port {sqld_port}")
216
+ click.echo(f"🗄️ Starting sqld (local only) on hrana port {hrana_port}, HTTP API port {http_port}")
215
217
  proc = subprocess.Popen(
216
218
  [
217
219
  sqld_bin,
218
220
  "--db-path",
219
221
  db_file,
222
+ "--hrana-listen-addr",
223
+ f"127.0.0.1:{hrana_port}",
220
224
  "--http-listen-addr",
221
- f"127.0.0.1:{sqld_port}",
225
+ f"127.0.0.1:{http_port}",
222
226
  ],
223
227
  stdout=open("sqld.log", "w"), # noqa: SIM115
224
228
  stderr=subprocess.STDOUT,
@@ -274,7 +278,7 @@ def serve_deprecated(
274
278
  click.echo(f" Working directory: {os.getcwd()}")
275
279
  click.echo("")
276
280
  click.echo("🔄 Starting services...")
277
- click.echo(f" - sqld daemon: http://127.0.0.1:{sqld_port}")
281
+ click.echo(f" - sqld daemon: libsql://127.0.0.1:{sqld_port} (HTTP API: http://127.0.0.1:{sqld_port + 1})")
278
282
  click.echo(f" - Environment service: http://127.0.0.1:{env_port}")
279
283
  click.echo("")
280
284
  click.echo("💡 Tips:")
synth_ai/cli/task_app_deploy.py CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from synth_ai.cli.commands.deploy import command as deploy_command
5
+ from synth_ai.cli.deploy import deploy_cmd as deploy_command # type: ignore[attr-defined]
6
6
 
7
7
  __all__ = ["deploy_command"]
synth_ai/cli/task_apps.py CHANGED
@@ -32,7 +32,6 @@ except Exception: # pragma: no cover - fallback
32
32
 
33
33
  import click
34
34
  from click.exceptions import Abort
35
- from synth_ai.cli.commands import deploy as _deploy_commands
36
35
  from synth_ai.cli.commands.eval import core as eval_core
37
36
  from synth_ai.cli.commands.filter import core as filter_core
38
37
 
@@ -269,20 +268,25 @@ def _markov_message_from_dict(payload: dict[str, Any]) -> SessionEventMarkovBlan
269
268
  json_payload=content_payload.get("json_payload"),
270
269
  )
271
270
  raw_type = (payload.get("message_type") or "").lower()
272
- if raw_type == "observation":
271
+ original_type = payload.get("message_type") or raw_type
272
+
273
+ if raw_type in ("observation", "policy_system_prompt"):
273
274
  normalized_type = "system"
274
- elif raw_type == "action":
275
+ elif raw_type in ("action", "policy_tool_call"):
275
276
  normalized_type = "assistant"
276
277
  elif raw_type in {"user", "assistant", "system", "tool_use", "tool_result"}:
277
278
  normalized_type = raw_type
278
279
  else:
279
280
  normalized_type = "system"
280
281
 
282
+ metadata = dict(payload.get("metadata") or {})
283
+ metadata["original_message_type"] = original_type
284
+
281
285
  return SessionEventMarkovBlanketMessage(
282
286
  content=content,
283
287
  message_type=normalized_type,
284
288
  time_record=_time_record_from_dict(payload.get("time_record")),
285
- metadata=payload.get("metadata") or {},
289
+ metadata=metadata,
286
290
  )
287
291
 
288
292
 
@@ -354,12 +358,8 @@ async def _store_trace(
354
358
  _logger.info(f"[STORE_TRACE_DEBUG] Called with tracer={tracer is not None}, trace_namespace={trace_namespace is not None}")
355
359
 
356
360
  if tracer is None or not isinstance(trace_namespace, dict):
357
- message = (
358
- f"Trace storage requires a tracer instance and dict payload. "
359
- f"Got tracer_present={tracer is not None}, payload_type={type(trace_namespace)}"
360
- )
361
- _logger.error("[STORE_TRACE_DEBUG] %s", message)
362
- raise ValueError(message)
361
+ _logger.warning(f"[STORE_TRACE_DEBUG] Early return: tracer={tracer is not None}, trace_namespace type={type(trace_namespace)}")
362
+ return
363
363
 
364
364
  _logger.info(f"[STORE_TRACE_DEBUG] trace_namespace keys: {list(trace_namespace.keys())}")
365
365
 
@@ -373,13 +373,8 @@ async def _store_trace(
373
373
  session_payload = trace_namespace
374
374
  _logger.info("[STORE_TRACE_DEBUG] Using trace_namespace directly as session_payload (no session_trace key)")
375
375
  else:
376
- message = (
377
- "Trace payload did not contain a 'session_trace' dict and lacked top-level "
378
- "session fields (session_id, markov_blanket_message_history). "
379
- f"Payload keys: {list(trace_namespace.keys())}"
380
- )
381
- _logger.error("[STORE_TRACE_DEBUG] %s", message)
382
- raise ValueError(message)
376
+ _logger.warning(f"[STORE_TRACE_DEBUG] No session_trace found or wrong type: {type(session_payload)}")
377
+ return
383
378
 
384
379
  _logger.info(f"[STORE_TRACE_DEBUG] session_payload keys: {list(session_payload.keys())}")
385
380
  msg_count = len(session_payload.get("markov_blanket_message_history", []))
@@ -387,26 +382,8 @@ async def _store_trace(
387
382
 
388
383
  trace_obj = _session_trace_from_dict(session_payload)
389
384
  if trace_obj is None:
390
- message = "Session trace payload could not be parsed into a SessionTrace object."
391
- _logger.error("[STORE_TRACE_DEBUG] %s", message)
392
- raise ValueError(message)
393
-
394
- if not trace_obj.markov_blanket_message_history:
395
- message = (
396
- "Session trace is missing markov_blanket_message_history; "
397
- "eval output must include all prompts/tool calls. "
398
- f"session_id={trace_obj.session_id}"
399
- )
400
- _logger.error("[STORE_TRACE_DEBUG] %s", message)
401
- raise ValueError(message)
402
-
403
- if not trace_obj.event_history:
404
- message = (
405
- "Session trace is missing event_history; rollout should emit environment/LLM events. "
406
- f"session_id={trace_obj.session_id}"
407
- )
408
- _logger.error("[STORE_TRACE_DEBUG] %s", message)
409
- raise ValueError(message)
385
+ _logger.warning("[STORE_TRACE_DEBUG] _session_trace_from_dict returned None")
386
+ return
410
387
 
411
388
  _logger.info(f"[STORE_TRACE_DEBUG] Created SessionTrace object with {len(trace_obj.markov_blanket_message_history)} messages")
412
389
 
@@ -2454,16 +2431,7 @@ def serve_command(
2454
2431
  trace_dir: str | None,
2455
2432
  trace_db: str | None,
2456
2433
  ) -> None:
2457
- _deploy_commands.run_uvicorn_runtime(
2458
- app_id,
2459
- host,
2460
- port,
2461
- env_file,
2462
- reload_flag,
2463
- force,
2464
- trace_dir,
2465
- trace_db,
2466
- )
2434
+ return None
2467
2435
 
2468
2436
 
2469
2437
  @task_app_group.command("info")
@@ -2575,20 +2543,52 @@ def serve_task_group(
2575
2543
  trace_dir: str | None,
2576
2544
  trace_db: str | None,
2577
2545
  ) -> None:
2578
- _deploy_commands.run_uvicorn_runtime(
2579
- app_id,
2546
+ """Serve a TaskAppConfig-based task app using uvicorn."""
2547
+ import contextlib
2548
+
2549
+ if not host:
2550
+ host = "0.0.0.0"
2551
+
2552
+ if port is None:
2553
+ port = 8001
2554
+
2555
+ # Auto-enable tracing by default
2556
+ try:
2557
+ auto_trace = os.getenv("SYNTH_AUTO_TRACE", "1")
2558
+ auto_trace_enabled = auto_trace not in {"0", "false", "False", ""}
2559
+ except Exception:
2560
+ auto_trace_enabled = True
2561
+
2562
+ if auto_trace_enabled:
2563
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
2564
+ if trace_dir is None:
2565
+ default_trace_dir = (demo_base / "traces" / "v3").resolve()
2566
+ with contextlib.suppress(Exception):
2567
+ default_trace_dir.mkdir(parents=True, exist_ok=True)
2568
+ trace_dir = str(default_trace_dir)
2569
+ click.echo(f"[trace] Using trace directory: {trace_dir}")
2570
+ if trace_dir and trace_db is None:
2571
+ default_trace_db = (Path(trace_dir) / "synth_ai.db").resolve()
2572
+ with contextlib.suppress(Exception):
2573
+ default_trace_db.parent.mkdir(parents=True, exist_ok=True)
2574
+ trace_db = str(default_trace_db)
2575
+ click.echo(f"[trace] Using trace DB: {trace_db}")
2576
+
2577
+ # Select and serve the app
2578
+ choice = _select_app_choice(app_id, purpose="serve")
2579
+ entry = choice.ensure_entry()
2580
+ _serve_entry(
2581
+ entry,
2580
2582
  host,
2581
2583
  port,
2582
2584
  env_file,
2583
2585
  reload_flag,
2584
2586
  force,
2585
- trace_dir,
2586
- trace_db,
2587
+ trace_dir=trace_dir,
2588
+ trace_db=trace_db,
2587
2589
  )
2588
2590
 
2589
2591
 
2590
- _deploy_commands.register_task_app_commands(task_app_group)
2591
-
2592
2592
 
2593
2593
  def _determine_env_files(
2594
2594
  entry: TaskAppEntryType, user_env_files: Sequence[str], *, original_path: Path | None = None
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py CHANGED
@@ -6,16 +6,18 @@ the hash-based set-iteration nondeterminism that caused the drift.
6
6
  """
7
7
 
8
8
  import collections
9
+ import os
9
10
 
10
11
  import crafter
11
12
 
12
- print("[PATCH] Attempting to apply Crafter deterministic patch...")
13
+ # Patch messages permanently disabled to reduce noise
14
+ # print("[PATCH] Attempting to apply Crafter deterministic patch...")
13
15
 
14
16
  # -----------------------------------------------------------------------------
15
17
  # 1. Make per–chunk object order stable
16
18
  # -----------------------------------------------------------------------------
17
19
  if not hasattr(crafter.Env, "_orig_balance_object"):
18
- print("[PATCH] Patching crafter.Env._balance_object...")
20
+ # print("[PATCH] Patching crafter.Env._balance_object...")
19
21
  crafter.Env._orig_balance_object = crafter.Env._balance_object
20
22
 
21
23
  def _balance_object_det(self, chunk, objs, *args, **kwargs):
@@ -25,9 +27,10 @@ if not hasattr(crafter.Env, "_orig_balance_object"):
25
27
  return crafter.Env._orig_balance_object(self, chunk, objs, *args, **kwargs)
26
28
 
27
29
  crafter.Env._balance_object = _balance_object_det
28
- print("[PATCH] crafter.Env._balance_object patched.")
30
+ # print("[PATCH] crafter.Env._balance_object patched.")
29
31
  else:
30
- print("[PATCH] crafter.Env._balance_object already patched or _orig_balance_object exists.")
32
+ pass
33
+ # print("[PATCH] crafter.Env._balance_object already patched or _orig_balance_object exists.")
31
34
 
32
35
  # -----------------------------------------------------------------------------
33
36
  # 2. Make *chunk* iteration order stable
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py CHANGED
@@ -4,6 +4,7 @@ This version handles player references for Zombie and Skeleton objects.
4
4
  """
5
5
 
6
6
  import collections
7
+ import os
7
8
  import pickle
8
9
  from typing import Any, Dict, Optional, Set
9
10
 
@@ -11,11 +12,12 @@ import crafter
11
12
  import numpy as np
12
13
  from crafter import objects
13
14
 
14
- print("[PATCH] Attempting to apply Crafter serialization patch v3...")
15
+ # Patch messages permanently disabled
16
+ # print("[PATCH] Attempting to apply Crafter serialization patch v3...")
15
17
 
16
18
  # Check if already patched
17
19
  if not hasattr(crafter.Env, "save"):
18
- print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
20
+ # print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
19
21
 
20
22
  def _save(self) -> Dict[str, Any]:
21
23
  """Save complete environment state including all details."""
@@ -260,8 +262,10 @@ if not hasattr(crafter.Env, "save"):
260
262
  crafter.Env.save = _save
261
263
  crafter.Env.load = _load
262
264
 
263
- print("[PATCH] crafter.Env.save() and load() methods added (v3).")
265
+ pass
266
+ # print("[PATCH] crafter.Env.save() and load() methods added (v3).")
264
267
  else:
265
- print("[PATCH] crafter.Env already has save/load methods.")
268
+ pass
269
+ # print("[PATCH] crafter.Env already has save/load methods.")
266
270
 
267
- print("[PATCH] Crafter serialization patch v3 complete.")
271
+ # print("[PATCH] Crafter serialization patch v3 complete.")
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py CHANGED
@@ -9,7 +9,8 @@ from typing import Any, Dict, Optional
9
9
 
10
10
  import crafter
11
11
 
12
- print("[PATCH] Attempting to apply simplified Crafter world configuration patch...")
12
+ # Patch messages permanently disabled
13
+ # print("[PATCH] Attempting to apply simplified Crafter world configuration patch...")
13
14
 
14
15
  # World configuration presets
15
16
  WORLD_CONFIGS = {
@@ -279,8 +280,8 @@ def patched_env_init(
279
280
 
280
281
  crafter.Env.__init__ = patched_env_init
281
282
 
282
- print("[PATCH] Simplified Crafter world configuration patch complete.")
283
- print("[PATCH] Available configs: easy, normal, hard, peaceful")
283
+ # print("[PATCH] Simplified Crafter world configuration patch complete.")
284
+ # print("[PATCH] Available configs: easy, normal, hard, peaceful")
284
285
 
285
286
  # Example custom config
286
287
  EXAMPLE_CUSTOM_CONFIG = {
synth_ai/judge_schemas.py CHANGED
@@ -124,3 +124,4 @@ class JudgeScoreRequest(BaseModel):
124
124
  trace: JudgeTracePayload = Field(..., description="Trajectory trace to evaluate")
125
125
  options: JudgeOptions = Field(default_factory=lambda: JudgeOptions(), description="Judge options")
126
126
  rubric: Optional[dict[str, Any]] = Field(None, description="Optional explicit rubric criteria")
127
+
synth_ai/learning/__init__.py CHANGED
@@ -3,6 +3,12 @@ from synth_ai.task import task_app_health, validate_task_app_url
3
3
  from .client import LearningClient
4
4
  from .health import backend_health, balance_autumn_normalized, pricing_preflight
5
5
  from .jobs import JobHandle, JobsApiResolver
6
+ from .prompt_learning_client import (
7
+ PromptLearningClient,
8
+ get_prompt_text,
9
+ get_prompts,
10
+ get_scoring_summary,
11
+ )
6
12
  from .rl import (
7
13
  MAX_ENVIRONMENT_API_KEY_BYTES,
8
14
  RlClient,
@@ -32,6 +38,10 @@ __all__ = [
32
38
  "FtClient",
33
39
  "SFTJobConfig",
34
40
  "prepare_sft_job_payload",
41
+ "PromptLearningClient",
42
+ "get_prompts",
43
+ "get_prompt_text",
44
+ "get_scoring_summary",
35
45
  "RolloutEnvSpec",
36
46
  "RolloutPolicySpec",
37
47
  "RolloutRecordConfig",