synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (107)
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +58 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  98. synth_ai/api/train/config_finder.py +18 -18
  99. synth_ai/api/train/env_resolver.py +28 -1
  100. synth_ai/cli/task_apps.py +264 -55
  101. synth_ai/task/apps/__init__.py +54 -13
  102. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/METADATA +1 -1
  103. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/RECORD +107 -12
  104. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/top_level.txt +1 -0
  105. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/WHEEL +0 -0
  106. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/entry_points.txt +0 -0
  107. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,10 @@
+ #!/bin/bash
+
+ # Run a Crafter agent demo with Gemini
+ # This script demonstrates a reactive agent in the Crafter environment
+
+ echo "🚀 Starting Crafter agent demo with Gemini 1.5 Flash..."
+ echo "Make sure the synth-ai service is running: uvx synth-ai serve"
+ echo ""
+
+ uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
@@ -0,0 +1,420 @@
+ #!/usr/bin/env python3
+ """Summarise tracing_v3 SQLite data (models, rewards, achievements)."""
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import sqlite3
+ import sys
+ from collections import Counter, defaultdict
+ from pathlib import Path
+ from typing import Any, Dict, List, Set, Tuple
+
+ Row = sqlite3.Row
+
+
+ def connect(db_path: Path) -> sqlite3.Connection:
+     conn = sqlite3.connect(str(db_path))
+     conn.row_factory = sqlite3.Row
+     return conn
+
+
+ def fetch_model_usage(conn: sqlite3.Connection) -> list[dict[str, Any]]:
+     rows = conn.execute(
+         """
+         SELECT
+             model_name,
+             provider,
+             COUNT(*) AS calls,
+             COALESCE(SUM(total_tokens), 0) AS total_tokens,
+             COALESCE(SUM(input_tokens), 0) AS input_tokens,
+             COALESCE(SUM(output_tokens), 0) AS output_tokens,
+             COALESCE(AVG(latency_ms), 0) AS avg_latency_ms
+         FROM events
+         WHERE event_type = 'cais' AND model_name IS NOT NULL
+         GROUP BY model_name, provider
+         ORDER BY calls DESC
+         """
+     ).fetchall()
+     stats: list[dict[str, Any]] = []
+     for row in rows:
+         stats.append(
+             {
+                 "model_name": row["model_name"],
+                 "provider": row["provider"],
+                 "calls": int(row["calls"] or 0),
+                 "total_tokens": int(row["total_tokens"] or 0),
+                 "input_tokens": int(row["input_tokens"] or 0),
+                 "output_tokens": int(row["output_tokens"] or 0),
+                 "avg_latency_ms": float(row["avg_latency_ms"] or 0.0),
+             }
+         )
+     return stats
+
+
+ def _parse_json(value: Any) -> Any:
+     if value is None:
+         return None
+     if isinstance(value, (dict, list)):
+         return value
+     try:
+         return json.loads(value)
+     except Exception:
+         return None
+
+
+ AchievementMap = dict[Tuple[str, int], dict[str, list[str]]]
+
+
+ def fetch_achievement_data(
+     conn: sqlite3.Connection,
+ ) -> tuple[
+     AchievementMap,
+     Counter,
+     Counter,
+     Counter,
+     dict[str, set[str]],
+     dict[str, set[str]],
+ ]:
+     """Return per-turn achievement map and summary counters.
+
+     Returns:
+         achievements_map: {(session_id, turn) -> {"unique": [...], "all": [...]}}
+         unique_counts_per_session: Counter mapping session -> total unique achievements
+         achievement_name_counts: Counter mapping achievement name -> occurrences (unique)
+         achievement_size_counts: Counter mapping number of unique achievements per session -> frequency
+     """
+
+     achievements_map: AchievementMap = defaultdict(lambda: {"unique": [], "all": []})
+     session_unique_sets: dict[str, set[str]] = defaultdict(set)
+     session_final_achievements: dict[str, set[str]] = defaultdict(set)
+     achievement_name_counts: Counter = Counter()
+
+     # Unique achievements (reward_type = unique_achievement_delta)
+     rows = conn.execute(
+         """
+         SELECT er.session_id, er.reward_value, er.annotation, ev.metadata
+         FROM event_rewards er
+         JOIN events ev ON er.event_id = ev.id
+         WHERE er.reward_type = 'unique_achievement_delta' AND er.reward_value > 0
+         """
+     ).fetchall()
+     for row in rows:
+         session_id = row["session_id"]
+         annotation = _parse_json(row["annotation"]) or {}
+         metadata = _parse_json(row["metadata"]) or {}
+         turn = metadata.get("turn")
+         if turn is None:
+             continue
+         new_unique = annotation.get("new_unique") or []
+         if not isinstance(new_unique, list):
+             continue
+         if new_unique:
+             achievements_map[(session_id, int(turn))]["unique"].extend(new_unique)
+             session_unique_sets[session_id].update(new_unique)
+
+     # All achievements (reward_type = achievement_delta)
+     rows = conn.execute(
+         """
+         SELECT er.session_id, er.reward_value, er.annotation, ev.metadata
+         FROM event_rewards er
+         JOIN events ev ON er.event_id = ev.id
+         WHERE er.reward_type = 'achievement_delta' AND er.reward_value > 0
+         """
+     ).fetchall()
+     for row in rows:
+         session_id = row["session_id"]
+         annotation = _parse_json(row["annotation"]) or {}
+         metadata = _parse_json(row["metadata"]) or {}
+         turn = metadata.get("turn")
+         if turn is None:
+             continue
+         turned_true = annotation.get("turned_true") or []
+         if not isinstance(turned_true, list):
+             continue
+         if turned_true:
+             achievements_map[(session_id, int(turn))]["all"].extend(turned_true)
+
+     # Fallback to outcome rewards metadata to capture final achievements
+     rows = conn.execute(
+         """
+         SELECT session_id, reward_metadata
+         FROM outcome_rewards
+         WHERE reward_metadata IS NOT NULL
+         """
+     ).fetchall()
+     for row in rows:
+         session_id = row["session_id"]
+         metadata = _parse_json(row["reward_metadata"])
+         if not isinstance(metadata, dict):
+             continue
+         final_achievements = metadata.get("achievements") or []
+         if isinstance(final_achievements, list):
+             cleaned = [a for a in final_achievements if isinstance(a, str)]
+             session_unique_sets[session_id].update(cleaned)
+             session_final_achievements[session_id].update(cleaned)
+
+     # Build counters from the unique sets
+     unique_counts_per_session: Counter = Counter()
+     for session_id, achievement_set in session_unique_sets.items():
+         unique_counts_per_session[session_id] = len(achievement_set)
+         achievement_name_counts.update(achievement_set)
+
+     achievement_size_counts: Counter = Counter()
+     for session_id, count in unique_counts_per_session.items():
+         achievement_size_counts[count] += 1
+
+     return (
+         achievements_map,
+         unique_counts_per_session,
+         achievement_name_counts,
+         achievement_size_counts,
+         session_unique_sets,
+         session_final_achievements,
+     )
+
+
+ def fetch_reward_summary(conn: sqlite3.Connection) -> tuple[dict[str, Any], list[dict[str, Any]]]:
+     """Aggregate reward information from outcome_rewards and event_rewards."""
+
+     outcome_row = conn.execute(
+         """
+         SELECT
+             COUNT(*) AS episodes,
+             COALESCE(SUM(total_reward), 0) AS total_reward,
+             COALESCE(AVG(total_reward), 0) AS avg_reward,
+             COALESCE(MIN(total_reward), 0) AS min_reward,
+             COALESCE(MAX(total_reward), 0) AS max_reward
+         FROM outcome_rewards
+         """
+     ).fetchone()
+
+     reward_breakdown_rows = conn.execute(
+         """
+         SELECT
+             reward_type,
+             COUNT(*) AS events,
+             COALESCE(SUM(reward_value), 0) AS total_value,
+             COALESCE(AVG(reward_value), 0) AS avg_value
+         FROM event_rewards
+         GROUP BY reward_type
+         ORDER BY events DESC
+         """
+     ).fetchall()
+
+     breakdown: list[dict[str, Any]] = []
+     for row in reward_breakdown_rows:
+         breakdown.append(
+             {
+                 "reward_type": row["reward_type"],
+                 "events": int(row["events"] or 0),
+                 "total_value": float(row["total_value"] or 0.0),
+                 "avg_value": float(row["avg_value"] or 0.0),
+             }
+         )
+
+     outcome = {
+         "episodes": int(outcome_row["episodes"] or 0),
+         "total_reward": float(outcome_row["total_reward"] or 0.0),
+         "avg_reward": float(outcome_row["avg_reward"] or 0.0),
+         "min_reward": float(outcome_row["min_reward"] or 0.0),
+         "max_reward": float(outcome_row["max_reward"] or 0.0),
+     }
+
+     return outcome, breakdown
+
+
+ def format_model_stats(stats: list[dict[str, Any]]) -> str:
+     if not stats:
+         return "No model usage recorded."
+     lines = ["Model usage (by LLM calls):"]
+     header = f"{'Model':30} {'Provider':10} {'Calls':>7} {'Tokens (in/out)':>20} {'Avg latency ms':>15}"
+     lines.append(header)
+     lines.append("-" * len(header))
+     for item in stats:
+         lines.append(
+             f"{item['model_name'][:30]:30} "
+             f"{(item['provider'] or '')[:10]:10} "
+             f"{item['calls']:7d} "
+             f"{item['input_tokens']:10d}/{item['output_tokens']:>8d} "
+             f"{item['avg_latency_ms']:15.1f}"
+         )
+     return "\n".join(lines)
+
+
+ def format_achievement_summary(
+     name_counts: Counter, size_counts: Counter
+ ) -> str:
+     lines = ["Unique achievements unlocked:"]
+     if name_counts:
+         top = name_counts.most_common()
+         for name, count in top:
+             lines.append(f"  {name:25} -> {count}")
+     else:
+         lines.append("  (none recorded)")
+
+     lines.append("")
+     lines.append("Sessions bucketed by unique achievement count:")
+     if size_counts:
+         for size in sorted(size_counts):
+             lines.append(f"  {size:2d} unique -> {size_counts[size]} session(s)")
+     else:
+         lines.append("  (no sessions with achievements)")
+     return "\n".join(lines)
+
+
+ def format_reward_summary(outcome: dict[str, Any], breakdown: list[dict[str, Any]]) -> str:
+     lines = ["Episode outcome rewards:"]
+     episodes = outcome.get("episodes", 0)
+     if episodes:
+         lines.append(
+             f"  Episodes: {episodes} | total={outcome['total_reward']:.2f} | "
+             f"avg={outcome['avg_reward']:.2f} | min/max={outcome['min_reward']:.2f}/{outcome['max_reward']:.2f}"
+         )
+     else:
+         lines.append("  (no outcome rewards recorded)")
+
+     lines.append("")
+     lines.append("Event reward breakdown (event_rewards table):")
+     if breakdown:
+         header = f"{'Reward type':20} {'Events':>8} {'Total value':>14} {'Avg value':>12}"
+         lines.append(header)
+         lines.append("-" * len(header))
+         for row in breakdown:
+             lines.append(
+                 f"{row['reward_type'][:20]:20} "
+                 f"{row['events']:8d} "
+                 f"{row['total_value']:14.3f} "
+                 f"{row['avg_value']:12.3f}"
+             )
+     else:
+         lines.append("  (no event rewards recorded)")
+
+     return "\n".join(lines)
+
+
+ def compute_model_achievement_stats(
+     conn: sqlite3.Connection, session_unique_sets: dict[str, Set[str]]
+ ) -> dict[str, dict[str, Any]]:
+     """Aggregate unique-achievement stats per model."""
+
+     rows = conn.execute(
+         """
+         SELECT session_id, model_name, provider, COUNT(*) AS calls
+         FROM events
+         WHERE event_type = 'cais' AND model_name IS NOT NULL
+         GROUP BY session_id, model_name, provider
+         """
+     ).fetchall()
+
+     session_models: dict[str, tuple[str, str, int]] = {}
+     for row in rows:
+         session_id = row["session_id"]
+         calls = int(row["calls"] or 0)
+         current = session_models.get(session_id)
+         if current is None or calls > current[2]:
+             session_models[session_id] = (row["model_name"], row["provider"], calls)
+
+     model_stats: dict[str, dict[str, Any]] = {}
+     for session_id, (model_name, provider, _calls) in session_models.items():
+         achievements = session_unique_sets.get(session_id, set())
+         unique_count = len(achievements)
+
+         stats = model_stats.setdefault(
+             model_name,
+             {
+                 "providers": set(),
+                 "sessions": 0,
+                 "sessions_with_unique": 0,
+                 "total_unique": 0,
+                 "max_unique": 0,
+                 "achievement_counts": Counter(),
+             },
+         )
+
+         stats["providers"].add(provider or "unknown")
+         stats["sessions"] += 1
+         stats["total_unique"] += unique_count
+         stats["max_unique"] = max(stats["max_unique"], unique_count)
+         if unique_count > 0:
+             stats["sessions_with_unique"] += 1
+             stats["achievement_counts"].update(achievements)
+
+     return model_stats
+
+
+ def format_model_achievement_stats(model_stats: dict[str, dict[str, Any]]) -> str:
+     if not model_stats:
+         return "Achievement stats by model:\n  (no model sessions recorded)"
+
+     lines = ["Achievement stats by model:"]
+     for model_name in sorted(model_stats.keys(), key=lambda m: model_stats[m]["sessions"], reverse=True):
+         stats = model_stats[model_name]
+         providers = ", ".join(sorted(stats["providers"])) if stats["providers"] else "-"
+         sessions = stats["sessions"]
+         total_unique = stats["total_unique"]
+         avg_unique = total_unique / sessions if sessions else 0.0
+         sessions_with_unique = stats["sessions_with_unique"]
+         max_unique = stats["max_unique"]
+         lines.append(
+             f"  {model_name} (providers: {providers})\n"
+             f"    sessions={sessions} with_unique={sessions_with_unique} "
+             f"avg_unique={avg_unique:.2f} max_unique={max_unique}"
+         )
+
+         achievement_counts = stats["achievement_counts"]
+         if achievement_counts:
+             lines.append("    achievements:")
+             for name, count in sorted(
+                 achievement_counts.items(), key=lambda item: item[1], reverse=True
+             ):
+                 lines.append(f"      {name}: {count}")
+         else:
+             lines.append("    achievements: none")
+
+     return "\n".join(lines)
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description=__doc__)
+     parser.add_argument(
+         "--db",
+         type=Path,
+         default=Path("traces/v3/synth_ai.db"),
+         help="Path to the tracing_v3 SQLite database",
+     )
+     args = parser.parse_args()
+
+     if not args.db.exists():
+         print(f"Database not found: {args.db}", file=sys.stderr)
+         raise SystemExit(1)
+
+     conn = connect(args.db)
+     try:
+         model_stats = fetch_model_usage(conn)
+         print(format_model_stats(model_stats))
+         print("")
+
+         (
+             _achievements_map,
+             _unique_counts_per_session,
+             name_counts,
+             size_counts,
+             session_unique_sets,
+             _session_final_achievements,
+         ) = fetch_achievement_data(conn)
+         outcome_summary, reward_breakdown = fetch_reward_summary(conn)
+
+         print(format_reward_summary(outcome_summary, reward_breakdown))
+         print("")
+         print(format_achievement_summary(name_counts, size_counts))
+         print("")
+         model_achievement_stats = compute_model_achievement_stats(conn, session_unique_sets)
+         print(format_model_achievement_stats(model_achievement_stats))
+     finally:
+         conn.close()
+
+
+ if __name__ == "__main__":
+     main()
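
For orientation, here is a minimal sketch of how the script's helpers compose, assuming the file were importable as a module (the module name analyze_trace_db and this driver are hypothetical; the diff ships it only as a CLI script):

    # Hypothetical driver reusing the helpers added above (a sketch, not package code).
    from pathlib import Path

    from analyze_trace_db import connect, fetch_model_usage, format_model_stats

    conn = connect(Path("traces/v3/synth_ai.db"))  # default --db path from the script
    try:
        # Same output as the first section printed by the script's main().
        print(format_model_stats(fetch_model_usage(conn)))
    finally:
        conn.close()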
@@ -0,0 +1,48 @@
+ # Crafter Full Finetune (FFT) example on H100
+ # Adjust paths and hyperparameters to your environment before running.
+
+ [job]
+ model = "Qwen/Qwen3-4B" # base model to finetune
+ # Path to your SFT JSONL dataset
+ # You can point this to an absolute path or keep it relative to this TOML
+ # data = "../data/crafter_sft.jsonl"
+
+ # Optional: how long to poll the job (seconds)
+ poll_seconds = 1800
+
+ [compute]
+ # Cluster shape
+ gpu_type = "H100"
+ gpu_count = 4
+ nodes = 1
+
+ [data.topology]
+ # world_size / container count (optional; inferred when omitted)
+ container_count = 4
+
+ [training]
+ mode = "full_finetune" # for documentation; backend decides based on metadata
+ use_qlora = false
+
+ [hyperparameters]
+ # epochs
+ n_epochs = 2
+
+ # global batch shape (examples; adjust to your budget)
+ world_size = 4
+ sequence_length = 2048
+ # provide either global_batch OR (per_device_batch × grad_accum × world_size)
+ # global_batch = 512
+ per_device_batch = 2
+ gradient_accumulation_steps = 64
+
+ # optimizer/schedule
+ learning_rate = 8e-6
+ warmup_ratio = 0.03
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 3
+ fsdp = false
+ bf16 = true
+ fp16 = false
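
The batch-shape comment above encodes the identity global_batch = per_device_batch × gradient_accumulation_steps × world_size. A quick arithmetic check against this config's values (plain Python, not part of the package):

    # Sanity check: the explicit factors reproduce the commented-out global_batch = 512.
    per_device_batch = 2
    gradient_accumulation_steps = 64
    world_size = 4
    assert per_device_batch * gradient_accumulation_steps * world_size == 512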
@@ -0,0 +1,54 @@
+ # FFT job config for Qwen/Qwen3-4B on the Crafter SFT dataset
+
+ [algorithm]
+ type = "offline"
+ method = "supervised_finetune"
+ variety = "fft"
+
+
+ [job]
+ model = "Qwen/Qwen3-4B"
+ # Limit training to the first 100 conversations (export a 100-row JSONL and point to it here)
+ # data = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.head100.jsonl"
+
+ [compute]
+ # Adjust as needed for your quota
+ gpu_type = "H100"
+ gpu_count = 1
+ nodes = 1
+
+ [data]
+ # Optional topology metadata (left empty for now)
+ topology = {}
+
+ # Optional local validation dataset path (JSONL). If set, the client will upload
+ # this file and wire up validation so the frontend can display val.loss.
+ validation_path = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.tokens_1000000_seed_123.val_2000.jsonl"
+
+ [training]
+ mode = "sft_offline"
+ use_qlora = false
+
+ # Validation settings to emit val.loss on the frontend
+ [training.validation]
+ enabled = true
+ evaluation_strategy = "steps"
+ eval_steps = 20
+ save_best_model_at_end = true
+ metric_for_best_model = "val.loss"
+ greater_is_better = false
+
+ [hyperparameters]
+ # Minimal safe defaults; backend can override
+ n_epochs = 1
+ batch_size = 1
+ gradient_accumulation_steps = 64
+ sequence_length = 4096
+ learning_rate = 5e-6
+ warmup_ratio = 0.03
+ train_kind = "fft"
+
+ # Optional parallelism block example
+ #[hyperparameters.parallelism]
+ # tensor_parallel_size = 1
+ # pipeline_parallel_size = 1
@@ -0,0 +1,20 @@
+ # Eval config for finetuned Qwen/Qwen3-4B (FFT) via task app rollout
+
+ # Required
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
+ # Replace with your finished job id if different
+ model = "fft:Qwen/Qwen3-4B:job_a53914f51dc146b5"
+ policy_name = "crafter"
+ # Backend inference proxy base (no /v1); server will append /v1/chat/completions
+ inference_url = "https://synth-backend-dev-docker.onrender.com/api/inference"
+
+ # Optional
+ num_episodes = 10
+ max_turns = 10
+ concurrency = 10
+ # difficulty = "easy"
+
+ # Thinking params (optional; align with your service expectations)
+ thinking_mode = "think"
+ thinking_budget = 1024
+ max_tokens = 1024
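
The inference_url comment states that the server appends /v1/chat/completions to the proxy base. A sketch of that expectation (the join logic here is an assumption for illustration, not code from the package):

    # Assumed derivation of the final chat-completions endpoint from the proxy base.
    inference_url = "https://synth-backend-dev-docker.onrender.com/api/inference"
    chat_url = inference_url.rstrip("/") + "/v1/chat/completions"
    # -> https://synth-backend-dev-docker.onrender.com/api/inference/v1/chat/completions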
@@ -0,0 +1,13 @@
+ # Eval config for Groq Qwen3-32B
+ # Fields mirror run_eval.py expectations
+
+ # Required
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
+ model = "qwen/qwen3-32b"
+ inference_url = "https://api.groq.com/openai"
+
+ # Optional
+ num_episodes = 20
+ max_turns = 10
+ concurrency = 10
+ # difficulty = "easy"
@@ -0,0 +1,23 @@
+ # Eval config for Synth Modal inference of Qwen/Qwen3-4B via task app rollout
+
+ # Required
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
+ model = "Qwen/Qwen3-4B"
+ policy_name = "crafter"
+ inference_url = "https://synth-backend-dev-docker.onrender.com/api/inference" # Modal inference base (no /v1)
+ max_tokens = 1024
+ thinking_mode = "think"
+ thinking_budget = 1024
+
+ # Optional
+ num_episodes = 20
+ max_turns = 10
+ concurrency = 10
+ # difficulty = "easy"
+
+ # Notes:
+ # - run_eval.py --use-rollout will detect provider=vllm for this model and
+ #   fetch the vLLM base from the task app /info to use as inference_url.
+ # - Ensure the task app mounts the openai-api-key secret if your vLLM gateway
+ #   requires a bearer token (OPENAI_API_KEY). Otherwise it will call unauthenticated.
+
@@ -0,0 +1,73 @@
+ # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
+
+ [algorithm]
+ type = "online"
+ method = "policy_gradient"
+ variety = "gspo"
+
+
+ [services]
+ task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
+
+ [compute]
+ # Cluster shape for the RL pipeline
+ gpu_type = "H100"
+ gpu_count = 8
+
+ [topology]
+ # Split GPUs across vLLM, training, and reference
+ # Must sum to compute.gpu_count
+ type = "single_node_split"
+ gpus_for_vllm = 4
+ gpus_for_training = 3
+ gpus_for_ref = 1
+ tensor_parallel = 4
+
+ [vllm]
+ # Serving tensor parallel size
+ tensor_parallel_size = 4
+ max_model_len = 8192
+
+ [reference]
+ # Required by trainer/runtime; ensures dedicated/scoped scoring server config exists
+ placement = "dedicated"
+ port = 8002
+ tp = 1
+ health_max_wait_s = 180
+ health_interval_ms = 300
+
+ [model]
+ # Base model start
+ base = "Qwen/Qwen3-4B"
+ label = "crafter-rl-from-base"
+
+ [rollout]
+ max_turns = 10
+ episodes_per_batch = 64
+ policy_name = "crafter"
+
+ [evaluation]
+ # Run baseline evaluation over the first 10 seeds every 10 training iterations
+ instances = 10
+ every_n_iters = 10
+ seeds = [
+     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ ]
+
+ [training]
+ log_interval = 1
+ weight_sync_interval = 1
+ # Additional RL hyperparameters can go here
+
+ # Stepwise rewards (Crafter decision-level)
+ step_rewards_enabled = true
+ step_rewards_mode = "decision_stepwise" # "off" | "decision_stepwise" | "env_sparse"
+ step_rewards_beta = 0.0
+ step_rewards_indicator_lambda = 1.0
+ # Optional selector for decision scalar: "unique" | "absolute" (default unique)
+ event_rewards_kind = "unique"
+
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+ weight_sync_interval = 1
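
The [topology] comment requires the GPU split to sum to compute.gpu_count. A quick check with this config's values (plain arithmetic, not package code):

    # Sanity check: single_node_split GPU accounting for this config.
    gpus_for_vllm, gpus_for_training, gpus_for_ref = 4, 3, 1
    gpu_count = 8
    assert gpus_for_vllm + gpus_for_training + gpus_for_ref == gpu_count  # 4 + 3 + 1 == 8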