synth-ai 0.2.9.dev2__py3-none-any.whl → 0.2.9.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai has been flagged as potentially problematic; consult the package registry's advisory page for details.

Files changed (112)
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +58 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  98. synth_ai/api/train/config_finder.py +18 -18
  99. synth_ai/api/train/env_resolver.py +28 -1
  100. synth_ai/cli/task_apps.py +264 -55
  101. synth_ai/demo_registry.py +7 -7
  102. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  103. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +54 -0
  104. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  105. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +165 -0
  106. synth_ai/task/apps/__init__.py +54 -13
  107. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/METADATA +1 -1
  108. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/RECORD +112 -13
  109. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/top_level.txt +1 -0
  110. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/WHEEL +0 -0
  111. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/entry_points.txt +0 -0
  112. {synth_ai-0.2.9.dev2.dist-info → synth_ai-0.2.9.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,49 @@
1
+ ### Quickstart (Crafter) with config.toml
2
+
3
+ All defaults are in `examples/finetuning/synth_qwen/config.toml`. Place your API keys in `.env`.
4
+
5
+ 1) Generate traces (v3 tracing)
6
+ ```bash
7
+ set -a; source .env 2>/dev/null || true; set +a
8
+ uvpm examples.finetuning.synth_qwen.run_crafter_qwen4b
9
+ ```
10
+
11
+ 2) Filter traces → SFT JSONL
12
+ ```bash
13
+ uvpm examples.finetuning.synth_qwen.filter_traces_achievements
14
+ ```
15
+
16
+ 3) Finetune via learning service (SFT)
17
+ ```bash
18
+ set -a; source .env 2>/dev/null || true; set +a
19
+ uvpm examples.finetuning.synth_qwen.sft_kickoff
20
+ ```
21
+
22
+ 4) Evaluate the fine-tuned model in Crafter
23
+ ```bash
24
+ set -a; source .env 2>/dev/null || true; set +a
25
+ CRAFTER_MODEL="ft:...your-returned-id..." uvpm examples.finetuning.synth_qwen.run_crafter_qwen4b
26
+ ```
27
+
28
+ Notes:
29
+ - If you see a 401, ensure your `.env` contains a valid production `SYNTH_API_KEY` or export it inline.
30
+ - Traces are stored in `traces/v3/synth_ai.db` (sqld); the filter derives the correct internal data file.
31
+
32
+
33
+ ### Interactive Demo
34
+
35
+ Use the interactive script to walk through rollouts → filtering → SFT → optional rollout of the fine-tuned model.
36
+
37
+ ```bash
38
+ examples/finetuning/synth_qwen/run_demo.sh
39
+ ```
40
+
41
+ What it does:
42
+ - Prompts for rollout settings (model, episodes, max steps, difficulty, think).
43
+ - Prompts for filter settings (required achievements, model restriction, min reward, max cost/tokens, output path).
44
+ - Starts the SFT job and captures the returned fine-tuned model id.
45
+ - Asks you to confirm before rolling out the fine-tuned model.
46
+ - API key handling:
47
+ - If a `SYNTH_API_KEY` is detected, you’re asked to confirm using it.
48
+ - If not set, you can choose `SYNTH_API_KEY_PROD` (if present) or securely enter a key.
49
+ - `OPENAI_API_KEY` is set to the same value if missing to prevent 401s.
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Run Crafter-Classic evaluation (v3 tracing) on Synth’s Qwen 4B model.
4
+
5
+ This forwards flags into the canonical `test_crafter_react_agent_lm_synth.py`
6
+ runner that already handles v3 tracing, warm-up, and reporting.
7
+
8
+ Environment variables:
9
+ - CRAFTER_MODEL (default: Qwen/Qwen3-4B-Instruct-2507)
10
+ - CRAFTER_EPISODES (default: 10)
11
+ - CRAFTER_MAX_STEPS (default: 30)
12
+ - CRAFTER_DIFFICULTY (default: easy)
13
+ - CRAFTER_THINK (default: 0 -> use --no-think)
14
+
15
+ It also sets a few runner-specific env flags to enforce short outputs and a single tool call.
16
+ """
17
+
18
+ import asyncio
19
+ import os
20
+ import sys
21
+ import tomllib
22
+
23
+ # from synth_ai.environments.examples.crafter_classic.agent_demos.crafter_modal_ft import (
24
+ # test_crafter_react_agent_lm_synth as runner,
25
+ # )
26
+ from examples.finetuning.synth_qwen import (
27
+ react_agent_lm as runner,
28
+ )
29
+ from synth_ai.config.base_url import (
30
+ PROD_BASE_URL_DEFAULT,
31
+ get_learning_v2_base_url,
32
+ )
33
+
34
+ # Force prod by default for this runner unless explicitly overridden
35
+ _force_prod = os.getenv("CRAFTER_FORCE_PROD", "1").lower() in ("1", "true", "yes", "on")
36
+ if _force_prod:
37
+ # Sanitize implicit local/dev overrides
38
+ os.environ.pop("SYNTH_LOCAL_BASE_URL", None)
39
+ os.environ.pop("SYNTH_DEV_BASE_URL", None)
40
+ # If caller hasn't explicitly set LEARNING_V2_BASE_URL, lock to prod default
41
+ if "LEARNING_V2_BASE_URL" not in os.environ:
42
+ os.environ["LEARNING_V2_BASE_URL"] = PROD_BASE_URL_DEFAULT
43
+
44
+ # Resolve base URL from shared config (honors LEARNING_V2_BASE_URL and sanitized overrides)
45
+ os.environ["SYNTH_BASE_URL"] = get_learning_v2_base_url()
46
+
47
+ print(f"🔧 Using Synth base URL = {os.environ.get('SYNTH_BASE_URL')}")
48
+
49
+ cfg_path = os.getenv("CRAFTER_CONFIG", "examples/finetuning/synth_qwen/config.toml")
50
+ cfg = {}
51
+ if os.path.exists(cfg_path):
52
+ with open(cfg_path, "rb") as f:
53
+ cfg = tomllib.load(f)
54
+ else:
55
+ cfg = {"rollouts": {}}
56
+ rcfg = cfg.get("rollouts", {})
57
+
58
+ MODEL_ID = os.getenv("CRAFTER_MODEL", rcfg.get("model", "Qwen/Qwen3-4B-Instruct-2507"))
59
+ EPISODES = os.getenv("CRAFTER_EPISODES", str(rcfg.get("episodes", 10)))
60
+ MAX_STEPS = os.getenv("CRAFTER_MAX_STEPS", str(rcfg.get("max_steps", 30)))
61
+ DIFFICULTY = os.getenv("CRAFTER_DIFFICULTY", rcfg.get("difficulty", "easy"))
62
+
63
+
64
+ async def main() -> None:
65
+ think_env = os.getenv("CRAFTER_THINK", "0").lower()
66
+ enable_think = think_env in ("1", "true", "yes", "on")
67
+ think_flag = "--think" if enable_think else "--no-think"
68
+
69
+ # Tighten prompts and enforce tool calling like the tests do
70
+ os.environ["CRAFTER_STOP_AFTER_TOOL_CALLS"] = "1"
71
+ os.environ["SYNTH_OPENAI_DEBUG"] = "0"
72
+ os.environ["CRAFTER_MAX_TOKENS"] = os.environ.get(
73
+ "CRAFTER_MAX_TOKENS", str(rcfg.get("max_tokens", 2048))
74
+ )
75
+ os.environ["CRAFTER_TOOL_CHOICE"] = os.environ.get(
76
+ "CRAFTER_TOOL_CHOICE", rcfg.get("tool_choice", "required")
77
+ )
78
+ os.environ["CRAFTER_TEMPERATURE"] = os.environ.get(
79
+ "CRAFTER_TEMPERATURE", str(rcfg.get("temperature", 0.4))
80
+ )
81
+
82
+ # Default v3 traces path from config if not already set
83
+ tcfg = cfg.get("traces", {})
84
+ if "SQLD_DB_PATH" not in os.environ and tcfg.get("sqld_db_path"):
85
+ os.environ["SQLD_DB_PATH"] = tcfg["sqld_db_path"]
86
+ os.environ["CRAFTER_SYSTEM_PROMPT"] = (
87
+ "You are CrafterAgent playing the Crafter survival environment. Your goal is to stay alive and unlock as many achievements as possible. "
88
+ "Keep your reasoning very brief and focus on the tool call. Use the tool available to you to play Crafter. "
89
+ "ALWAYS provide 2-5 actions. Available actions: move_left, move_right, move_up, move_down, do, sleep, place_stone, place_table, place_furnace, place_plant, "
90
+ "make_wood_pickaxe, make_stone_pickaxe, make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword, noop."
91
+ )
92
+ os.environ["CRAFTER_SUPPRESS_OBS_REMINDER"] = "1"
93
+ # Ensure we log full LM inputs and tools
94
+ os.environ["CRAFTER_LOG_FULL_INPUTS"] = os.environ.get("CRAFTER_LOG_FULL_INPUTS", "1")
95
+
96
+ sys.argv = [
97
+ "crafter_runner",
98
+ "--model",
99
+ MODEL_ID,
100
+ "--episodes",
101
+ str(EPISODES),
102
+ "--max-steps",
103
+ str(MAX_STEPS),
104
+ "--difficulty",
105
+ DIFFICULTY,
106
+ think_flag,
107
+ "--quiet",
108
+ ]
109
+
110
+ await runner.main()
111
+
112
+
113
+ if __name__ == "__main__":
114
+ asyncio.run(main())
@@ -0,0 +1,195 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # Interactive demo for Qwen 4B Crafter finetuning
4
+ # Mirrors the flow in readme.md and example_log.md
5
+
6
+ set -euo pipefail
7
+
8
+ # Locate repo root and cd there
9
+ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
10
+ cd "$SCRIPT_DIR/../../.."
11
+
12
+ echo "Synth Qwen4B finetuning demo (Crafter)"
13
+
14
+ # Load env (prefer example-local .env, then repo .env)
15
+ set +u
16
+ set -a
17
+ if [ -f "$SCRIPT_DIR/.env" ]; then source "$SCRIPT_DIR/.env"; fi
18
+ if [ -f ".env" ]; then source ".env"; fi
19
+ set +a
20
+ set -u
21
+
22
+ # Helper: prompt with default
23
+ prompt() {
24
+ local msg="$1"; shift
25
+ local default="$1"; shift
26
+ local var
27
+ read -r -p "$msg" var || true
28
+ if [ -z "$var" ]; then
29
+ echo "$default"
30
+ else
31
+ echo "$var"
32
+ fi
33
+ }
34
+
35
+ # Ensure API key present (and set OPENAI_API_KEY fallback)
36
+ ensure_api_key() {
37
+ local current_key="${SYNTH_API_KEY:-}"
38
+ if [ -n "$current_key" ]; then
39
+ local preview="${current_key:0:6}...${current_key: -4}"
40
+ read -r -p "Detected SYNTH_API_KEY ($preview). Use this key? [Y/n]: " USE_CUR || true
41
+ USE_CUR=${USE_CUR:-Y}
42
+ if [[ ! "$USE_CUR" =~ ^[Yy]$ ]]; then
43
+ current_key=""
44
+ fi
45
+ fi
46
+
47
+ if [ -z "$current_key" ]; then
48
+ if [ -n "${SYNTH_API_KEY_PROD:-}" ]; then
49
+ local prod_prev="${SYNTH_API_KEY_PROD:0:6}...${SYNTH_API_KEY_PROD: -4}"
50
+ read -r -p "Use SYNTH_API_KEY_PROD ($prod_prev)? [y/N]: " USE_PROD || true
51
+ if [[ "$USE_PROD" =~ ^[Yy]$ ]]; then
52
+ current_key="$SYNTH_API_KEY_PROD"
53
+ fi
54
+ fi
55
+ fi
56
+
57
+ while [ -z "$current_key" ]; do
58
+ echo
59
+ read -s -p "Enter your SYNTH_API_KEY: " KEY_IN || true
60
+ echo
61
+ if [ -n "$KEY_IN" ]; then
62
+ current_key="$KEY_IN"
63
+ else
64
+ echo "A valid SYNTH_API_KEY is required to continue."
65
+ fi
66
+ done
67
+
68
+ export SYNTH_API_KEY="$current_key"
69
+ if [ -z "${OPENAI_API_KEY:-}" ]; then
70
+ export OPENAI_API_KEY="$SYNTH_API_KEY"
71
+ echo "OPENAI_API_KEY set from SYNTH_API_KEY."
72
+ fi
73
+ }
74
+
75
+ # Step 1: Rollouts to generate v3 traces
76
+ echo
77
+ read -r -p "Run rollouts to generate v3 traces now? [Y/n]: " RUN_ROLLOUTS || true
78
+ RUN_ROLLOUTS=${RUN_ROLLOUTS:-Y}
79
+ if [[ "$RUN_ROLLOUTS" =~ ^[Yy]$ || -z "$RUN_ROLLOUTS" ]]; then
80
+ echo "Using config defaults from examples/finetuning/synth_qwen/config.toml (override below if desired)."
81
+ # Allow quick overrides via envs
82
+ MODEL_INPUT=$(prompt "Model id [Enter=use config]: " "")
83
+ EPISODES_INPUT=$(prompt "Episodes [Enter=use config]: " "")
84
+ MAX_STEPS_INPUT=$(prompt "Max steps [Enter=use config]: " "")
85
+ DIFFICULTY_INPUT=$(prompt "Difficulty [Enter=use config]: " "")
86
+ THINK_INPUT=$(prompt "Enable think mode? (1/0) [Enter=0]: " "0")
87
+
88
+ if [ -n "$MODEL_INPUT" ]; then export CRAFTER_MODEL="$MODEL_INPUT"; fi
89
+ if [ -n "$EPISODES_INPUT" ]; then export CRAFTER_EPISODES="$EPISODES_INPUT"; fi
90
+ if [ -n "$MAX_STEPS_INPUT" ]; then export CRAFTER_MAX_STEPS="$MAX_STEPS_INPUT"; fi
91
+ if [ -n "$DIFFICULTY_INPUT" ]; then export CRAFTER_DIFFICULTY="$DIFFICULTY_INPUT"; fi
92
+ export CRAFTER_THINK="${THINK_INPUT:-0}"
93
+
94
+ echo
95
+ echo "Running rollouts (v3 tracing)..."
96
+ ensure_api_key
97
+ uv run python -m examples.finetuning.synth_qwen.run_crafter_qwen4b
98
+ else
99
+ echo "Skipping rollouts."
100
+ fi
101
+
102
+ # Step 2: Filter traces -> SFT JSONL
103
+ echo
104
+ read -r -p "Filter v3 traces into SFT JSONL now? [Y/n]: " RUN_FILTER || true
105
+ RUN_FILTER=${RUN_FILTER:-Y}
106
+ if [[ "$RUN_FILTER" =~ ^[Yy]$ || -z "$RUN_FILTER" ]]; then
107
+ # Ensure DB path is correctly set for v3 traces (force set to repo-local path)
108
+ DB_PATH_DEFAULT="$PWD/traces/v3/synth_ai.db/dbs/default/data"
109
+ export CRAFTER_DB_URL="sqlite+aiosqlite:///$DB_PATH_DEFAULT"
110
+ echo "Using DB: $CRAFTER_DB_URL"
111
+ mkdir -p ft_data
112
+ echo "You can override filter options; Enter to use config defaults."
113
+ ACH_INPUT=$(prompt "Required achievements (space-separated) [Enter=config]: " "")
114
+ MODELS_INPUT=$(prompt "Restrict to models (space-separated) [Enter=all]: " "")
115
+ OUT_PATH_INPUT=$(prompt "Output JSONL path [Enter=config]: " "")
116
+ MIN_REWARD_INPUT=$(prompt "Min total reward [Enter=config]: " "")
117
+ MAX_COST_INPUT=$(prompt "Max total cost [Enter=config]: " "")
118
+ MAX_TOKENS_INPUT=$(prompt "Max total tokens [Enter=config]: " "")
119
+
120
+ if [ -n "$ACH_INPUT" ]; then export REQUIRED_ACHIEVEMENTS="$ACH_INPUT"; fi
121
+ if [ -n "$MODELS_INPUT" ]; then export MODELS="$MODELS_INPUT"; fi
122
+ if [ -n "$OUT_PATH_INPUT" ]; then export OUTPUT_JSONL="$OUT_PATH_INPUT"; fi
123
+ if [ -n "$MIN_REWARD_INPUT" ]; then export MIN_TOTAL_REWARD="$MIN_REWARD_INPUT"; fi
124
+ if [ -n "$MAX_COST_INPUT" ]; then export MAX_COST="$MAX_COST_INPUT"; fi
125
+ if [ -n "$MAX_TOKENS_INPUT" ]; then export MAX_TOKENS="$MAX_TOKENS_INPUT"; fi
126
+
127
+ echo
128
+ echo "Filtering traces to SFT JSONL..."
129
+ uv run python -m examples.finetuning.synth_qwen.filter_traces_achievements
130
+ else
131
+ echo "Skipping filter."
132
+ fi
133
+
134
+ # Step 3: Kick off SFT (learning service)
135
+ echo
136
+ read -r -p "Kick off SFT training job now? [Y/n]: " RUN_SFT || true
137
+ RUN_SFT=${RUN_SFT:-Y}
138
+ FT_MODEL_ID=""
139
+ if [[ "$RUN_SFT" =~ ^[Yy]$ || -z "$RUN_SFT" ]]; then
140
+ echo "Enter overrides for training job; Enter to use config."
141
+ BASE_MODEL_INPUT=$(prompt "Base model [Enter=config]: " "")
142
+ TRAIN_JSONL_INPUT=$(prompt "Training JSONL path [Enter=config]: " "")
143
+
144
+ if [ -n "$BASE_MODEL_INPUT" ]; then export QWEN_BASE_MODEL="$BASE_MODEL_INPUT"; fi
145
+ if [ -n "$TRAIN_JSONL_INPUT" ]; then export QWEN_TRAINING_JSONL="$TRAIN_JSONL_INPUT"; fi
146
+
147
+ echo
148
+ echo "Starting SFT job..."
149
+ ensure_api_key
150
+ # Stream logs to terminal and save to file for parsing
151
+ mkdir -p logs
152
+ TS=$(date +%Y%m%d_%H%M%S)
153
+ SFT_LOG_FILE="logs/sft_kickoff_${TS}.log"
154
+ # Force unbuffered stdout so polling status prints live through the pipe
155
+ PYTHONUNBUFFERED=1 uv run python -u -m examples.finetuning.synth_qwen.sft_kickoff | tee "$SFT_LOG_FILE"
156
+ # Extract ft model id like ft:Qwen/... (no whitespace or quotes)
157
+ if grep -qE "ft:[^[:space:]\"]+" "$SFT_LOG_FILE"; then
158
+ FT_MODEL_ID=$(grep -Eo "ft:[^[:space:]\"]+" "$SFT_LOG_FILE" | tail -n1)
159
+ echo "Captured fine-tuned model id: $FT_MODEL_ID"
160
+ echo "SFT logs saved to: $SFT_LOG_FILE"
161
+ else
162
+ echo "Warning: could not parse fine-tuned model id from output. Logs: $SFT_LOG_FILE"
163
+ fi
164
+ else
165
+ echo "Skipping SFT kickoff."
166
+ fi
167
+
168
+ # Step 4: Optional rollout with fine-tuned model
169
+ echo
170
+ if [ -n "$FT_MODEL_ID" ]; then
171
+ read -r -p "Roll out fine-tuned model '$FT_MODEL_ID' in Crafter now? [y/N]: " RUN_ROLLOUT_FT || true
172
+ if [[ "$RUN_ROLLOUT_FT" =~ ^[Yy]$ ]]; then
173
+ EPISODES2=$(prompt "Episodes [Enter=config]: " "")
174
+ MAX_STEPS2=$(prompt "Max steps [Enter=config]: " "")
175
+ DIFFICULTY2=$(prompt "Difficulty [Enter=config]: " "")
176
+ THINK2=$(prompt "Enable think mode? (1/0) [Enter=0]: " "0")
177
+
178
+ export CRAFTER_MODEL="$FT_MODEL_ID"
179
+ if [ -n "$EPISODES2" ]; then export CRAFTER_EPISODES="$EPISODES2"; fi
180
+ if [ -n "$MAX_STEPS2" ]; then export CRAFTER_MAX_STEPS="$MAX_STEPS2"; fi
181
+ if [ -n "$DIFFICULTY2" ]; then export CRAFTER_DIFFICULTY="$DIFFICULTY2"; fi
182
+ export CRAFTER_THINK="${THINK2:-0}"
183
+
184
+ echo
185
+ echo "Running rollouts with fine-tuned model..."
186
+ uv run python -m examples.finetuning.synth_qwen.run_crafter_qwen4b
187
+ else
188
+ echo "Skipping rollout of fine-tuned model."
189
+ fi
190
+ else
191
+ echo "No fine-tuned model id available to roll out."
192
+ fi
193
+
194
+ echo
195
+ echo "Done. You can re-run this script to repeat steps as needed."
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kick off Qwen 4B SFT against the learning-v2 service using the exact
4
+ upload/job/polling flow mirrored from test_qwen3_sft_training_v2.py.
5
+
6
+ Environment:
7
+ - LEARNING_V2_BASE_URL (preferred)
8
+ - SYNTH_BASE_URL (fallback if LEARNING_V2_BASE_URL is unset)
9
+ - else defaults to http://localhost:8000/api
10
+ - SYNTH_API_KEY
11
+ - QWEN_BASE_MODEL (optional, defaults to Qwen/Qwen3-4B-Instruct-2507)
12
+ - QWEN_TRAINING_JSONL (optional, defaults to ft_data/qwen4b_crafter_sft.jsonl)
13
+ """
14
+
15
+ import asyncio
16
+ import os
17
+ import time
18
+ import tomllib
19
+ from typing import Any
20
+
21
+ import aiohttp
22
+ from synth_ai.config.base_url import get_learning_v2_base_url
23
+
24
+ API_URL = get_learning_v2_base_url()
25
+ API_KEY = os.getenv("SYNTH_API_KEY")
26
+
27
+ _cfg_path = os.getenv("CRAFTER_CONFIG", "examples/finetuning/synth_qwen/config.toml")
28
+ _cfg: dict[str, Any] = {}
29
+ if os.path.exists(_cfg_path):
30
+ with open(_cfg_path, "rb") as _f:
31
+ _cfg = tomllib.load(_f)
32
+ scfg = _cfg.get("sft", {})
33
+
34
+ MODEL = os.getenv("QWEN_BASE_MODEL", scfg.get("base_model", "Qwen/Qwen3-4B-Instruct-2507"))
35
+ TRAINING_PATH = os.getenv(
36
+ "QWEN_TRAINING_JSONL", scfg.get("training_jsonl", "ft_data/qwen4b_crafter_sft.jsonl")
37
+ )
38
+
39
+
40
+ async def upload_file() -> str:
41
+ headers = {"Authorization": f"Bearer {API_KEY}"}
42
+ async with aiohttp.ClientSession() as session:
43
+ form = aiohttp.FormData()
44
+ with open(TRAINING_PATH, "rb") as f:
45
+ form.add_field(
46
+ "file",
47
+ f,
48
+ filename=os.path.basename(TRAINING_PATH),
49
+ content_type="application/jsonl",
50
+ )
51
+ form.add_field("purpose", "fine-tune")
52
+ async with session.post(f"{API_URL}/files", data=form, headers=headers) as resp:
53
+ assert resp.status == 200, await resp.text()
54
+ data = await resp.json()
55
+ return data["id"]
56
+
57
+
58
+ async def create_job(file_id: str) -> str:
59
+ body = {
60
+ "training_file": file_id,
61
+ "model": MODEL,
62
+ "hyperparameters": {
63
+ "training_type": "sft",
64
+ "n_epochs": int(scfg.get("n_epochs", 1)),
65
+ "batch_size": int(scfg.get("batch_size", 4)),
66
+ },
67
+ "upload_to_wasabi": bool(scfg.get("upload_to_wasabi", True)),
68
+ }
69
+ headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
70
+ async with aiohttp.ClientSession() as session, session.post(
71
+ f"{API_URL}/fine_tuning/jobs", json=body, headers=headers
72
+ ) as resp:
73
+ assert resp.status == 200, await resp.text()
74
+ data = await resp.json()
75
+ return data["id"]
76
+
77
+
78
+ async def await_success(job_id: str) -> dict[str, object]:
79
+ headers = {"Authorization": f"Bearer {API_KEY}"}
80
+ async with aiohttp.ClientSession() as session:
81
+ check_interval_seconds = 15
82
+ for attempt in range(20):
83
+ async with session.get(f"{API_URL}/fine_tuning/jobs/{job_id}", headers=headers) as resp:
84
+ if resp.status != 200:
85
+ await asyncio.sleep(check_interval_seconds)
86
+ continue
87
+ job = await resp.json()
88
+ status = job.get("status")
89
+ print(f"⏳ poll {attempt + 1}/20 – status = {status}")
90
+ if status == "succeeded":
91
+ return job
92
+ if status in {"failed", "cancelled"}:
93
+ raise RuntimeError(f"Training failed: {job.get('error')}")
94
+ await asyncio.sleep(check_interval_seconds)
95
+ raise TimeoutError("Training did not finish in time")
96
+
97
+
98
+ async def main() -> None:
99
+ if not API_URL or not API_KEY:
100
+ raise RuntimeError(
101
+ "LEARNING_V2_BASE_URL/SYNTH_BASE_URL and SYNTH_API_KEY must be set or use the default http://localhost:8000/api"
102
+ )
103
+ print("🚀 Starting Qwen 4B SFT")
104
+ fid = await upload_file()
105
+ job_id = await create_job(fid)
106
+ start = time.time()
107
+ job = await await_success(job_id)
108
+ wall = time.time() - start
109
+
110
+ ft_model = job["fine_tuned_model"]
111
+ tokens = job.get("trained_tokens")
112
+
113
+ print("🟢 Qwen4B SFT fine-tune succeeded →", ft_model)
114
+ print(f"⏱️ wall-clock: {wall:.1f}s | trained_tokens: {tokens}")
115
+
116
+
117
+ if __name__ == "__main__":
118
+ asyncio.run(main())
@@ -0,0 +1,68 @@
1
+ ## Synth-Qwen v1 Finetuning Demo (Qwen3 0.6B)
2
+
3
+ Prereqs
4
+ - Python 3.11+ and uv installed (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
5
+ - Local Env Service is provided by this repo; no `sqld` required
6
+ - One of the following ways to provide backend creds:
7
+ - Set `MONOREPO_BACKEND` to your monorepo backend path (defaults to `../monorepo/backend`) and ensure it has `.env.dev` with at least:
8
+ - `DEV_BACKEND_URL` (e.g., `http://localhost:8000`)
9
+ - `TESTING_LOCAL_SYNTH_API_KEY` (or `SYNTH_API_KEY`)
10
+ - OR export these directly in your shell before running:
11
+ - `LOCAL_BACKEND_URL` (e.g., `http://localhost:8000/api`)
12
+ - `SYNTH_API_KEY` (local dev key)
13
+ - Optional for prod: `.env` in repo root with
14
+ - `PROD_BACKEND_URL=https://agent-learning.onrender.com`
15
+ - `TESTING_PROD_SYNTH_API_KEY=...`
16
+
17
+ Steps
18
+ ```bash
19
+ # 0) Go to repo root so traces and logs land in the right place
20
+ cd "$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
21
+ # Note: commands below resolve backend URL per-call using examples/common/backend.py
22
+
23
+ # 1) Start the local Env Service in background (sqld is disabled via --no-sqld)
24
+ uvx synth-ai serve --no-sqld --env-port 8901
25
+
26
+ # 3) Rollout base Qwen to generate v3 traces (Crafter via Env Service)
27
+ set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; export SYNTH_BASE_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')"; export SYNTH_API_KEY="${DEV_SYNTH_API_KEY:-${SYNTH_API_KEY:-${SYNTH_API_KEY_TEST:-sk-local}}}"; uv run python examples/finetuning/synth_qwen/react_agent_lm.py --model "Qwen/Qwen3-0.6B" --episodes 10 --max-steps 10 --quiet --no-daemon
28
+
29
+ # 4) Convert traces → SFT JSONL (writes training_crafter.jsonl) [use single-script alternative below]
30
+ printf "[filter]\nrequired_achievements=[]\n" > /tmp/crafter_filter.toml && CRAFTER_DB_URL=sqlite+aiosqlite:///$PWD/traces/v3/synth_ai.db CRAFTER_CONFIG=/tmp/crafter_filter.toml WINDOW_MODE=1 MIN_TOTAL_REWARD=1 MIN_ACHIEVEMENTS=0 OUTPUT_JSONL=$PWD/examples/finetuning/synth_qwen_v1/data/training_crafter.jsonl uv run python examples/finetuning/synth_qwen/filter_traces_achievements.py
31
+
32
+ # ALT: Single-script E2E run (prepare → upload → create/start → poll → infer)
33
+ set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; SYNTH_BACKEND_URL_OVERRIDE=prod DEV_BACKEND_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')" uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode dev
34
+
35
+ # Test model
36
+ set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; MODE=dev DEV_BACKEND_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')" uv run python examples/finetuning/synth_qwen_v1/hello_ft_model.py | cat
37
+
38
+ # 8) Rollout agent again using the fine-tuned model from state.json (env service already on 8901, no sqld)
39
+ set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; FT_MODEL=$(uv run python - <<'PY'
40
+ import json, os
41
+ print(json.load(open(os.path.join(os.getcwd(),'examples/finetuning/synth_qwen_v1/state.json')))['fine_tuned_model'])
42
+ PY
43
+ ); SYNTH_BACKEND_URL_OVERRIDE=prod SYNTH_BASE_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')" SYNTH_API_KEY=${TESTING_LOCAL_SYNTH_API_KEY:-${SYNTH_API_KEY:-sk-local}} uv run python examples/finetuning/synth_qwen/react_agent_lm.py --model "$FT_MODEL" --episodes 10 --max-steps 10 --quiet --no-daemon --no-traces
44
+ ```
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+ export LOCAL_BACKEND_URL=http://localhost:8000/api
57
+ export SYNTH_BACKEND_URL_OVERRIDE=local
58
+ uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode local
59
+
60
+ HATCHET_ENV_OVERRIDE=prod python -u -m app.orchestration.hatchet.workflows
61
+
62
+ export LOCAL_BACKEND_URL=http://localhost:8000/api
63
+ export SYNTH_BACKEND_URL_OVERRIDE=dev
64
+ uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode dev
65
+
66
+ export PROD_BACKEND_URL=https://agent-learning.onrender.com/api
67
+ export SYNTH_BACKEND_URL_OVERRIDE=prod
68
+ uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode prod
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Filter v3 Crafter traces into an SFT-ready JSONL using the maintained
4
+ Modal/Synth filter logic (no CLI needed). Intended to be run after
5
+ collecting trajectories with the Crafter runner.
6
+
7
+ Environment:
8
+ - CRAFTER_DB_URL (default: sqlite:///traces_v3_lm_synth/traces.db)
9
+ - OUTPUT_JSONL (default: ft_data/qwen4b_crafter_sft.jsonl)
10
+ - MIN_TOTAL_REWARD (float, default: 1.0)
11
+ - MIN_ACHIEVEMENTS (int, default: 0)
12
+ - MAX_COST (float, default: 10.0)
13
+ - MAX_TOKENS (int, default: 100000)
14
+ - MODELS (optional, space-separated model names; default empty = all)
15
+ """
16
+
17
+ import asyncio
18
+ import json
19
+ import os
20
+ from typing import Any
21
+
22
+ # Reuse the existing filtering implementation
23
+ from synth_ai.environments.examples.crafter_classic.agent_demos.crafter_modal_ft.filter_traces_sft_turso import (
24
+ filter_traces_from_turso,
25
+ )
26
+
27
+
28
+ def build_config() -> dict[str, Any]:
29
+ models_env = os.getenv("MODELS", "").strip()
30
+ models: list[str] = models_env.split() if models_env else []
31
+ return {
32
+ "mode": "trajectory",
33
+ "filters": {
34
+ "min_total_reward": float(os.getenv("MIN_TOTAL_REWARD", "1.0")),
35
+ "min_achievements": int(os.getenv("MIN_ACHIEVEMENTS", "0")),
36
+ "max_cost": float(os.getenv("MAX_COST", "10.0")),
37
+ "max_tokens": int(os.getenv("MAX_TOKENS", "100000")),
38
+ "models": models,
39
+ },
40
+ }
41
+
42
+
43
+ async def main() -> None:
44
+ db_url = os.getenv("CRAFTER_DB_URL", "sqlite:///traces_v3_lm_synth/traces.db")
45
+ output_path = os.getenv("OUTPUT_JSONL", "ft_data/qwen4b_crafter_sft.jsonl")
46
+ config = build_config()
47
+
48
+ print("🤖 Modal/Synth Fine-Tuning Data Filter (v3)")
49
+ print("Using database:", db_url)
50
+ print("Output file:", output_path)
51
+ print("Config:", json.dumps(config, indent=2))
52
+
53
+ num_examples, stats = await filter_traces_from_turso(db_url, output_path, config)
54
+
55
+ print("\n✅ Wrote", num_examples, "training examples to", output_path)
56
+ print("📊 Stats keys:", list(stats.keys()))
57
+
58
+
59
+ if __name__ == "__main__":
60
+ asyncio.run(main())