synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +8 -11
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/run_eval.py +36 -37
- examples/rl/run_rl_and_save.py +5 -5
- examples/rl/task_app/math_single_step.py +65 -43
- examples/rl/task_app/math_task_app.py +3 -3
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +5 -5
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +78 -21
- examples/warming_up_to_rl/groq_test.py +4 -4
- examples/warming_up_to_rl/manage_secrets.py +13 -18
- examples/warming_up_to_rl/run_eval.py +42 -44
- examples/warming_up_to_rl/run_fft_and_save.py +11 -16
- examples/warming_up_to_rl/run_local_rollout.py +1 -3
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
- examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
- examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
- examples/warming_up_to_rl/run_rl_and_save.py +5 -6
- examples/warming_up_to_rl/run_rollout_remote.py +8 -10
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +26 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +128 -21
- synth_ai/api/train/cli.py +80 -64
- synth_ai/api/train/config_finder.py +7 -2
- synth_ai/api/train/env_resolver.py +1 -1
- synth_ai/api/train/pollers.py +2 -1
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +1 -2
- synth_ai/api/train/utils.py +13 -44
- synth_ai/cli/__init__.py +8 -0
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +1 -2
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +2 -1
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +2 -1
- synth_ai/cli/root.py +11 -13
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +529 -179
- synth_ai/cli/traces.py +6 -4
- synth_ai/cli/watch.py +12 -18
- synth_ai/demo_registry.py +1 -1
- synth_ai/demos/core/cli.py +36 -43
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +17 -25
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +2 -5
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +4 -7
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/handshake.py +9 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +18 -10
- synth_ai/inference/client.py +15 -5
- synth_ai/jobs/client.py +78 -83
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +91 -24
- synth_ai/learning/config.py +2 -38
- synth_ai/learning/ft_client.py +4 -59
- synth_ai/learning/health.py +5 -6
- synth_ai/learning/jobs.py +31 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -4
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -8
- synth_ai/{rl → learning/rl}/env_keys.py +39 -15
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -281
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -24
- synth_ai/learning/validators.py +25 -28
- synth_ai/lm/__init__.py +21 -47
- synth_ai/main.py +6 -0
- synth_ai/task/__init__.py +25 -27
- synth_ai/task/apps/__init__.py +7 -8
- synth_ai/task/auth.py +8 -8
- synth_ai/task/client.py +14 -14
- synth_ai/task/contracts.py +36 -35
- synth_ai/task/datasets.py +6 -5
- synth_ai/task/errors.py +10 -10
- synth_ai/task/health.py +17 -9
- synth_ai/task/json.py +58 -23
- synth_ai/task/proxy.py +13 -9
- synth_ai/task/rubrics.py +16 -15
- synth_ai/task/server.py +12 -12
- synth_ai/task/tracing_utils.py +4 -4
- synth_ai/task/vendors.py +5 -6
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/decorators.py +18 -16
- synth_ai/tracing_v3/hooks.py +5 -5
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/session_tracer.py +40 -14
- synth_ai/tracing_v3/storage/base.py +85 -0
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -7
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -4
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/main.py +6 -6
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
- synth_ai/{lm → v0/lm}/overrides.py +2 -2
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -20
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1038
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -152
- examples/rl_old/task_app.py +0 -1131
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/experimental/synth_oss.py +0 -445
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -249
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -838
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
### Quickstart (Crafter) with config.toml
|
|
2
|
-
|
|
3
|
-
All defaults are in `examples/finetuning/synth_qwen/config.toml`. Place your API keys in `.env`.
|
|
4
|
-
|
|
5
|
-
1) Generate traces (v3 tracing)
|
|
6
|
-
```bash
|
|
7
|
-
set -a; source .env 2>/dev/null || true; set +a
|
|
8
|
-
uvpm examples.finetuning.synth_qwen.run_crafter_qwen4b
|
|
9
|
-
```
|
|
10
|
-
|
|
11
|
-
2) Filter traces ā SFT JSONL
|
|
12
|
-
```bash
|
|
13
|
-
uvpm examples.finetuning.synth_qwen.filter_traces_achievements
|
|
14
|
-
```
|
|
15
|
-
|
|
16
|
-
3) Finetune via learning service (SFT)
|
|
17
|
-
```bash
|
|
18
|
-
set -a; source .env 2>/dev/null || true; set +a
|
|
19
|
-
uvpm examples.finetuning.synth_qwen.sft_kickoff
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
4) Evaluate the fine-tuned model in Crafter
|
|
23
|
-
```bash
|
|
24
|
-
set -a; source .env 2>/dev/null || true; set +a
|
|
25
|
-
CRAFTER_MODEL="ft:...your-returned-id..." uvpm examples.finetuning.synth_qwen.run_crafter_qwen4b
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
Notes:
|
|
29
|
-
- If you see a 401, ensure your `.env` contains a valid production `SYNTH_API_KEY` or export it inline.
|
|
30
|
-
- Traces are stored in `traces/v3/synth_ai.db` (sqld); the filter derives the correct internal data file.
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
### Interactive Demo
|
|
34
|
-
|
|
35
|
-
Use the interactive script to walk through rollouts ā filtering ā SFT ā optional rollout of the fine-tuned model.
|
|
36
|
-
|
|
37
|
-
```bash
|
|
38
|
-
examples/finetuning/synth_qwen/run_demo.sh
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
What it does:
|
|
42
|
-
- Prompts for rollout settings (model, episodes, max steps, difficulty, think).
|
|
43
|
-
- Prompts for filter settings (required achievements, model restriction, min reward, max cost/tokens, output path).
|
|
44
|
-
- Starts the SFT job and captures the returned fine-tuned model id.
|
|
45
|
-
- Asks you to confirm before rolling out the fine-tuned model.
|
|
46
|
-
- API key handling:
|
|
47
|
-
- If a `SYNTH_API_KEY` is detected, you're asked to confirm using it.
|
|
48
|
-
- If not set, you can choose `SYNTH_API_KEY_PROD` (if present) or securely enter a key.
|
|
49
|
-
- `OPENAI_API_KEY` is set to the same value if missing to prevent 401s.
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Run Crafter-Classic evaluation (v3 tracing) on Synth's Qwen 4B model.
|
|
4
|
-
|
|
5
|
-
This forwards flags into the canonical `test_crafter_react_agent_lm_synth.py`
|
|
6
|
-
runner that already handles v3 tracing, warm-up, and reporting.
|
|
7
|
-
|
|
8
|
-
Environment variables:
|
|
9
|
-
- CRAFTER_MODEL (default: Qwen/Qwen3-4B-Instruct-2507)
|
|
10
|
-
- CRAFTER_EPISODES (default: 10)
|
|
11
|
-
- CRAFTER_MAX_STEPS (default: 30)
|
|
12
|
-
- CRAFTER_DIFFICULTY (default: easy)
|
|
13
|
-
- CRAFTER_THINK (default: 0 -> use --no-think)
|
|
14
|
-
|
|
15
|
-
It also sets a few runner-specific env flags to enforce short outputs and a single tool call.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
import asyncio
|
|
19
|
-
import os
|
|
20
|
-
import sys
|
|
21
|
-
import tomllib
|
|
22
|
-
|
|
23
|
-
# from synth_ai.environments.examples.crafter_classic.agent_demos.crafter_modal_ft import (
|
|
24
|
-
# test_crafter_react_agent_lm_synth as runner,
|
|
25
|
-
# )
|
|
26
|
-
from examples.finetuning.synth_qwen import (
|
|
27
|
-
react_agent_lm as runner,
|
|
28
|
-
)
|
|
29
|
-
from synth_ai.config.base_url import (
|
|
30
|
-
PROD_BASE_URL_DEFAULT,
|
|
31
|
-
get_learning_v2_base_url,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
# Force prod by default for this runner unless explicitly overridden
|
|
35
|
-
_force_prod = os.getenv("CRAFTER_FORCE_PROD", "1").lower() in ("1", "true", "yes", "on")
|
|
36
|
-
if _force_prod:
|
|
37
|
-
# Sanitize implicit local/dev overrides
|
|
38
|
-
os.environ.pop("SYNTH_LOCAL_BASE_URL", None)
|
|
39
|
-
os.environ.pop("SYNTH_DEV_BASE_URL", None)
|
|
40
|
-
# If caller hasn't explicitly set LEARNING_V2_BASE_URL, lock to prod default
|
|
41
|
-
if "LEARNING_V2_BASE_URL" not in os.environ:
|
|
42
|
-
os.environ["LEARNING_V2_BASE_URL"] = PROD_BASE_URL_DEFAULT
|
|
43
|
-
|
|
44
|
-
# Resolve base URL from shared config (honors LEARNING_V2_BASE_URL and sanitized overrides)
|
|
45
|
-
os.environ["SYNTH_BASE_URL"] = get_learning_v2_base_url()
|
|
46
|
-
|
|
47
|
-
print(f"š§ Using Synth base URL = {os.environ.get('SYNTH_BASE_URL')}")
|
|
48
|
-
|
|
49
|
-
cfg_path = os.getenv("CRAFTER_CONFIG", "examples/finetuning/synth_qwen/config.toml")
|
|
50
|
-
cfg = {}
|
|
51
|
-
if os.path.exists(cfg_path):
|
|
52
|
-
with open(cfg_path, "rb") as f:
|
|
53
|
-
cfg = tomllib.load(f)
|
|
54
|
-
else:
|
|
55
|
-
cfg = {"rollouts": {}}
|
|
56
|
-
rcfg = cfg.get("rollouts", {})
|
|
57
|
-
|
|
58
|
-
MODEL_ID = os.getenv("CRAFTER_MODEL", rcfg.get("model", "Qwen/Qwen3-4B-Instruct-2507"))
|
|
59
|
-
EPISODES = os.getenv("CRAFTER_EPISODES", str(rcfg.get("episodes", 10)))
|
|
60
|
-
MAX_STEPS = os.getenv("CRAFTER_MAX_STEPS", str(rcfg.get("max_steps", 30)))
|
|
61
|
-
DIFFICULTY = os.getenv("CRAFTER_DIFFICULTY", rcfg.get("difficulty", "easy"))
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
async def main() -> None:
|
|
65
|
-
think_env = os.getenv("CRAFTER_THINK", "0").lower()
|
|
66
|
-
enable_think = think_env in ("1", "true", "yes", "on")
|
|
67
|
-
think_flag = "--think" if enable_think else "--no-think"
|
|
68
|
-
|
|
69
|
-
# Tighten prompts and enforce tool calling like the tests do
|
|
70
|
-
os.environ["CRAFTER_STOP_AFTER_TOOL_CALLS"] = "1"
|
|
71
|
-
os.environ["SYNTH_OPENAI_DEBUG"] = "0"
|
|
72
|
-
os.environ["CRAFTER_MAX_TOKENS"] = os.environ.get(
|
|
73
|
-
"CRAFTER_MAX_TOKENS", str(rcfg.get("max_tokens", 2048))
|
|
74
|
-
)
|
|
75
|
-
os.environ["CRAFTER_TOOL_CHOICE"] = os.environ.get(
|
|
76
|
-
"CRAFTER_TOOL_CHOICE", rcfg.get("tool_choice", "required")
|
|
77
|
-
)
|
|
78
|
-
os.environ["CRAFTER_TEMPERATURE"] = os.environ.get(
|
|
79
|
-
"CRAFTER_TEMPERATURE", str(rcfg.get("temperature", 0.4))
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
# Default v3 traces path from config if not already set
|
|
83
|
-
tcfg = cfg.get("traces", {})
|
|
84
|
-
if "SQLD_DB_PATH" not in os.environ and tcfg.get("sqld_db_path"):
|
|
85
|
-
os.environ["SQLD_DB_PATH"] = tcfg["sqld_db_path"]
|
|
86
|
-
os.environ["CRAFTER_SYSTEM_PROMPT"] = (
|
|
87
|
-
"You are CrafterAgent playing the Crafter survival environment. Your goal is to stay alive and unlock as many achievements as possible. "
|
|
88
|
-
"Keep your reasoning very brief and focus on the tool call. Use the tool available to you to play Crafter"
|
|
89
|
-
"ALWAYS provide 2-5 actions. Available actions: move_left, move_right, move_up, move_down, do, sleep, place_stone, place_table, place_furnace, place_plant, "
|
|
90
|
-
"make_wood_pickaxe, make_stone_pickaxe, make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword, noop."
|
|
91
|
-
)
|
|
92
|
-
os.environ["CRAFTER_SUPPRESS_OBS_REMINDER"] = "1"
|
|
93
|
-
# Ensure we log full LM inputs and tools
|
|
94
|
-
os.environ["CRAFTER_LOG_FULL_INPUTS"] = os.environ.get("CRAFTER_LOG_FULL_INPUTS", "1")
|
|
95
|
-
|
|
96
|
-
sys.argv = [
|
|
97
|
-
"crafter_runner",
|
|
98
|
-
"--model",
|
|
99
|
-
MODEL_ID,
|
|
100
|
-
"--episodes",
|
|
101
|
-
str(EPISODES),
|
|
102
|
-
"--max-steps",
|
|
103
|
-
str(MAX_STEPS),
|
|
104
|
-
"--difficulty",
|
|
105
|
-
DIFFICULTY,
|
|
106
|
-
think_flag,
|
|
107
|
-
"--quiet",
|
|
108
|
-
]
|
|
109
|
-
|
|
110
|
-
await runner.main()
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if __name__ == "__main__":
|
|
114
|
-
asyncio.run(main())
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
|
|
3
|
-
# Interactive demo for Qwen 4B Crafter finetuning
|
|
4
|
-
# Mirrors the flow in readme.md and example_log.md
|
|
5
|
-
|
|
6
|
-
set -euo pipefail
|
|
7
|
-
|
|
8
|
-
# Locate repo root and cd there
|
|
9
|
-
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
|
10
|
-
cd "$SCRIPT_DIR/../../.."
|
|
11
|
-
|
|
12
|
-
echo "Synth Qwen4B finetuning demo (Crafter)"
|
|
13
|
-
|
|
14
|
-
# Load env (prefer example-local .env, then repo .env)
|
|
15
|
-
set +u
|
|
16
|
-
set -a
|
|
17
|
-
if [ -f "$SCRIPT_DIR/.env" ]; then source "$SCRIPT_DIR/.env"; fi
|
|
18
|
-
if [ -f ".env" ]; then source ".env"; fi
|
|
19
|
-
set +a
|
|
20
|
-
set -u
|
|
21
|
-
|
|
22
|
-
# Helper: prompt with default
|
|
23
|
-
prompt() {
|
|
24
|
-
local msg="$1"; shift
|
|
25
|
-
local default="$1"; shift
|
|
26
|
-
local var
|
|
27
|
-
read -r -p "$msg" var || true
|
|
28
|
-
if [ -z "$var" ]; then
|
|
29
|
-
echo "$default"
|
|
30
|
-
else
|
|
31
|
-
echo "$var"
|
|
32
|
-
fi
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
# Ensure API key present (and set OPENAI_API_KEY fallback)
|
|
36
|
-
ensure_api_key() {
|
|
37
|
-
local current_key="${SYNTH_API_KEY:-}"
|
|
38
|
-
if [ -n "$current_key" ]; then
|
|
39
|
-
local preview="${current_key:0:6}...${current_key: -4}"
|
|
40
|
-
read -r -p "Detected SYNTH_API_KEY ($preview). Use this key? [Y/n]: " USE_CUR || true
|
|
41
|
-
USE_CUR=${USE_CUR:-Y}
|
|
42
|
-
if [[ ! "$USE_CUR" =~ ^[Yy]$ ]]; then
|
|
43
|
-
current_key=""
|
|
44
|
-
fi
|
|
45
|
-
fi
|
|
46
|
-
|
|
47
|
-
if [ -z "$current_key" ]; then
|
|
48
|
-
if [ -n "${SYNTH_API_KEY_PROD:-}" ]; then
|
|
49
|
-
local prod_prev="${SYNTH_API_KEY_PROD:0:6}...${SYNTH_API_KEY_PROD: -4}"
|
|
50
|
-
read -r -p "Use SYNTH_API_KEY_PROD ($prod_prev)? [y/N]: " USE_PROD || true
|
|
51
|
-
if [[ "$USE_PROD" =~ ^[Yy]$ ]]; then
|
|
52
|
-
current_key="$SYNTH_API_KEY_PROD"
|
|
53
|
-
fi
|
|
54
|
-
fi
|
|
55
|
-
fi
|
|
56
|
-
|
|
57
|
-
while [ -z "$current_key" ]; do
|
|
58
|
-
echo
|
|
59
|
-
read -s -p "Enter your SYNTH_API_KEY: " KEY_IN || true
|
|
60
|
-
echo
|
|
61
|
-
if [ -n "$KEY_IN" ]; then
|
|
62
|
-
current_key="$KEY_IN"
|
|
63
|
-
else
|
|
64
|
-
echo "A valid SYNTH_API_KEY is required to continue."
|
|
65
|
-
fi
|
|
66
|
-
done
|
|
67
|
-
|
|
68
|
-
export SYNTH_API_KEY="$current_key"
|
|
69
|
-
if [ -z "${OPENAI_API_KEY:-}" ]; then
|
|
70
|
-
export OPENAI_API_KEY="$SYNTH_API_KEY"
|
|
71
|
-
echo "OPENAI_API_KEY set from SYNTH_API_KEY."
|
|
72
|
-
fi
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
# Step 1: Rollouts to generate v3 traces
|
|
76
|
-
echo
|
|
77
|
-
read -r -p "Run rollouts to generate v3 traces now? [Y/n]: " RUN_ROLLOUTS || true
|
|
78
|
-
RUN_ROLLOUTS=${RUN_ROLLOUTS:-Y}
|
|
79
|
-
if [[ "$RUN_ROLLOUTS" =~ ^[Yy]$ || -z "$RUN_ROLLOUTS" ]]; then
|
|
80
|
-
echo "Using config defaults from examples/finetuning/synth_qwen/config.toml (override below if desired)."
|
|
81
|
-
# Allow quick overrides via envs
|
|
82
|
-
MODEL_INPUT=$(prompt "Model id [Enter=use config]: " "")
|
|
83
|
-
EPISODES_INPUT=$(prompt "Episodes [Enter=use config]: " "")
|
|
84
|
-
MAX_STEPS_INPUT=$(prompt "Max steps [Enter=use config]: " "")
|
|
85
|
-
DIFFICULTY_INPUT=$(prompt "Difficulty [Enter=use config]: " "")
|
|
86
|
-
THINK_INPUT=$(prompt "Enable think mode? (1/0) [Enter=0]: " "0")
|
|
87
|
-
|
|
88
|
-
if [ -n "$MODEL_INPUT" ]; then export CRAFTER_MODEL="$MODEL_INPUT"; fi
|
|
89
|
-
if [ -n "$EPISODES_INPUT" ]; then export CRAFTER_EPISODES="$EPISODES_INPUT"; fi
|
|
90
|
-
if [ -n "$MAX_STEPS_INPUT" ]; then export CRAFTER_MAX_STEPS="$MAX_STEPS_INPUT"; fi
|
|
91
|
-
if [ -n "$DIFFICULTY_INPUT" ]; then export CRAFTER_DIFFICULTY="$DIFFICULTY_INPUT"; fi
|
|
92
|
-
export CRAFTER_THINK="${THINK_INPUT:-0}"
|
|
93
|
-
|
|
94
|
-
echo
|
|
95
|
-
echo "Running rollouts (v3 tracing)..."
|
|
96
|
-
ensure_api_key
|
|
97
|
-
uv run python -m examples.finetuning.synth_qwen.run_crafter_qwen4b
|
|
98
|
-
else
|
|
99
|
-
echo "Skipping rollouts."
|
|
100
|
-
fi
|
|
101
|
-
|
|
102
|
-
# Step 2: Filter traces -> SFT JSONL
|
|
103
|
-
echo
|
|
104
|
-
read -r -p "Filter v3 traces into SFT JSONL now? [Y/n]: " RUN_FILTER || true
|
|
105
|
-
RUN_FILTER=${RUN_FILTER:-Y}
|
|
106
|
-
if [[ "$RUN_FILTER" =~ ^[Yy]$ || -z "$RUN_FILTER" ]]; then
|
|
107
|
-
# Ensure DB path is correctly set for v3 traces (force set to repo-local path)
|
|
108
|
-
DB_PATH_DEFAULT="$PWD/traces/v3/synth_ai.db/dbs/default/data"
|
|
109
|
-
export CRAFTER_DB_URL="sqlite+aiosqlite:///$DB_PATH_DEFAULT"
|
|
110
|
-
echo "Using DB: $CRAFTER_DB_URL"
|
|
111
|
-
mkdir -p ft_data
|
|
112
|
-
echo "You can override filter options; Enter to use config defaults."
|
|
113
|
-
ACH_INPUT=$(prompt "Required achievements (space-separated) [Enter=config]: " "")
|
|
114
|
-
MODELS_INPUT=$(prompt "Restrict to models (space-separated) [Enter=all]: " "")
|
|
115
|
-
OUT_PATH_INPUT=$(prompt "Output JSONL path [Enter=config]: " "")
|
|
116
|
-
MIN_REWARD_INPUT=$(prompt "Min total reward [Enter=config]: " "")
|
|
117
|
-
MAX_COST_INPUT=$(prompt "Max total cost [Enter=config]: " "")
|
|
118
|
-
MAX_TOKENS_INPUT=$(prompt "Max total tokens [Enter=config]: " "")
|
|
119
|
-
|
|
120
|
-
if [ -n "$ACH_INPUT" ]; then export REQUIRED_ACHIEVEMENTS="$ACH_INPUT"; fi
|
|
121
|
-
if [ -n "$MODELS_INPUT" ]; then export MODELS="$MODELS_INPUT"; fi
|
|
122
|
-
if [ -n "$OUT_PATH_INPUT" ]; then export OUTPUT_JSONL="$OUT_PATH_INPUT"; fi
|
|
123
|
-
if [ -n "$MIN_REWARD_INPUT" ]; then export MIN_TOTAL_REWARD="$MIN_REWARD_INPUT"; fi
|
|
124
|
-
if [ -n "$MAX_COST_INPUT" ]; then export MAX_COST="$MAX_COST_INPUT"; fi
|
|
125
|
-
if [ -n "$MAX_TOKENS_INPUT" ]; then export MAX_TOKENS="$MAX_TOKENS_INPUT"; fi
|
|
126
|
-
|
|
127
|
-
echo
|
|
128
|
-
echo "Filtering traces to SFT JSONL..."
|
|
129
|
-
uv run python -m examples.finetuning.synth_qwen.filter_traces_achievements
|
|
130
|
-
else
|
|
131
|
-
echo "Skipping filter."
|
|
132
|
-
fi
|
|
133
|
-
|
|
134
|
-
# Step 3: Kick off SFT (learning service)
|
|
135
|
-
echo
|
|
136
|
-
read -r -p "Kick off SFT training job now? [Y/n]: " RUN_SFT || true
|
|
137
|
-
RUN_SFT=${RUN_SFT:-Y}
|
|
138
|
-
FT_MODEL_ID=""
|
|
139
|
-
if [[ "$RUN_SFT" =~ ^[Yy]$ || -z "$RUN_SFT" ]]; then
|
|
140
|
-
echo "Enter overrides for training job; Enter to use config."
|
|
141
|
-
BASE_MODEL_INPUT=$(prompt "Base model [Enter=config]: " "")
|
|
142
|
-
TRAIN_JSONL_INPUT=$(prompt "Training JSONL path [Enter=config]: " "")
|
|
143
|
-
|
|
144
|
-
if [ -n "$BASE_MODEL_INPUT" ]; then export QWEN_BASE_MODEL="$BASE_MODEL_INPUT"; fi
|
|
145
|
-
if [ -n "$TRAIN_JSONL_INPUT" ]; then export QWEN_TRAINING_JSONL="$TRAIN_JSONL_INPUT"; fi
|
|
146
|
-
|
|
147
|
-
echo
|
|
148
|
-
echo "Starting SFT job..."
|
|
149
|
-
ensure_api_key
|
|
150
|
-
# Stream logs to terminal and save to file for parsing
|
|
151
|
-
mkdir -p logs
|
|
152
|
-
TS=$(date +%Y%m%d_%H%M%S)
|
|
153
|
-
SFT_LOG_FILE="logs/sft_kickoff_${TS}.log"
|
|
154
|
-
# Force unbuffered stdout so polling status prints live through the pipe
|
|
155
|
-
PYTHONUNBUFFERED=1 uv run python -u -m examples.finetuning.synth_qwen.sft_kickoff | tee "$SFT_LOG_FILE"
|
|
156
|
-
# Extract ft model id like ft:Qwen/... (no whitespace or quotes)
|
|
157
|
-
if grep -qE "ft:[^[:space:]\"]+" "$SFT_LOG_FILE"; then
|
|
158
|
-
FT_MODEL_ID=$(grep -Eo "ft:[^[:space:]\"]+" "$SFT_LOG_FILE" | tail -n1)
|
|
159
|
-
echo "Captured fine-tuned model id: $FT_MODEL_ID"
|
|
160
|
-
echo "SFT logs saved to: $SFT_LOG_FILE"
|
|
161
|
-
else
|
|
162
|
-
echo "Warning: could not parse fine-tuned model id from output. Logs: $SFT_LOG_FILE"
|
|
163
|
-
fi
|
|
164
|
-
else
|
|
165
|
-
echo "Skipping SFT kickoff."
|
|
166
|
-
fi
|
|
167
|
-
|
|
168
|
-
# Step 4: Optional rollout with fine-tuned model
|
|
169
|
-
echo
|
|
170
|
-
if [ -n "$FT_MODEL_ID" ]; then
|
|
171
|
-
read -r -p "Roll out fine-tuned model '$FT_MODEL_ID' in Crafter now? [y/N]: " RUN_ROLLOUT_FT || true
|
|
172
|
-
if [[ "$RUN_ROLLOUT_FT" =~ ^[Yy]$ ]]; then
|
|
173
|
-
EPISODES2=$(prompt "Episodes [Enter=config]: " "")
|
|
174
|
-
MAX_STEPS2=$(prompt "Max steps [Enter=config]: " "")
|
|
175
|
-
DIFFICULTY2=$(prompt "Difficulty [Enter=config]: " "")
|
|
176
|
-
THINK2=$(prompt "Enable think mode? (1/0) [Enter=0]: " "0")
|
|
177
|
-
|
|
178
|
-
export CRAFTER_MODEL="$FT_MODEL_ID"
|
|
179
|
-
if [ -n "$EPISODES2" ]; then export CRAFTER_EPISODES="$EPISODES2"; fi
|
|
180
|
-
if [ -n "$MAX_STEPS2" ]; then export CRAFTER_MAX_STEPS="$MAX_STEPS2"; fi
|
|
181
|
-
if [ -n "$DIFFICULTY2" ]; then export CRAFTER_DIFFICULTY="$DIFFICULTY2"; fi
|
|
182
|
-
export CRAFTER_THINK="${THINK2:-0}"
|
|
183
|
-
|
|
184
|
-
echo
|
|
185
|
-
echo "Running rollouts with fine-tuned model..."
|
|
186
|
-
uv run python -m examples.finetuning.synth_qwen.run_crafter_qwen4b
|
|
187
|
-
else
|
|
188
|
-
echo "Skipping rollout of fine-tuned model."
|
|
189
|
-
fi
|
|
190
|
-
else
|
|
191
|
-
echo "No fine-tuned model id available to roll out."
|
|
192
|
-
fi
|
|
193
|
-
|
|
194
|
-
echo
|
|
195
|
-
echo "Done. You can re-run this script to repeat steps as needed."
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Kick off Qwen 4B SFT against the learning-v2 service using the exact
|
|
4
|
-
upload/job/polling flow mirrored from test_qwen3_sft_training_v2.py.
|
|
5
|
-
|
|
6
|
-
Environment:
|
|
7
|
-
- LEARNING_V2_BASE_URL (preferred)
|
|
8
|
-
- SYNTH_BASE_URL (fallback if LEARNING_V2_BASE_URL is unset)
|
|
9
|
-
- else defaults to http://localhost:8000/api
|
|
10
|
-
- SYNTH_API_KEY
|
|
11
|
-
- QWEN_BASE_MODEL (optional, defaults to Qwen/Qwen3-4B-Instruct-2507)
|
|
12
|
-
- QWEN_TRAINING_JSONL (optional, defaults to ft_data/qwen4b_crafter_sft.jsonl)
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
import asyncio
|
|
16
|
-
import os
|
|
17
|
-
import time
|
|
18
|
-
import tomllib
|
|
19
|
-
from typing import Any
|
|
20
|
-
|
|
21
|
-
import aiohttp
|
|
22
|
-
from synth_ai.config.base_url import get_learning_v2_base_url
|
|
23
|
-
|
|
24
|
-
API_URL = get_learning_v2_base_url()
|
|
25
|
-
API_KEY = os.getenv("SYNTH_API_KEY")
|
|
26
|
-
|
|
27
|
-
_cfg_path = os.getenv("CRAFTER_CONFIG", "examples/finetuning/synth_qwen/config.toml")
|
|
28
|
-
_cfg: dict[str, Any] = {}
|
|
29
|
-
if os.path.exists(_cfg_path):
|
|
30
|
-
with open(_cfg_path, "rb") as _f:
|
|
31
|
-
_cfg = tomllib.load(_f)
|
|
32
|
-
scfg = _cfg.get("sft", {})
|
|
33
|
-
|
|
34
|
-
MODEL = os.getenv("QWEN_BASE_MODEL", scfg.get("base_model", "Qwen/Qwen3-4B-Instruct-2507"))
|
|
35
|
-
TRAINING_PATH = os.getenv(
|
|
36
|
-
"QWEN_TRAINING_JSONL", scfg.get("training_jsonl", "ft_data/qwen4b_crafter_sft.jsonl")
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
async def upload_file() -> str:
|
|
41
|
-
headers = {"Authorization": f"Bearer {API_KEY}"}
|
|
42
|
-
async with aiohttp.ClientSession() as session:
|
|
43
|
-
form = aiohttp.FormData()
|
|
44
|
-
with open(TRAINING_PATH, "rb") as f:
|
|
45
|
-
form.add_field(
|
|
46
|
-
"file",
|
|
47
|
-
f,
|
|
48
|
-
filename=os.path.basename(TRAINING_PATH),
|
|
49
|
-
content_type="application/jsonl",
|
|
50
|
-
)
|
|
51
|
-
form.add_field("purpose", "fine-tune")
|
|
52
|
-
async with session.post(f"{API_URL}/files", data=form, headers=headers) as resp:
|
|
53
|
-
assert resp.status == 200, await resp.text()
|
|
54
|
-
data = await resp.json()
|
|
55
|
-
return data["id"]
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
async def create_job(file_id: str) -> str:
|
|
59
|
-
body = {
|
|
60
|
-
"training_file": file_id,
|
|
61
|
-
"model": MODEL,
|
|
62
|
-
"hyperparameters": {
|
|
63
|
-
"training_type": "sft",
|
|
64
|
-
"n_epochs": int(scfg.get("n_epochs", 1)),
|
|
65
|
-
"batch_size": int(scfg.get("batch_size", 4)),
|
|
66
|
-
},
|
|
67
|
-
"upload_to_wasabi": bool(scfg.get("upload_to_wasabi", True)),
|
|
68
|
-
}
|
|
69
|
-
headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
|
|
70
|
-
async with (
|
|
71
|
-
aiohttp.ClientSession() as session,
|
|
72
|
-
session.post(f"{API_URL}/fine_tuning/jobs", json=body, headers=headers) as resp,
|
|
73
|
-
):
|
|
74
|
-
assert resp.status == 200, await resp.text()
|
|
75
|
-
data = await resp.json()
|
|
76
|
-
return data["id"]
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
async def await_success(job_id: str) -> dict[str, object]:
|
|
80
|
-
headers = {"Authorization": f"Bearer {API_KEY}"}
|
|
81
|
-
async with aiohttp.ClientSession() as session:
|
|
82
|
-
check_interval_seconds = 15
|
|
83
|
-
for attempt in range(20):
|
|
84
|
-
async with session.get(f"{API_URL}/fine_tuning/jobs/{job_id}", headers=headers) as resp:
|
|
85
|
-
if resp.status != 200:
|
|
86
|
-
await asyncio.sleep(check_interval_seconds)
|
|
87
|
-
continue
|
|
88
|
-
job = await resp.json()
|
|
89
|
-
status = job.get("status")
|
|
90
|
-
print(f"ā³ poll {attempt + 1}/20 ā status = {status}")
|
|
91
|
-
if status == "succeeded":
|
|
92
|
-
return job
|
|
93
|
-
if status in {"failed", "cancelled"}:
|
|
94
|
-
raise RuntimeError(f"Training failed: {job.get('error')}")
|
|
95
|
-
await asyncio.sleep(check_interval_seconds)
|
|
96
|
-
raise TimeoutError("Training did not finish in time")
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
async def main() -> None:
|
|
100
|
-
if not API_URL or not API_KEY:
|
|
101
|
-
raise RuntimeError(
|
|
102
|
-
"LEARNING_V2_BASE_URL/SYNTH_BASE_URL and SYNTH_API_KEY must be set or use the default http://localhost:8000/api"
|
|
103
|
-
)
|
|
104
|
-
print("š Starting Qwen 4B SFT")
|
|
105
|
-
fid = await upload_file()
|
|
106
|
-
job_id = await create_job(fid)
|
|
107
|
-
start = time.time()
|
|
108
|
-
job = await await_success(job_id)
|
|
109
|
-
wall = time.time() - start
|
|
110
|
-
|
|
111
|
-
ft_model = job["fine_tuned_model"]
|
|
112
|
-
tokens = job.get("trained_tokens")
|
|
113
|
-
|
|
114
|
-
print("š¢ Qwen4B SFT fine-tune succeeded ā", ft_model)
|
|
115
|
-
print(f"ā±ļø wall-clock: {wall:.1f}s | trained_tokens: {tokens}")
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if __name__ == "__main__":
|
|
119
|
-
asyncio.run(main())
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
## Synth-Qwen v1 Finetuning Demo (Qwen3 0.6B)
|
|
2
|
-
|
|
3
|
-
Prereqs
|
|
4
|
-
- Python 3.11+ and uv installed (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
|
|
5
|
-
- Local Env Service is provided by this repo; no `sqld` required
|
|
6
|
-
- One of the following ways to provide backend creds:
|
|
7
|
-
- Set `MONOREPO_BACKEND` to your monorepo backend path (defaults to `../monorepo/backend`) and ensure it has `.env.dev` with at least:
|
|
8
|
-
- `DEV_BACKEND_URL` (e.g., `http://localhost:8000`)
|
|
9
|
-
- `TESTING_LOCAL_SYNTH_API_KEY` (or `SYNTH_API_KEY`)
|
|
10
|
-
- OR export these directly in your shell before running:
|
|
11
|
-
- `LOCAL_BACKEND_URL` (e.g., `http://localhost:8000/api`)
|
|
12
|
-
- `SYNTH_API_KEY` (local dev key)
|
|
13
|
-
- Optional for prod: `.env` in repo root with
|
|
14
|
-
- `PROD_BACKEND_URL=https://agent-learning.onrender.com`
|
|
15
|
-
- `TESTING_PROD_SYNTH_API_KEY=...`
|
|
16
|
-
|
|
17
|
-
Steps
|
|
18
|
-
```bash
|
|
19
|
-
# 0) Go to repo root so traces and logs land in the right place
|
|
20
|
-
cd "$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
|
|
21
|
-
# Note: commands below resolve backend URL per-call using examples/common/backend.py
|
|
22
|
-
|
|
23
|
-
# 1) Start local services (sqld + Env Service) in background
|
|
24
|
-
uvx synth-ai serve --no-sqld --env-port 8901
|
|
25
|
-
|
|
26
|
-
# 3) Rollout base Qwen to generate v3 traces (Crafter via Env Service)
|
|
27
|
-
set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; export SYNTH_BASE_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')"; export SYNTH_API_KEY="${DEV_SYNTH_API_KEY:-${SYNTH_API_KEY:-${SYNTH_API_KEY_TEST:-sk-local}}}"; uv run python examples/finetuning/synth_qwen/react_agent_lm.py --model "Qwen/Qwen3-0.6B" --episodes 10 --max-steps 10 --quiet --no-daemon
|
|
28
|
-
|
|
29
|
-
# 4) Convert traces ā SFT JSONL (writes training.jsonl) [use single-script alternative below]
|
|
30
|
-
printf "[filter]\nrequired_achievements=[]\n" > /tmp/crater_filter.toml && CRAFTER_DB_URL=sqlite+aiosqlite:///$PWD/traces/v3/synth_ai.db CRAFTER_CONFIG=/tmp/crater_filter.toml WINDOW_MODE=1 MIN_TOTAL_REWARD=1 MIN_ACHIEVEMENTS=0 OUTPUT_JSONL=$PWD/examples/finetuning/synth_qwen_v1/data/training_crafter.jsonl uv run python examples/finetuning/synth_qwen/filter_traces_achievements.py
|
|
31
|
-
|
|
32
|
-
# ALT: Single-script E2E run (prepare ā upload ā create/start ā poll ā infer)
|
|
33
|
-
set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; SYNTH_BACKEND_URL_OVERRIDE=prod DEV_BACKEND_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')" uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode dev
|
|
34
|
-
|
|
35
|
-
# Test model
|
|
36
|
-
set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; MODE=dev DEV_BACKEND_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')" uv run python examples/finetuning/synth_qwen_v1/hello_ft_model.py | cat
|
|
37
|
-
|
|
38
|
-
# 8) Rollout agent again using the fine-tuned model from state.json (env service already on 8901, no sqld)
|
|
39
|
-
set -a; MONOREPO_BACKEND=${MONOREPO_BACKEND:-../monorepo/backend}; source "$MONOREPO_BACKEND/.env.dev"; set +a; FT_MODEL=$(uv run python - <<'PY'
|
|
40
|
-
import json, os
|
|
41
|
-
print(json.load(open(os.path.join(os.getcwd(),'examples/finetuning/synth_qwen_v1/state.json')))['fine_tuned_model'])
|
|
42
|
-
PY
|
|
43
|
-
); SYNTH_BACKEND_URL_OVERRIDE=prod SYNTH_BASE_URL="$(uv run python -c 'from examples.common.backend import resolve_backend_url;print(resolve_backend_url())')" SYNTH_API_KEY=${TESTING_LOCAL_SYNTH_API_KEY:-${SYNTH_API_KEY:-sk-local}} uv run python examples/finetuning/synth_qwen/react_agent_lm.py --model "$FT_MODEL" --episodes 10 --max-steps 10 --quiet --no-daemon --no-traces
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
export LOCAL_BACKEND_URL=http://localhost:8000/api
|
|
57
|
-
export SYNTH_BACKEND_URL_OVERRIDE=local
|
|
58
|
-
uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode local
|
|
59
|
-
|
|
60
|
-
HATCHET_ENV_OVERRIDE=prod python -u -m app.orchestration.hatchet.workflows
|
|
61
|
-
|
|
62
|
-
export LOCAL_BACKEND_URL=http://localhost:8000/api
|
|
63
|
-
export SYNTH_BACKEND_URL_OVERRIDE=dev
|
|
64
|
-
uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode dev
|
|
65
|
-
|
|
66
|
-
export PROD_BACKEND_URL=https://agent-learning.onrender.com/api
|
|
67
|
-
export SYNTH_BACKEND_URL_OVERRIDE=prod
|
|
68
|
-
uv run python examples/finetuning/synth_qwen_v1/run_ft_job.py --mode prod
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Filter v3 Crafter traces into an SFT-ready JSONL using the maintained
|
|
4
|
-
Modal/Synth filter logic (no CLI needed). Intended to be run after
|
|
5
|
-
collecting trajectories with the Crafter runner.
|
|
6
|
-
|
|
7
|
-
Environment:
|
|
8
|
-
- CRAFTER_DB_URL (default: sqlite:///traces_v3_lm_synth/traces.db)
|
|
9
|
-
- OUTPUT_JSONL (default: ft_data/qwen4b_crafter_sft.jsonl)
|
|
10
|
-
- MIN_TOTAL_REWARD (float, default: 1.0)
|
|
11
|
-
- MIN_ACHIEVEMENTS (int, default: 0)
|
|
12
|
-
- MAX_COST (float, default: 10.0)
|
|
13
|
-
- MAX_TOKENS (int, default: 100000)
|
|
14
|
-
- MODELS (optional, space-separated model names; default empty = all)
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
import asyncio
|
|
18
|
-
import json
|
|
19
|
-
import os
|
|
20
|
-
from typing import Any
|
|
21
|
-
|
|
22
|
-
# Reuse the existing filtering implementation
|
|
23
|
-
from synth_ai.environments.examples.crafter_classic.agent_demos.crafter_modal_ft.filter_traces_sft_turso import (
|
|
24
|
-
filter_traces_from_turso,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def build_config() -> dict[str, Any]:
|
|
29
|
-
models_env = os.getenv("MODELS", "").strip()
|
|
30
|
-
models: list[str] = models_env.split() if models_env else []
|
|
31
|
-
return {
|
|
32
|
-
"mode": "trajectory",
|
|
33
|
-
"filters": {
|
|
34
|
-
"min_total_reward": float(os.getenv("MIN_TOTAL_REWARD", "1.0")),
|
|
35
|
-
"min_achievements": int(os.getenv("MIN_ACHIEVEMENTS", "0")),
|
|
36
|
-
"max_cost": float(os.getenv("MAX_COST", "10.0")),
|
|
37
|
-
"max_tokens": int(os.getenv("MAX_TOKENS", "100000")),
|
|
38
|
-
"models": models,
|
|
39
|
-
},
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
async def main() -> None:
|
|
44
|
-
db_url = os.getenv("CRAFTER_DB_URL", "sqlite:///traces_v3_lm_synth/traces.db")
|
|
45
|
-
output_path = os.getenv("OUTPUT_JSONL", "ft_data/qwen4b_crafter_sft.jsonl")
|
|
46
|
-
config = build_config()
|
|
47
|
-
|
|
48
|
-
print("š¤ Modal/Synth Fine-Tuning Data Filter (v3)")
|
|
49
|
-
print("Using database:", db_url)
|
|
50
|
-
print("Output file:", output_path)
|
|
51
|
-
print("Config:", json.dumps(config, indent=2))
|
|
52
|
-
|
|
53
|
-
num_examples, stats = await filter_traces_from_turso(db_url, output_path, config)
|
|
54
|
-
|
|
55
|
-
print("\nā
Wrote", num_examples, "training examples to", output_path)
|
|
56
|
-
print("š Stats keys:", list(stats.keys()))
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if __name__ == "__main__":
|
|
60
|
-
asyncio.run(main())
|