synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
- examples/multi_step/crafter_rl_lora.md +29 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth_ai/__init__.py +1 -0
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1699 -259
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
examples/__init__.py
ADDED
@@ -0,0 +1,16 @@
+"""Top-level package for Synth AI example environments and utilities."""
+
+from importlib import resources as _resources
+
+__all__ = ["path_for"]
+
+
+def path_for(package: str, resource: str) -> str:
+    """Return absolute path for a packaged resource inside ``examples``.
+
+    This helper mirrors the one under ``synth_ai`` so hosted apps can access
+    bundled assets without needing to install the repo in editable mode.
+    """
+
+    with _resources.as_file(_resources.files(f"examples.{package}") / resource) as path:
+        return str(path)
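
For orientation, a minimal usage sketch of the new `path_for` helper follows; the `qwen_coder` package and `todos.md` resource are illustrative picks from the file list above rather than a documented pairing.

```python
# Hypothetical usage of examples.path_for; any resource bundled under
# examples/<package>/ resolves the same way.
from examples import path_for

todos_path = path_for("qwen_coder", "todos.md")  # absolute path to examples/qwen_coder/todos.md
print(todos_path)
```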
examples/crafter_debug_render.py
CHANGED
@@ -10,10 +10,12 @@ Quick local Crafter observation inspector.
 Run:
     uv run python examples/crafter_debug_render.py --base-url http://localhost:8901 --seed 1
 """
+
 import argparse
+import contextlib
 import math
 import os
-from typing import Any
+from typing import Any
 
 import httpx
 
@@ -36,21 +38,19 @@ def try_import_crafter_mapping():
                 id_to_item[ind] = label.lower()
             return id_to_item
         finally:
-
+            with contextlib.suppress(Exception):
                 env.close()
-            except Exception:
-                pass
     except Exception:
         return None
 
 
-def format_semantic_map_view(obs:
+def format_semantic_map_view(obs: dict[str, Any], view_size: int = 7) -> str:
     sem = obs.get("semantic_map") or obs.get("sem_map") or obs.get("map")
     if sem is None:
         return "No semantic map available"
 
     # Normalize to 2D grid
-    grid:
+    grid: list[list[int]]
     if isinstance(sem, list) and sem and isinstance(sem[0], list):
         grid = sem
     elif isinstance(sem, list):
@@ -81,10 +81,10 @@ def format_semantic_map_view(obs: Dict[str, Any], view_size: int = 7) -> str:
     px, py = rows // 2, cols // 2
 
     half = max(1, view_size // 2)
-    lines:
+    lines: list[str] = []
     visible: set[str] = set()
     for dy in range(-half, half + 1):
-        row_cells:
+        row_cells: list[str] = []
         for dx in range(-half, half + 1):
             x = px + dx
             y = py + dy
@@ -117,13 +117,17 @@ def format_semantic_map_view(obs: Dict[str, Any], view_size: int = 7) -> str:
                 row_cells.append("void")
         lines.append(" ".join(row_cells))
 
-    legend =
+    legend = (
+        f"Visible items: {', '.join(sorted(visible))}" if visible else "No notable items visible"
+    )
     return "\n".join(lines) + "\n" + legend
 
 
 async def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument(
+    parser.add_argument(
+        "--base-url", default=os.getenv("CRAFTER_BASE_URL", "http://localhost:8901")
+    )
     parser.add_argument("--seed", type=int, default=1)
     args = parser.parse_args()
 
@@ -145,7 +149,11 @@ async def main():
     print(f"Health: {obs.get('health', 10)}/10")
     print(f"Hunger: {obs.get('food', 10)}/10")
     print(f"Energy: {obs.get('energy', 10)}/10")
-    inv_items =
+    inv_items = (
+        ", ".join([f"{k}: {v}" for k, v in inv.items() if v])
+        if isinstance(inv, dict)
+        else str(inv)
+    )
     print(f"Inventory: {inv_items if inv_items else 'empty'}")
     if isinstance(ach, dict):
         unlocked = sum(1 for v in ach.values() if v)
@@ -166,15 +174,13 @@ async def main():
     print(format_semantic_map_view(sobs, view_size=7))
 
     # Cleanup
-
-    await client.post(
-
-
+    with contextlib.suppress(Exception):
+        await client.post(
+            f"{args.base_url}/env/CrafterClassic/terminate", json={"env_id": env_id}
+        )
 
 
 if __name__ == "__main__":
     import asyncio
 
     asyncio.run(main())
-
-
examples/dev/qwen3_32b_qlora_4xh100.toml
ADDED
@@ -0,0 +1,40 @@
+[job]
+model = "Qwen/Qwen3-32B"
+# Optionally set here; you can also pass --dataset
+# data = "/abs/path/to/train.jsonl"
+
+[compute]
+gpu_type = "H100"
+gpu_count = 4
+nodes = 1
+
+[data]
+# Optional; forwarded into metadata.effective_config.data.topology
+topology = { container_count = 4 }
+
+[training]
+mode = "sft_offline"
+use_qlora = true
+
+[training.validation]
+enabled = true
+evaluation_strategy = "steps"
+eval_steps = 20
+save_best_model_at_end = true
+metric_for_best_model = "val.loss"
+greater_is_better = false
+
+[hyperparameters]
+n_epochs = 1
+per_device_batch = 1
+gradient_accumulation_steps = 64
+sequence_length = 4096
+learning_rate = 5e-6
+warmup_ratio = 0.03
+
+[hyperparameters.parallelism]
+use_deepspeed = true
+deepspeed_stage = 2
+bf16 = true
+fp16 = false
+fsdp = false
examples/multi_step/crafter_rl_lora.md
ADDED
@@ -0,0 +1,29 @@
+# Crafter RL LoRA (10-step runs)
+
+This walkthrough shows how to fine-tune the Crafter task app with our 10-step RL LoRA config.
+
+1. **Start the Crafter task app on Modal (with tracing + text-only prompts)**
+
+   ```bash
+   BACKEND_BASE_URL=https://agent-learning.onrender.com/api \
+   uvx synth-ai modal-serve grpo-crafter \
+     --env-file examples/warming_up_to_rl/.env \
+     --name grpo-crafter-task-app
+   ```
+
+   *Deploys the Modal task app with the tracing/text-only fixes baked in.*
+
+2. **Launch the RL job using the updated LoRA config**
+
+   ```bash
+   uvx synth-ai train --type rl \
+     --config tests/artifacts/configs/rl.lora.small.toml \
+     --backend https://agent-learning.onrender.com/api \
+     --env-file .env \
+     --no-poll
+   ```
+
+   *This config forces 10 agent turns per rollout, reduces batch size to avoid OOMs, and enforces Crafter-specific defaults.*
+
+   INFO - 🎉 Training completed successfully!
+   INFO - All batch rewards: [0.0625, 0.0625, 0.125, 0.0625, 0.0625, 0.3125, 0.375, 0.4375, 0.5, 0.9375]
examples/qwen_coder/README.md
ADDED
@@ -0,0 +1,102 @@
+Qwen3 Coder – SFT with LoRA (all linear)
+
+This example mirrors the SFT LoRA flow under `examples/sft/` but targets the smallest Qwen3 Coder family model supported downstream. It configures LoRA on all linear projections ("all-linear") to match our RL LoRA recipes.
+
+Quick start
+
+1) Generate a tiny synthetic dataset (or export your own)
+
+```
+uv run python examples/qwen_coder/generate_dataset.py \
+  --output examples/qwen_coder/ft_data/coder_sft.small.jsonl \
+  --n 50 --seed 42 --lang python
+```
+
+2) Run training via the CLI:
+
+```
+uvx synth-ai train \
+  --type sft \
+  --config examples/qwen_coder/configs/coder_lora_small.toml \
+  --dataset examples/qwen_coder/ft_data/coder_sft.small.jsonl \
+  --env-file /path/to/.env
+```
+
+3) Inference via Synth API (pre/post SFT)
+
+Use the SDK’s OpenAI-compatible chat client routed through Synth. Export your env with SYNTH_API_KEY (and optional BACKEND_BASE_URL) or pass an env file to CLI helpers.
+
+Minimal one-shot inference:
+
+```bash
+python - <<'PY'
+import os, asyncio
+from synth_ai.v0.lm.core import main_v3 as lm
+
+async def run():
+    model = os.getenv("MODEL", "Qwen/Qwen3-Coder-30B-A3B-Instruct")
+    resp = await lm.chat_async(
+        model,
+        messages=[{"role":"user","content":"Write a Python function to reverse a string."}],
+        max_tokens=128,
+        temperature=0.2,
+    )
+    print(resp["choices"][0]["message"]["content"])
+asyncio.run(run())
+PY
+```
+
+After training, set `MODEL=ft:...` to query the finetuned adapter.
+
+4) 30B LoRA variant
+
+```bash
+uvx synth-ai train \
+  --type sft \
+  --config examples/qwen_coder/configs/coder_lora_30b.toml \
+  --dataset examples/qwen_coder/ft_data/coder_sft.small.jsonl \
+  --env-file /path/to/.env
+```
+
+5) Faster iteration: 4B LoRA config
+
+```bash
+uvx synth-ai train \
+  --type sft \
+  --config examples/qwen_coder/configs/coder_lora_4b.toml \
+  --dataset examples/qwen_coder/ft_data/coder_sft.small.jsonl \
+  --env-file /path/to/.env
+```
+
+Environment variables
+
+- `SYNTH_API_KEY`: required for training/inference through the hosted backend
+- `BACKEND_BASE_URL`: defaults to `https://agent-learning.onrender.com/api`
+
+Post‑SFT smoke
+
+- The training helper `sft_lora_30b.py` writes the resulting `ft:<id>` to `examples/qwen_coder/ft_data/ft_model_id.txt`.
+- Validate inference with your finetuned adapter:
+
+```bash
+uv run python examples/qwen_coder/infer_ft_smoke.py
+```
+
+Dataset utilities
+
+- `examples/qwen_coder/validate_jsonl.py`: sanity‑check first N lines for chat structure
+- `examples/qwen_coder/subset_jsonl.py`: create a capped subset for quick tests
+
+Optional CLI wrappers
+
+- `examples/qwen_coder/scripts/train_coder_30b.sh [/path/to/.env]`
+- `examples/qwen_coder/scripts/infer_coder.sh [/path/to/.env]`
+
+Notes
+
+- LoRA is enabled with `training.mode = "lora"` and `hyperparameters.train_kind = "peft"`.
+- The config sets an `all-linear` target to apply adapters broadly across attention and MLP projections.
+- Adjust `gradient_accumulation_steps`, `per_device_batch`, and `sequence_length` based on available GPU memory.
+- Use the Synth API client (above) for inference to ensure requests route via the hosted backend.
+
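
For context on step 1 above: each line of the generated JSONL file is one training record in the minimal `{"messages": [...], "response": "..."}` schema described in `generate_dataset.py` further down. A representative record, reconstructed here purely for illustration from one of the bundled Python prompt/solution pairs, can be serialized like this:

```python
# Illustrative only: one JSONL record in the shape consumed by `synth-ai train --type sft`.
# Exact whitespace inside the response may differ from what generate_dataset.py emits.
import json

record = {
    "messages": [
        {
            "role": "user",
            "content": "Write a Python function `add(a, b)` that returns the sum of two numbers.",
        }
    ],
    "response": "def add(a, b):\n    return a + b\n",
}
print(json.dumps(record, ensure_ascii=False))
```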
examples/qwen_coder/_shared.py
ADDED
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+"""Shared helpers for Qwen coder SFT examples."""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+TRAIN_DATA_PATH = Path("examples/qwen_coder/ft_data/coder_sft.small.jsonl")
+VAL_DATA_PATH = Path("examples/qwen_coder/ft_data/coder_sft.small.val.jsonl")
+DATA_DIR = TRAIN_DATA_PATH.parent
+
+_FALLBACK_RECORDS: list[dict[str, object]] = [
+    {
+        "messages": [
+            {"role": "user", "content": "Write a Python function `add(a, b)` that returns the sum of two numbers."}
+        ],
+        "response": "def add(a, b):\n return a + b\n",
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "Implement a Python function `reverse_string(s)` that returns the reversed string.",
+            }
+        ],
+        "response": "def reverse_string(s: str) -> str:\n return s[::-1]\n",
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "Write a Python function `count_words(text)` returning a dict mapping words to counts.",
+            }
+        ],
+        "response": "from collections import Counter\n\ndef count_words(text: str) -> dict[str, int]:\n words = [w for w in text.split() if w]\n return dict(Counter(words))\n",
+    },
+]
+
+
+def ensure_tiny_dataset() -> Path:
+    """Ensure the tiny coder dataset exists, generating or writing a fallback if needed."""
+    if TRAIN_DATA_PATH.exists():
+        return TRAIN_DATA_PATH
+
+    try:
+        from examples.qwen_coder.generate_dataset import main as gen_main  # type: ignore
+
+        gen_main()
+        if TRAIN_DATA_PATH.exists():
+            return TRAIN_DATA_PATH
+    except Exception:
+        # Fall back to inline dataset below.
+        pass
+
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    with TRAIN_DATA_PATH.open("w", encoding="utf-8") as fh:
+        for record in _FALLBACK_RECORDS:
+            fh.write(json.dumps(record, separators=(",", ":")))
+            fh.write("\n")
+    return TRAIN_DATA_PATH
+
+
+def optional_validation_dataset() -> Path | None:
+    """Return validation dataset path if present."""
+    if VAL_DATA_PATH.exists():
+        return VAL_DATA_PATH
+    return None
+
+
+def _ensure_parent(path: Path) -> Path:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def resolve_output_path(default_filename: str) -> Path:
+    """Resolve output path for storing fine-tuned model ids."""
+    override = os.getenv("QWEN_CODER_FT_OUTPUT")
+    if override:
+        return _ensure_parent(Path(override).expanduser())
+    return _ensure_parent(DATA_DIR / default_filename)
+
+
+def resolve_model_id_path(default_filename: str) -> Path:
+    """Resolve path to read a stored fine-tuned model id."""
+    override = os.getenv("QWEN_CODER_FT_MODEL_PATH")
+    if override:
+        candidate = Path(override).expanduser()
+        if candidate.is_dir():
+            return candidate / default_filename
+        return candidate
+    return DATA_DIR / default_filename
+
+
+def resolve_infer_output_path(default_filename: str) -> Path:
+    """Resolve path for writing inference outputs."""
+    override = os.getenv("QWEN_CODER_FT_INFER_OUTPUT")
+    if override:
+        return _ensure_parent(Path(override).expanduser())
+    return _ensure_parent(DATA_DIR / default_filename)
+
+
+__all__ = [
+    "DATA_DIR",
+    "TRAIN_DATA_PATH",
+    "VAL_DATA_PATH",
+    "ensure_tiny_dataset",
+    "optional_validation_dataset",
+    "resolve_output_path",
+    "resolve_model_id_path",
+    "resolve_infer_output_path",
+]
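
The SFT and inference scripts in this directory (`sft_lora_30b.py`, `infer_ft_smoke.py`, and friends) are expected to consume these helpers roughly as sketched below; the driver itself is an assumption for illustration, not code shipped in the package.

```python
# Hypothetical driver showing how the _shared.py helpers fit together; the
# "ft_model_id.txt" filename matches the default mentioned in the README above.
from examples.qwen_coder._shared import (
    ensure_tiny_dataset,
    optional_validation_dataset,
    resolve_output_path,
)

train_path = ensure_tiny_dataset()                      # generate or fall back to the tiny JSONL
val_path = optional_validation_dataset()                # None unless the .val.jsonl file exists
model_id_file = resolve_output_path("ft_model_id.txt")  # honors QWEN_CODER_FT_OUTPUT override

print(train_path, val_path, model_id_file)
```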
examples/qwen_coder/configs/coder_lora_30b.toml
ADDED
@@ -0,0 +1,61 @@
+# Qwen3 Coder 30B LoRA SFT – all-linear adapters
+
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
+
+[job]
+model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
+
+[compute]
+gpu_type = "H200"
+gpu_count = 1
+nodes = 1
+
+[data]
+topology = {}
+# Optional validation set
+# validation_path = "examples/qwen_coder/ft_data/coder_sft.small.val.jsonl"
+
+[metadata]
+# Effective config hints consumed by the backend
+effective_config = { compute = { gpu_type = "H200", gpu_count = 1, nodes = 1 } }
+
+[training]
+mode = "lora"
+use_qlora = true
+
+[training.validation]
+enabled = true
+evaluation_strategy = "steps"
+eval_steps = 100
+save_best_model_at_end = true
+metric_for_best_model = "val.loss"
+greater_is_better = false
+
+[hyperparameters]
+n_epochs = 1
+train_kind = "peft"
+per_device_batch = 1
+gradient_accumulation_steps = 64
+sequence_length = 4096
+learning_rate = 5e-6
+warmup_ratio = 0.03
+
+[hyperparameters.parallelism]
+use_deepspeed = true
+deepspeed_stage = 2
+fsdp = false
+bf16 = true
+fp16 = false
+activation_checkpointing = true
+
+# LoRA target selection for coder models: apply to all linear projections
+[lora]
+r = 16
+alpha = 32
+dropout = 0.05
+target_modules = ["all-linear"]
examples/qwen_coder/configs/coder_lora_4b.toml
ADDED
@@ -0,0 +1,57 @@
+# Qwen3 Coder 4B LoRA SFT – all-linear adapters
+
+[job]
+model = "Qwen/Qwen3-4B"
+
+[compute]
+gpu_type = "H100"
+gpu_count = 1
+nodes = 1
+
+[data]
+topology = {}
+# Optional validation set
+# validation_path = "examples/qwen_coder/ft_data/coder_sft.small.val.jsonl"
+
+[metadata]
+# Effective config hints consumed by the backend
+effective_config = { compute = { gpu_type = "H100", gpu_count = 1, nodes = 1 } }
+
+[training]
+mode = "lora"
+use_qlora = true
+
+[training.validation]
+enabled = true
+evaluation_strategy = "steps"
+eval_steps = 100
+save_best_model_at_end = true
+metric_for_best_model = "val.loss"
+greater_is_better = false
+
+[hyperparameters]
+n_epochs = 1
+train_kind = "peft"
+per_device_batch = 2
+gradient_accumulation_steps = 32
+sequence_length = 4096
+learning_rate = 5e-6
+warmup_ratio = 0.03
+
+[hyperparameters.parallelism]
+use_deepspeed = true
+deepspeed_stage = 2
+fsdp = false
+bf16 = true
+fp16 = false
+activation_checkpointing = true
+
+# LoRA target selection for coder models: apply to all linear projections
+[lora]
+r = 16
+alpha = 32
+dropout = 0.05
+target_modules = ["all-linear"]
examples/qwen_coder/configs/coder_lora_small.toml
ADDED
@@ -0,0 +1,58 @@
+# Qwen3 Coder LoRA SFT – all-linear adapters
+
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
+
+[job]
+# Smallest supported Qwen3 base; replace with the smallest Coder variant when available
+model = "Qwen/Qwen3-1.7B"
+
+[compute]
+gpu_type = "H100"
+gpu_count = 1
+nodes = 1
+
+[data]
+topology = {}
+# Optional validation set
+# validation_path = "examples/sft/ft_data/coder_traces.val.jsonl"
+
+[training]
+mode = "lora"
+use_qlora = true
+
+[training.validation]
+enabled = true
+evaluation_strategy = "steps"
+eval_steps = 100
+save_best_model_at_end = true
+metric_for_best_model = "val.loss"
+greater_is_better = false
+
+[hyperparameters]
+n_epochs = 1
+train_kind = "peft"
+per_device_batch = 2
+gradient_accumulation_steps = 32
+sequence_length = 4096
+learning_rate = 5e-6
+warmup_ratio = 0.03
+
+[hyperparameters.parallelism]
+use_deepspeed = true
+deepspeed_stage = 2
+fsdp = false
+bf16 = true
+fp16 = false
+activation_checkpointing = true
+
+# LoRA target selection for coder models: apply to all linear projections
+[lora]
+r = 16
+alpha = 32
+dropout = 0.05
+target_modules = ["all-linear"]
examples/qwen_coder/generate_dataset.py
ADDED
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""
+Generate a small, synthetic SFT JSONL dataset for coder-style prompts.
+
+Each line uses the minimal schema consumed by the SFT CLI:
+  {"messages": [{"role": "user", "content": "..."}], "response": "..."}
+
+Example:
+  uv run python examples/qwen_coder/generate_dataset.py \
+    --output examples/qwen_coder/ft_data/coder_sft.small.jsonl \
+    --n 50 --seed 42 --lang python
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import random
+from collections.abc import Iterable
+from pathlib import Path
+
+PROMPT_TEMPLATES: dict[str, list[str]] = {
+    "python": [
+        "Write a Python function `add(a, b)` that returns the sum of two numbers.",
+        "Write a Python function `reverse_string(s)` that returns the reversed string.",
+        "Implement a Python function `is_palindrome(s)` that returns True if s is a palindrome.",
+        "Write a Python function `fibonacci(n)` that returns a list of the first n Fibonacci numbers.",
+        "Write a Python function `count_words(text)` that returns a dict of word -> count.",
+    ],
+    "javascript": [
+        "Write a JavaScript function `add(a, b)` that returns the sum of two numbers.",
+        "Write a JavaScript function `reverseString(s)` that returns the reversed string.",
+        "Implement a JavaScript function `isPalindrome(s)` that returns true if s is a palindrome.",
+        "Write a JavaScript function `fibonacci(n)` that returns an array of the first n Fibonacci numbers.",
+        "Write a JavaScript function `countWords(text)` that returns an object mapping word -> count.",
+    ],
+}
+
+
+SOLUTIONS: dict[str, list[str]] = {
+    "python": [
+        """def add(a, b):\n return a + b\n""",
+        """def reverse_string(s: str) -> str:\n return s[::-1]\n""",
+        """def is_palindrome(s: str) -> bool:\n t = ''.join(ch.lower() for ch in s if ch.isalnum())\n return t == t[::-1]\n""",
+        """def fibonacci(n: int) -> list[int]:\n a, b = 0, 1\n out: list[int] = []\n for _ in range(max(0, n)):\n out.append(a)\n a, b = b, a + b\n return out\n""",
+        """from collections import Counter\n\n"""
+        """def count_words(text: str) -> dict[str, int]:\n words = [w for w in text.split() if w]\n return dict(Counter(words))\n""",
+    ],
+    "javascript": [
+        """function add(a, b) {\n return a + b;\n}\n""",
+        """function reverseString(s) {\n return s.split('').reverse().join('');\n}\n""",
+        """function isPalindrome(s) {\n const t = (s.match(/[a-z0-9]/gi) || []).join('').toLowerCase();\n return t === t.split('').reverse().join('');\n}\n""",
+        """function fibonacci(n) {\n const out = [];\n let a = 0, b = 1;\n for (let i = 0; i < Math.max(0, n); i++) {\n out.push(a);\n [a, b] = [b, a + b];\n }\n return out;\n}\n""",
+        """function countWords(text) {\n const words = text.split(/\s+/).filter(Boolean);\n return words.reduce((acc, w) => { acc[w] = (acc[w] || 0) + 1; return acc; }, {});\n}\n""",
+    ],
+}
+
+
+def _iter_examples(n: int, lang: str) -> Iterable[dict]:
+    prompts = PROMPT_TEMPLATES.get(lang, PROMPT_TEMPLATES["python"]).copy()
+    answers = SOLUTIONS.get(lang, SOLUTIONS["python"]).copy()
+    for _ in range(n):
+        i = random.randrange(0, len(prompts))
+        j = random.randrange(0, len(answers))
+        user = prompts[i]
+        assistant = answers[j]
+        yield {
+            "messages": [
+                {"role": "user", "content": user},
+            ],
+            "response": assistant,
+        }
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description="Generate synthetic coder SFT JSONL dataset")
+    ap.add_argument("--output", required=True, help="Path to write JSONL (will create parent dir)")
+    ap.add_argument("--n", type=int, default=50, help="Number of examples to generate")
+    ap.add_argument("--seed", type=int, default=42, help="Random seed")
+    ap.add_argument("--lang", choices=["python", "javascript"], default="python")
+    args = ap.parse_args()
+
+    random.seed(args.seed)
+    out_path = Path(args.output).expanduser().resolve()
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Write JSONL
+    with out_path.open("w", encoding="utf-8") as fh:
+        for rec in _iter_examples(max(1, int(args.n)), lang=args.lang):
+            fh.write(json.dumps(rec, ensure_ascii=False))
+            fh.write("\n")
+
+    print(f"Wrote {args.n} examples to {out_path}")
+
+
+if __name__ == "__main__":
+    main()
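
A quick structural check of the generated file can be done as sketched below; this is a minimal illustration (the bundled `validate_jsonl.py` is the fuller version) and assumes the default output path used in the README.

```python
# Minimal post-generation check: every line should parse as JSON and carry the
# "messages" + "response" keys expected by the SFT CLI. Not a replacement for
# examples/qwen_coder/validate_jsonl.py.
import json
from pathlib import Path

path = Path("examples/qwen_coder/ft_data/coder_sft.small.jsonl")
for i, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
    rec = json.loads(line)
    assert isinstance(rec.get("messages"), list) and rec["messages"], f"line {i}: bad messages"
    assert isinstance(rec.get("response"), str) and rec["response"], f"line {i}: bad response"
print("ok")
```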