synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
- examples/multi_step/crafter_rl_lora.md +29 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth_ai/__init__.py +1 -0
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1699 -259
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
|
@@ -1,246 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
|
|
3
|
-
|
|
4
|
-
Implements the high-level pseudocode of DSPy's Random Search optimizer in a
|
|
5
|
-
provider-agnostic, modular style. You can plug in your own student/program and
|
|
6
|
-
metric, and this module will explore baselines and bootstrapped few-shot variants.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import contextlib
|
|
12
|
-
import random
|
|
13
|
-
from collections.abc import Callable, Sequence
|
|
14
|
-
from dataclasses import dataclass
|
|
15
|
-
from typing import Any
|
|
16
|
-
|
|
17
|
-
# ---------------------------
|
|
18
|
-
# Protocol-like expectations (duck-typed)
|
|
19
|
-
# ---------------------------
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class _ProgramLike:
|
|
23
|
-
def reset_copy(self): # zero-shot copy
|
|
24
|
-
return self
|
|
25
|
-
|
|
26
|
-
def deepcopy(self): # deep copy
|
|
27
|
-
return self
|
|
28
|
-
|
|
29
|
-
def with_demos(self, demos: list[tuple[Any, Any]]):
|
|
30
|
-
return self
|
|
31
|
-
|
|
32
|
-
def run(self, x: Any) -> Any:
|
|
33
|
-
raise NotImplementedError
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# ---------------------------
|
|
37
|
-
# Helpers and lightweight components
|
|
38
|
-
# ---------------------------
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
@dataclass
class EvalResult:
    """Result of scoring a program on a dataset."""

    # Aggregate score for the whole dataset.
    score: float
    # One metric value per evaluated example, in dataset order.
    subscores: list[float]
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def evaluate(
    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
) -> EvalResult:
    """Score ``program`` on every ``(input, expected)`` pair of ``dataset``.

    Each example is run through ``program.run`` and compared to its expected
    value with ``metric``. The overall score is the mean of the per-example
    values; an empty dataset yields a score of 0 rather than dividing by zero.
    """
    per_example = [metric(program.run(inp), expected) for inp, expected in dataset]
    mean_score = sum(per_example) / max(1, len(per_example))
    return EvalResult(mean_score, per_example)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class LabeledFewShot:
    """Build a few-shot program from up to ``k`` labeled training examples."""

    def __init__(self, k: int):
        # Maximum number of labeled demos to attach.
        self.k = k

    def compile(
        self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
    ) -> _ProgramLike:
        """Return a copy of ``student`` carrying at most ``k`` demos.

        Args:
            student: Program to copy; ``deepcopy`` is preferred and
                ``reset_copy`` is used only when ``deepcopy`` is missing.
            trainset: Labeled ``(input, expected)`` pairs to draw demos from.
            sample: When true, shuffle the examples (module-level ``random``)
                before taking the first ``k``; otherwise keep their order.

        Returns:
            The result of ``with_demos`` on the copied program.
        """
        # Resolve the fallback lazily so a student that only offers
        # ``deepcopy`` does not fail on a missing ``reset_copy``.
        make_copy = getattr(student, "deepcopy", None)
        if make_copy is None:
            make_copy = student.reset_copy
        program = make_copy()

        demos = list(trainset)
        if sample:
            random.shuffle(demos)
        # Clamp at zero: a negative k must mean "no demos", not a tail slice.
        limit = max(0, min(self.k, len(demos)))
        return program.with_demos(demos[:limit])
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class BootstrapFewShot:
    """Collect demos on which the program itself already succeeds.

    Training examples whose program output passes the metric become
    "bootstrapped" demos; randomly sampled labeled demos can be appended.
    """

    def __init__(
        self,
        *,
        metric: Callable[[Any, Any], float],
        metric_threshold: float | None = None,
        max_bootstrapped_demos: int = 8,
        max_labeled_demos: int = 0,
        teacher_settings: dict[str, Any] | None = None,
        max_rounds: int = 1,
    ):
        """Store the bootstrapping configuration.

        Args:
            metric: Called as ``metric(prediction, expected)``.
            metric_threshold: Minimum metric value for acceptance; when
                ``None`` an example is accepted only if the metric equals 1.
            max_bootstrapped_demos: Cap on bootstrapped demos; ``<= 0``
                disables bootstrapping.
            max_labeled_demos: Number of extra labeled demos to sample.
            teacher_settings: Stored as given (``{}`` when omitted); not read
                by ``compile``.
            max_rounds: Number of passes over the training set.
        """
        self.metric = metric
        self.metric_threshold = metric_threshold
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos
        self.teacher_settings = teacher_settings or {}
        self.max_rounds = max_rounds

    def _accepts(self, value: float) -> bool:
        """Tell whether a metric value qualifies an example as a demo."""
        if self.metric_threshold is None:
            return value == 1
        return value >= self.metric_threshold

    def _bootstrap(
        self, program: _ProgramLike, examples: list[tuple[Any, Any]], rng: random.Random
    ) -> list[tuple[Any, Any]]:
        """Gather up to ``max_bootstrapped_demos`` distinct passing examples."""
        boot: list[tuple[Any, Any]] = []
        # Track accepted examples by position (they may be unhashable) so a
        # later round cannot add the same example a second time.
        accepted: set[int] = set()
        order = list(range(len(examples)))
        for _ in range(self.max_rounds):
            if len(accepted) == len(examples):
                break  # nothing left to try
            rng.shuffle(order)
            for pos in order:
                if pos in accepted:
                    continue
                x, y = examples[pos]
                if self._accepts(self.metric(program.run(x), y)):
                    accepted.add(pos)
                    boot.append((x, y))
                    if len(boot) >= self.max_bootstrapped_demos:
                        return boot
        return boot

    def compile(
        self,
        student: _ProgramLike,
        teacher: _ProgramLike | None,
        trainset: Sequence[tuple[Any, Any]],
    ) -> _ProgramLike:
        """Return a copy of ``student`` with bootstrapped and labeled demos.

        Args:
            student: Program to copy; ``deepcopy`` is preferred and
                ``reset_copy`` is used only when ``deepcopy`` is missing.
            teacher: Accepted for interface compatibility; not used here.
            trainset: Labeled ``(input, expected)`` pairs.

        Returns:
            The result of ``with_demos`` on the copied program.
        """
        # Resolve the fallback lazily so a student that only offers
        # ``deepcopy`` does not fail on a missing ``reset_copy``.
        make_copy = getattr(student, "deepcopy", None)
        if make_copy is None:
            make_copy = student.reset_copy
        program = make_copy()

        rng = random.Random()
        examples = list(trainset)

        demos: list[tuple[Any, Any]] = []
        if self.max_bootstrapped_demos > 0:
            demos = self._bootstrap(program, examples, rng)

        # Optionally add labeled demos on top of the bootstrapped ones.
        if self.max_labeled_demos > 0:
            demos += rng.sample(examples, k=min(self.max_labeled_demos, len(examples)))

        return program.with_demos(demos)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# ---------------------------
|
|
128
|
-
# Random-search compile (BootstrapFewShotWithRandomSearch)
|
|
129
|
-
# ---------------------------
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
@dataclass
class Candidate:
    """One evaluated candidate program from the random search."""

    # Aggregate validation score of the candidate.
    score: float
    # Per-example metric values behind ``score``.
    subscores: list[float]
    # Seed that identifies how the candidate was built.
    seed: int
    # The compiled program itself.
    program: _ProgramLike
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def random_search_compile(
    student: _ProgramLike,
    trainset: Sequence[tuple[Any, Any]],
    valset: Sequence[tuple[Any, Any]],
    metric: Callable[[Any, Any], float],
    *,
    max_bootstrapped_demos: int = 8,
    max_labeled_demos: int = 4,
    max_rounds: int = 2,
    num_candidate_programs: int = 16,
    stop_at_score: float | None = None,
    evaluate_fn: Callable[
        [_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult
    ]
    | None = None,
    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
) -> tuple[_ProgramLike, list[dict[str, Any]]]:
    """Search over few-shot variants of ``student`` and keep the best one.

    Three baselines are tried first (seed -3 zero-shot, -2 labeled few-shot,
    -1 bootstrapped with the full demo budget), followed by
    ``num_candidate_programs`` bootstrapped variants built on a shuffled
    training set with a randomly drawn demo budget.

    Args:
        student: Program to optimise.
        trainset: Examples used to build demos.
        valset: Examples used to score each candidate.
        metric: Called as ``metric(prediction, expected)``.
        max_bootstrapped_demos: Upper bound on bootstrapped demos.
        max_labeled_demos: Number of labeled demos per candidate.
        max_rounds: Bootstrapping passes over the training set.
        num_candidate_programs: Number of randomised candidates.
        stop_at_score: Stop as soon as the best score reaches this value.
        evaluate_fn: Optional replacement for the module's ``evaluate``.
        on_candidate_evaluated: Optional callback invoked after every
            candidate with ``(1-based index, score, result, intervention)``;
            exceptions it raises are ignored.

    Returns:
        ``(best_program, records)`` where ``records`` holds one dict per
        evaluated candidate with ``score``, ``subscores`` and ``intervention``.
    """
    best_program: _ProgramLike | None = None
    best_score = float("-inf")
    candidates: list[Candidate] = []
    records: list[dict[str, Any]] = []

    # zero-shot, labeled few-shot, bootstrapped few-shot, then random variants
    seeds = [-3, -2, -1, *range(num_candidate_programs)]

    rng = random.Random(0)
    for idx, seed in enumerate(seeds):
        train_copy = list(trainset)

        if seed == -3:
            # Look the fallback up lazily so only the method actually needed
            # has to exist on the student.
            make_copy = getattr(student, "reset_copy", None)
            if make_copy is None:
                make_copy = student.deepcopy
            program = make_copy()
        elif seed == -2:
            program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)
        else:
            if seed >= 0:
                rng.shuffle(train_copy)
            if max_bootstrapped_demos <= 0:
                size = 0
            elif seed == -1:
                size = max_bootstrapped_demos
            else:
                size = rng.randint(1, max_bootstrapped_demos)
            program = BootstrapFewShot(
                metric=metric,
                metric_threshold=None,
                max_bootstrapped_demos=size,
                max_labeled_demos=max_labeled_demos,
                teacher_settings={},
                max_rounds=max_rounds,
            ).compile(student, teacher=None, trainset=train_copy)

        if evaluate_fn:
            res = evaluate_fn(program, valset, metric)
        else:
            res = evaluate(program, valset, metric)
        cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
        candidates.append(cand)

        # Record an intervention summary for reproducibility
        intervention: dict[str, Any] = {"seed": seed}
        if hasattr(program, "demos"):
            try:
                intervention["demos"] = program.demos  # type: ignore
            except Exception:
                intervention["demos"] = None
        if seed == -3:
            intervention["kind"] = "zero_shot"
            intervention["label"] = "zero-shot"
        elif seed == -2:
            intervention["kind"] = "labeled_few_shot"
            intervention["label"] = f"labeled-{max_labeled_demos}"
        else:
            intervention["kind"] = "bootstrapped_few_shot"
            intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
        records.append(
            {
                "score": cand.score,
                "subscores": cand.subscores,
                "intervention": intervention,
            }
        )

        if res.score > best_score:
            best_score, best_program = res.score, program

        # Report before the early-stop check so the candidate that triggers
        # the stop is reported too. The callback is best-effort by design.
        if on_candidate_evaluated is not None:
            with contextlib.suppress(Exception):
                on_candidate_evaluated(idx + 1, res.score, res, intervention)

        if stop_at_score is not None and best_score >= stop_at_score:
            break

    # Attach candidates for inspection, only when the program already exposes
    # a ``candidate_programs`` attribute; assignment failures are ignored.
    if hasattr(best_program, "candidate_programs"):
        with contextlib.suppress(Exception):
            best_program.candidate_programs = sorted(  # type: ignore[attr-defined]
                candidates, key=lambda c: c.score, reverse=True
            )

    if best_program is not None:
        return best_program, records

    # No candidate beat -inf (e.g. every score was NaN): fall back to the
    # student, copied when it knows how to copy itself.
    fallback_copy = getattr(student, "deepcopy", None)
    return (fallback_copy() if fallback_copy is not None else student), records
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
__all__ = [
|
|
243
|
-
"random_search_compile",
|
|
244
|
-
"LabeledFewShot",
|
|
245
|
-
"BootstrapFewShot",
|
|
246
|
-
]
|
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
|
|
3
|
-
|
|
4
|
-
Requires:
|
|
5
|
-
- .env with GROQ_API_KEY
|
|
6
|
-
- datasets
|
|
7
|
-
|
|
8
|
-
Run:
|
|
9
|
-
- uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
from __future__ import annotations
|
|
13
|
-
|
|
14
|
-
import asyncio
|
|
15
|
-
import json
|
|
16
|
-
import os
|
|
17
|
-
import random
|
|
18
|
-
import time
|
|
19
|
-
from collections.abc import Sequence
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Any
|
|
22
|
-
|
|
23
|
-
from datasets import load_dataset
|
|
24
|
-
from dotenv import load_dotenv
|
|
25
|
-
from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
|
|
26
|
-
from synth_ai.lm.core.main_v3 import LM, build_messages
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def choose_label(pred: str, label_names: list[str]) -> str:
    """Map a raw prediction onto one of ``label_names``.

    A case-insensitive exact match (after stripping whitespace) wins
    outright. Otherwise each label is scored by how many of its
    whitespace-separated words occur as substrings of the prediction, and the
    highest-scoring label is returned (the earliest one on ties).
    """
    needle = (pred or "").strip().lower()
    by_lower = {name.lower(): name for name in label_names}
    exact = by_lower.get(needle)
    if exact is not None:
        return exact

    def overlap(label: str) -> int:
        # Number of the label's words that appear inside the prediction text.
        return len([word for word in label.lower().split() if word in needle])

    return max(label_names, key=overlap)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def accuracy(pred: str, gold: str, labels: list[str]) -> float:
    """Return 1.0 when ``pred`` resolves to the ``gold`` label, else 0.0."""
    if choose_label(pred, labels) == gold:
        return 1.0
    return 0.0
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class NaivePromptModel:
    """Toy prompt model that returns simple instruction variants."""

    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
        """Return up to ``k`` shuffled variants of one base instruction.

        ``ctx`` is accepted but not consulted. The shuffle uses the
        module-level ``random`` generator.
        """
        base = "Classify the Banking77 intent and return exactly one label."
        suffixes = (
            "",
            " Be concise.",
            " Use examples to guide your reasoning.",
            " Return only the label text.",
            " Follow the label names strictly.",
            " Do not include explanations.",
            " Think about similar intents before answering.",
            " Carefully consider the user's message.",
        )
        variants = [base + suffix for suffix in suffixes]
        random.shuffle(variants)
        return variants[:k]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def build_run_fn(lm: LM, label_names: list[str], state: dict[str, Any] | None = None):
    """Build the synchronous ``run_fn`` that classifies one message with ``lm``.

    Args:
        lm: Language model; ``lm.model`` and ``lm.respond_async`` are used.
        label_names: Accepted for interface compatibility; not read here.
        state: Optional dict holding ``"instructions"`` and ``"demos"``. When
            omitted, the module-level ``state_ref`` is looked up at call time,
            and an empty state (default instruction, no demos) is used if
            that global has not been set.

    Returns:
        ``run_fn(x, _model=None) -> str`` returning the stripped raw response.
    """

    def run_fn(x: str, _model: Any | None = None) -> str:
        # Resolve the state on every call so updates made after this function
        # was built are seen; never fail just because the global is unset.
        current = state if state is not None else (globals().get("state_ref") or {})
        instructions = current.get("instructions", {}).get(
            "main", "You are an intent classifier for Banking77."
        )
        examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in current.get("demos", []))
        system_prompt = instructions
        user_prompt = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
        messages = build_messages(
            system_prompt, user_prompt, images_bytes=None, model_name=lm.model
        )

        async def _call() -> str:
            resp = await lm.respond_async(messages=messages)
            return (resp.raw_response or "").strip()

        # Runs a fresh event loop per call; must not be invoked from inside
        # an already running loop.
        return asyncio.run(_call())

    return run_fn
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
    """Merge ``new_instr`` over the instructions in ``state`` and return ``state``.

    The merged mapping is a new dict stored under ``state["instructions"]``;
    ``state`` itself is updated in place.
    """
    merged = dict(state.get("instructions", {}))
    merged.update(new_instr)
    state["instructions"] = merged
    return state
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
    """Store a fresh list copy of ``demos`` in ``state`` and return ``state``."""
    state.update(demos=[*demos])
    return state
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def main():
    """Run the MIPROv2-style demo on Banking77 and save the trial records.

    Loads environment variables, builds the LM from ``MODEL``/``VENDOR``,
    carves 80 training and 80 validation items out of the shuffled test
    split, runs ``mipro_v2_compile``, prints the validation accuracy of the
    best program and writes the trial records to a timestamped JSON file
    next to this module.
    """
    # ``run_fn`` built by ``build_run_fn`` reads this module-level state.
    global state_ref

    load_dotenv()
    random.seed(0)

    model = os.getenv("MODEL", "openai/gpt-oss-20b")
    vendor = os.getenv("VENDOR", "groq")
    lm = LM(model=model, vendor=vendor, temperature=0.0)

    print("Loading Banking77 dataset (train/dev split of test for demo)...")
    test_split = load_dataset("banking77")["test"]
    label_names: list[str] = test_split.features["label"].names  # type: ignore

    all_items = []
    for row in test_split:
        all_items.append((row["text"], label_names[int(row["label"])]))
    random.shuffle(all_items)
    trainset: Sequence[tuple[str, str]] = all_items[:80]
    valset: Sequence[tuple[str, str]] = all_items[80:160]

    state_ref = {
        "instructions": {"main": "You are an intent classifier for Banking77."},
        "demos": [],
    }
    adapter = ProgramAdapter(
        run_fn=build_run_fn(lm, label_names),
        state=state_ref,
        _predictors=["main"],
        set_instructions=set_instructions,
        set_demos=set_demos,
    )

    def metric(yhat: str, y: str) -> float:
        return accuracy(yhat, y, label_names)

    print("Running MIPROv2-style optimizer...")
    best, records = mipro_v2_compile(
        student=adapter,
        trainset=trainset,
        valset=valset,
        metric=metric,
        prompt_model=NaivePromptModel(),
        task_model=None,  # not used in this minimal example
        max_bootstrapped_demos=6,
        max_labeled_demos=4,
        num_candidates=6,
        num_trials=12,
        minibatch=True,
        minibatch_size=16,
        minibatch_full_eval_steps=3,
        seed=0,
    )

    res = evaluate_program(best, valset, metric)
    print(
        f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
    )

    context = {
        "model": model,
        "vendor": vendor,
        "train_size": len(trainset),
        "val_size": len(valset),
    }
    out = {"context": context, "trials": records}

    fname = str(Path(__file__).parent / f"mipro_banking77_{int(time.time())}.json")
    with open(fname, "w") as handle:
        json.dump(out, handle, indent=2)
    print(f"Saved trial records to {fname}")
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
if __name__ == "__main__":
|
|
172
|
-
main()
|