synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +8 -11
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/run_eval.py +36 -37
- examples/rl/run_rl_and_save.py +5 -5
- examples/rl/task_app/math_single_step.py +65 -43
- examples/rl/task_app/math_task_app.py +3 -3
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +5 -5
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +78 -21
- examples/warming_up_to_rl/groq_test.py +4 -4
- examples/warming_up_to_rl/manage_secrets.py +13 -18
- examples/warming_up_to_rl/run_eval.py +42 -44
- examples/warming_up_to_rl/run_fft_and_save.py +11 -16
- examples/warming_up_to_rl/run_local_rollout.py +1 -3
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
- examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
- examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
- examples/warming_up_to_rl/run_rl_and_save.py +5 -6
- examples/warming_up_to_rl/run_rollout_remote.py +8 -10
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +26 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +128 -21
- synth_ai/api/train/cli.py +80 -64
- synth_ai/api/train/config_finder.py +7 -2
- synth_ai/api/train/env_resolver.py +1 -1
- synth_ai/api/train/pollers.py +2 -1
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +1 -2
- synth_ai/api/train/utils.py +13 -44
- synth_ai/cli/__init__.py +8 -0
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +1 -2
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +2 -1
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +2 -1
- synth_ai/cli/root.py +11 -13
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +529 -179
- synth_ai/cli/traces.py +6 -4
- synth_ai/cli/watch.py +12 -18
- synth_ai/demo_registry.py +1 -1
- synth_ai/demos/core/cli.py +36 -43
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +17 -25
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +2 -5
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +4 -7
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/handshake.py +9 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +18 -10
- synth_ai/inference/client.py +15 -5
- synth_ai/jobs/client.py +78 -83
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +91 -24
- synth_ai/learning/config.py +2 -38
- synth_ai/learning/ft_client.py +4 -59
- synth_ai/learning/health.py +5 -6
- synth_ai/learning/jobs.py +31 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -4
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -8
- synth_ai/{rl → learning/rl}/env_keys.py +39 -15
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -281
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -24
- synth_ai/learning/validators.py +25 -28
- synth_ai/lm/__init__.py +21 -47
- synth_ai/main.py +6 -0
- synth_ai/task/__init__.py +25 -27
- synth_ai/task/apps/__init__.py +7 -8
- synth_ai/task/auth.py +8 -8
- synth_ai/task/client.py +14 -14
- synth_ai/task/contracts.py +36 -35
- synth_ai/task/datasets.py +6 -5
- synth_ai/task/errors.py +10 -10
- synth_ai/task/health.py +17 -9
- synth_ai/task/json.py +58 -23
- synth_ai/task/proxy.py +13 -9
- synth_ai/task/rubrics.py +16 -15
- synth_ai/task/server.py +12 -12
- synth_ai/task/tracing_utils.py +4 -4
- synth_ai/task/vendors.py +5 -6
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/decorators.py +18 -16
- synth_ai/tracing_v3/hooks.py +5 -5
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/session_tracer.py +40 -14
- synth_ai/tracing_v3/storage/base.py +85 -0
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -7
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -4
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/main.py +6 -6
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
- synth_ai/{lm → v0/lm}/overrides.py +2 -2
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -20
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1038
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -152
- examples/rl_old/task_app.py +0 -1131
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/experimental/synth_oss.py +0 -445
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -249
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -838
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
|
|
3
|
-
|
|
4
|
-
Implements the high-level pseudocode of DSPy's Random Search optimizer in a
|
|
5
|
-
provider-agnostic, modular style. You can plug in your own student/program and
|
|
6
|
-
metric, and this module will explore baselines and bootstrapped few-shot variants.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import contextlib
|
|
12
|
-
import random
|
|
13
|
-
from collections.abc import Callable, Sequence
|
|
14
|
-
from dataclasses import dataclass
|
|
15
|
-
from typing import Any
|
|
16
|
-
|
|
17
|
-
# ---------------------------
|
|
18
|
-
# Protocol-like expectations (duck-typed)
|
|
19
|
-
# ---------------------------
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class _ProgramLike:
|
|
23
|
-
def reset_copy(self): # zero-shot copy
|
|
24
|
-
return self
|
|
25
|
-
|
|
26
|
-
def deepcopy(self): # deep copy
|
|
27
|
-
return self
|
|
28
|
-
|
|
29
|
-
def with_demos(self, demos: list[tuple[Any, Any]]):
|
|
30
|
-
return self
|
|
31
|
-
|
|
32
|
-
def run(self, x: Any) -> Any:
|
|
33
|
-
raise NotImplementedError
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# ---------------------------
|
|
37
|
-
# Helpers and lightweight components
|
|
38
|
-
# ---------------------------
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
@dataclass
class EvalResult:
    """Outcome of scoring one program over a dataset."""

    # Aggregate score for the whole dataset.
    score: float
    # Per-example metric values.
    subscores: list[float]
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def evaluate(
    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
) -> EvalResult:
    """Score ``program`` on every ``(input, target)`` pair of ``dataset``.

    Returns an ``EvalResult`` whose ``score`` is the mean of the per-example
    metric values and whose ``subscores`` lists those values in dataset
    order. The divisor is clamped to at least 1, so an empty dataset yields a
    score of ``0.0`` instead of a division error.
    """
    per_example = [metric(program.run(inp), target) for inp, target in dataset]
    divisor = max(1, len(per_example))
    return EvalResult(sum(per_example) / divisor, per_example)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class LabeledFewShot:
    """Few-shot compiler that attaches up to ``k`` labeled training examples."""

    def __init__(self, k: int):
        """Store ``k``, the maximum number of labeled demos to attach."""
        self.k = k

    def compile(
        self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
    ) -> _ProgramLike:
        """Return a copy of ``student`` carrying at most ``k`` demos.

        Args:
            student: Program to copy; ``deepcopy`` is preferred and
                ``reset_copy`` is used when ``deepcopy`` is absent.
            trainset: ``(input, target)`` pairs to draw demos from. It is
                copied first, so the caller's sequence is never reordered.
            sample: When true the copied examples are shuffled with the
                module-level ``random`` generator before the first ``k`` are
                taken; when false the first ``k`` are taken in order.

        Returns:
            Whatever ``with_demos`` returns for the selected demos.
        """
        # Look the fallback up lazily: passing ``student.reset_copy`` as the
        # getattr default would evaluate it even when ``deepcopy`` exists and
        # fail for students that only define ``deepcopy``.
        copier = getattr(student, "deepcopy", None)
        if copier is None:
            copier = student.reset_copy
        compiled = copier()

        demos = list(trainset)
        if sample:
            random.shuffle(demos)

        # Clamp at zero so a negative ``k`` selects nothing instead of turning
        # into a negative slice bound that keeps most of the training set.
        limit = max(0, min(self.k, len(demos)))
        return compiled.with_demos(demos[:limit])
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class BootstrapFewShot:
    """Few-shot compiler that keeps training examples the program already solves.

    An example is "bootstrapped" when the copied student's output for it
    satisfies the metric. Labeled demos may be added on top.
    """

    def __init__(
        self,
        *,
        metric: Callable[[Any, Any], float],
        metric_threshold: float | None = None,
        max_bootstrapped_demos: int = 8,
        max_labeled_demos: int = 0,
        teacher_settings: dict[str, Any] | None = None,
        max_rounds: int = 1,
    ):
        """Store the bootstrapping configuration.

        Args:
            metric: Called as ``metric(prediction, target)``.
            metric_threshold: Minimum metric value that accepts an example;
                when ``None`` an example is accepted only if the metric
                equals 1.
            max_bootstrapped_demos: Cap on bootstrapped demos; ``<= 0``
                disables bootstrapping.
            max_labeled_demos: Number of labeled demos to add.
            teacher_settings: Stored (``{}`` when omitted) but not consulted
                by ``compile``.
            max_rounds: Number of passes over the training set.
        """
        self.metric = metric
        self.metric_threshold = metric_threshold
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos
        self.teacher_settings = teacher_settings or {}
        self.max_rounds = max_rounds

    def _accepts(self, value: float) -> bool:
        """Return whether a metric value qualifies an example as a demo."""
        if self.metric_threshold is None:
            return value == 1
        return value >= self.metric_threshold

    def _sample_labeled(self, rng: random.Random, pool: list[tuple[Any, Any]]) -> list[tuple[Any, Any]]:
        """Draw up to ``max_labeled_demos`` distinct examples from ``pool``."""
        if self.max_labeled_demos <= 0:
            return []
        return rng.sample(pool, k=min(self.max_labeled_demos, len(pool)))

    def compile(
        self,
        student: _ProgramLike,
        teacher: _ProgramLike | None,
        trainset: Sequence[tuple[Any, Any]],
    ) -> _ProgramLike:
        """Return a copy of ``student`` carrying bootstrapped and labeled demos.

        Args:
            student: Program to copy (``deepcopy`` preferred, ``reset_copy``
                as fallback); the copy is also the one run on the examples.
            teacher: Accepted for interface compatibility; not used.
            trainset: ``(input, target)`` pairs; copied, never reordered.

        Returns:
            Whatever ``with_demos`` returns for the collected demos.

        Selection uses an unseeded ``random.Random``, so results vary from
        call to call.
        """
        # Lazy fallback lookup: a student that only defines ``deepcopy`` must
        # not fail on an eagerly evaluated ``student.reset_copy``.
        copier = getattr(student, "deepcopy", None)
        if copier is None:
            copier = student.reset_copy
        p = copier()

        rng = random.Random()
        examples = list(trainset)

        # Bootstrapping disabled: labeled-only few-shot.
        if self.max_bootstrapped_demos <= 0:
            return p.with_demos(self._sample_labeled(rng, examples))

        # Track accepted examples by position so that later rounds never add
        # the same example twice (items themselves may be unhashable).
        chosen: list[int] = []
        taken: set[int] = set()
        for _ in range(self.max_rounds):
            pending = [i for i in range(len(examples)) if i not in taken]
            if not pending:
                break
            rng.shuffle(pending)
            for i in pending:
                x, y = examples[i]
                if self._accepts(self.metric(p.run(x), y)):
                    taken.add(i)
                    chosen.append(i)
                    if len(chosen) >= self.max_bootstrapped_demos:
                        break
            if len(chosen) >= self.max_bootstrapped_demos:
                break

        demos = [examples[i] for i in chosen]
        # Labeled demos come from examples not already bootstrapped, so the
        # final demo list contains no repeated training example.
        leftover = [examples[i] for i in range(len(examples)) if i not in taken]
        demos += self._sample_labeled(rng, leftover)

        return p.with_demos(demos)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# ---------------------------
|
|
128
|
-
# Random-search compile (BootstrapFewShotWithRandomSearch)
|
|
129
|
-
# ---------------------------
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
@dataclass
class Candidate:
    """One evaluated candidate program from the random search."""

    # Aggregate validation score of the candidate.
    score: float
    # Per-example validation metric values.
    subscores: list[float]
    # Seed that produced the candidate; -3, -2 and -1 mark the baselines.
    seed: int
    # The compiled program itself.
    program: _ProgramLike
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def random_search_compile(
    student: _ProgramLike,
    trainset: Sequence[tuple[Any, Any]],
    valset: Sequence[tuple[Any, Any]],
    metric: Callable[[Any, Any], float],
    *,
    max_bootstrapped_demos: int = 8,
    max_labeled_demos: int = 4,
    max_rounds: int = 2,
    num_candidate_programs: int = 16,
    stop_at_score: float | None = None,
    evaluate_fn: Callable[
        [_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult
    ]
    | None = None,
    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
) -> tuple[_ProgramLike, list[dict[str, Any]]]:
    """Search over few-shot variants of ``student`` and return the best one.

    Three baselines are tried first (seed -3: zero-shot copy, seed -2:
    labeled few-shot, seed -1: bootstrapped few-shot with the full demo
    budget), followed by ``num_candidate_programs`` bootstrapped candidates
    built from a shuffled training set with a random demo budget.

    Args:
        student: Program to optimise; never modified, only copied.
        trainset: ``(input, target)`` pairs used to build demos.
        valset: ``(input, target)`` pairs used to score each candidate.
        metric: Called as ``metric(prediction, target)``.
        max_bootstrapped_demos: Upper bound on bootstrapped demos.
        max_labeled_demos: Number of labeled demos per candidate.
        max_rounds: Bootstrapping passes per candidate.
        num_candidate_programs: Number of random candidates after the baselines.
        stop_at_score: Stop as soon as the best score reaches this value.
        evaluate_fn: Replacement for the module's ``evaluate``.
        on_candidate_evaluated: Called as ``(index, score, result,
            intervention)`` with a 1-based index after every candidate,
            including the one that triggers an early stop. Exceptions it
            raises are suppressed.

    Returns:
        ``(best_program, records)`` where ``records`` holds one dict per
        evaluated candidate with ``score``, ``subscores`` and ``intervention``.

    The local generator is seeded with 0, but it only drives the training-set
    shuffle and the demo budget; ``LabeledFewShot`` and ``BootstrapFewShot``
    draw from their own generators, so runs are not fully reproducible.
    """
    best_program: _ProgramLike | None = None
    best_score = float("-inf")
    candidates: list[Candidate] = []
    records: list[dict[str, Any]] = []

    # Negative seeds are the three baselines; non-negative seeds are random.
    seeds = [-3, -2, -1, *range(num_candidate_programs)]

    rng = random.Random(0)
    for idx, seed in enumerate(seeds):
        train_copy = list(trainset)

        if seed == -3:
            # Resolved lazily so a student lacking one of the two copy
            # methods still works.
            program = _copy_program(student, "reset_copy", "deepcopy")
        elif seed == -2:
            program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)
        else:
            if seed >= 0:
                rng.shuffle(train_copy)
            if max_bootstrapped_demos <= 0:
                size = 0
            elif seed == -1:
                size = max_bootstrapped_demos
            else:
                size = rng.randint(1, max_bootstrapped_demos)
            program = BootstrapFewShot(
                metric=metric,
                metric_threshold=None,
                max_bootstrapped_demos=size,
                max_labeled_demos=max_labeled_demos,
                teacher_settings={},
                max_rounds=max_rounds,
            ).compile(student, teacher=None, trainset=train_copy)

        res = (
            evaluate_fn(program, valset, metric)
            if evaluate_fn
            else evaluate(program, valset, metric)
        )
        cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
        candidates.append(cand)

        # Summary of what distinguishes this candidate.
        intervention: dict[str, Any] = {"seed": seed}
        if hasattr(program, "demos"):
            try:
                intervention["demos"] = program.demos  # type: ignore
            except Exception:
                intervention["demos"] = None
        if seed == -3:
            intervention["kind"] = "zero_shot"
            intervention["label"] = "zero-shot"
        elif seed == -2:
            intervention["kind"] = "labeled_few_shot"
            intervention["label"] = f"labeled-{max_labeled_demos}"
        else:
            intervention["kind"] = "bootstrapped_few_shot"
            intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
        records.append(
            {
                "score": cand.score,
                "subscores": cand.subscores,
                "intervention": intervention,
            }
        )

        if res.score > best_score:
            best_score, best_program = res.score, program

        # Notify before the early-stop check so the candidate that ends the
        # search is reported too. Callback failures are deliberately ignored.
        if on_candidate_evaluated is not None:
            with contextlib.suppress(Exception):
                on_candidate_evaluated(idx + 1, res.score, res, intervention)

        if stop_at_score is not None and best_score >= stop_at_score:
            break

    # Attach the ranked candidates only when the winner already exposes a
    # ``candidate_programs`` attribute; assignment failures are ignored.
    if hasattr(best_program, "candidate_programs"):
        with contextlib.suppress(Exception):
            best_program.candidate_programs = sorted(
                candidates, key=lambda c: c.score, reverse=True
            )  # type: ignore[attr-defined]

    # ``best_program`` stays None only if no score compared greater than
    # -inf (e.g. NaN scores). Test identity, not truthiness, so a winning
    # program that happens to be falsy is not discarded.
    if best_program is None:
        best_program = _copy_program(student, "deepcopy", "reset_copy")
    return (best_program, records)


def _copy_program(student: Any, *method_names: str) -> Any:
    """Call the first zero-argument copy method of ``student`` that exists."""
    for name in method_names:
        method = getattr(student, name, None)
        if method is not None:
            return method()
    raise AttributeError(
        f"{type(student).__name__!r} object defines none of: {', '.join(method_names)}"
    )
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
# Names exported by ``from <module> import *``.
__all__ = ["random_search_compile", "LabeledFewShot", "BootstrapFewShot"]
|
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
|
|
3
|
-
|
|
4
|
-
Requires:
|
|
5
|
-
- .env with GROQ_API_KEY
|
|
6
|
-
- datasets
|
|
7
|
-
|
|
8
|
-
Run:
|
|
9
|
-
- uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
from __future__ import annotations
|
|
13
|
-
|
|
14
|
-
import asyncio
|
|
15
|
-
import json
|
|
16
|
-
import os
|
|
17
|
-
import random
|
|
18
|
-
import time
|
|
19
|
-
from collections.abc import Sequence
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Any
|
|
22
|
-
|
|
23
|
-
from datasets import load_dataset
|
|
24
|
-
from dotenv import load_dotenv
|
|
25
|
-
from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
|
|
26
|
-
from synth_ai.lm.core.main_v3 import LM, build_messages
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def choose_label(pred: str, label_names: list[str]) -> str:
    """Map a free-form prediction onto one of ``label_names``.

    A case-insensitive exact match (after stripping surrounding whitespace)
    wins outright. Otherwise each label is scored by how many of its
    whitespace-separated, lower-cased words occur as substrings of the
    normalised prediction, and the highest-scoring label is returned; ties,
    including the all-zero case, go to the earliest label.
    """
    normalized = (pred or "").strip().lower()
    by_lowercase = {name.lower(): name for name in label_names}
    try:
        return by_lowercase[normalized]
    except KeyError:
        pass

    def overlap(label: str) -> int:
        # Substring test against the whole prediction, not a word match.
        words = label.lower().split()
        return len([word for word in words if word in normalized])

    return max(label_names, key=overlap)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def accuracy(pred: str, gold: str, labels: list[str]) -> float:
    """Return 1.0 when ``choose_label(pred, labels)`` equals ``gold``, else 0.0."""
    resolved = choose_label(pred, labels)
    if resolved == gold:
        return 1.0
    return 0.0
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class NaivePromptModel:
    """Toy prompt model that hands back canned instruction variants."""

    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
        """Return the first ``k`` of eight fixed instructions after shuffling.

        ``ctx`` is accepted but not consulted. The shuffle uses the
        module-level ``random`` generator, so the order follows its seed.
        """
        base = "Classify the Banking77 intent and return exactly one label."
        suffixes = (
            "",
            " Be concise.",
            " Use examples to guide your reasoning.",
            " Return only the label text.",
            " Follow the label names strictly.",
            " Do not include explanations.",
            " Think about similar intents before answering.",
            " Carefully consider the user's message.",
        )
        variants = [base + suffix for suffix in suffixes]
        random.shuffle(variants)
        return variants[:k]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def build_run_fn(lm: LM, label_names: list[str]):
    """Build the synchronous ``run_fn`` that queries ``lm`` for one message.

    ``label_names`` is accepted but not used here. The returned function reads
    its instructions and demos from the module-level ``state_ref`` global,
    which ``main`` assigns; calling it before that assignment raises
    ``NameError``.
    """

    def run_fn(x: str, _model: Any | None = None) -> str:
        """Send message ``x`` to the model and return its stripped raw response."""
        default_instruction = "You are an intent classifier for Banking77."
        system_prompt = state_ref.get("instructions", {}).get("main", default_instruction)
        demo_lines = [f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", [])]
        examples = "\n".join(demo_lines)
        # The examples section is included only when there are demos.
        prefix = f"Examples:\n{examples}\n\n" if examples else ""
        user = prefix + f"Message: {x}\nLabel:"
        messages = build_messages(system_prompt, user, images_bytes=None, model_name=lm.model)

        async def _ask():
            resp = await lm.respond_async(messages=messages)
            return (resp.raw_response or "").strip()

        # Each call drives the coroutine to completion via asyncio.run.
        return asyncio.run(_ask())

    return run_fn
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
    """Merge ``new_instr`` over the instructions in ``state`` and return ``state``.

    ``state`` is mutated in place: a fresh dict is stored under
    ``"instructions"``, with keys from ``new_instr`` overriding existing ones.
    """
    merged = dict(state.get("instructions", {}))
    merged.update(new_instr)
    state["instructions"] = merged
    return state
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
    """Store a list copy of ``demos`` under ``state["demos"]`` and return ``state``.

    ``state`` is mutated in place; the stored list is independent of the
    ``demos`` argument.
    """
    snapshot = [*demos]
    state["demos"] = snapshot
    return state
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def main():
    """Run the MIPROv2-style optimizer demo on Banking77 and save the trials.

    Reads ``MODEL`` and ``VENDOR`` from the environment (after loading
    ``.env``), uses the first 80 shuffled test items as the training set and
    the next 80 as the validation set, prints the best program's validation
    accuracy and writes the trial records to a timestamped JSON file in this
    module's directory.
    """
    # ``run_fn`` from ``build_run_fn`` reads this module-level name.
    global state_ref

    load_dotenv()
    random.seed(0)

    model = os.getenv("MODEL", "openai/gpt-oss-20b")
    vendor = os.getenv("VENDOR", "groq")
    lm = LM(model=model, vendor=vendor, temperature=0.0)

    print("Loading Banking77 dataset (train/dev split of test for demo)...")
    ds = load_dataset("banking77")
    label_names: list[str] = ds["test"].features["label"].names  # type: ignore

    all_items = []
    for row in ds["test"]:
        all_items.append((row["text"], label_names[int(row["label"])]))
    random.shuffle(all_items)
    trainset: Sequence[tuple[str, str]] = all_items[:80]
    valset: Sequence[tuple[str, str]] = all_items[80:160]

    state_ref = {
        "instructions": {"main": "You are an intent classifier for Banking77."},
        "demos": [],
    }
    adapter = ProgramAdapter(
        run_fn=build_run_fn(lm, label_names),
        state=state_ref,
        _predictors=["main"],
        set_instructions=set_instructions,
        set_demos=set_demos,
    )

    def metric(yhat: str, y: str) -> float:
        """Score a prediction against the gold label using ``accuracy``."""
        return accuracy(yhat, y, label_names)

    prompt_model = NaivePromptModel()
    task_model = None  # not used in this minimal example

    print("Running MIPROv2-style optimizer...")
    best, records = mipro_v2_compile(
        student=adapter,
        trainset=trainset,
        valset=valset,
        metric=metric,
        prompt_model=prompt_model,
        task_model=task_model,
        max_bootstrapped_demos=6,
        max_labeled_demos=4,
        num_candidates=6,
        num_trials=12,
        minibatch=True,
        minibatch_size=16,
        minibatch_full_eval_steps=3,
        seed=0,
    )

    res = evaluate_program(best, valset, metric)
    hits = sum(res.subscores)
    total = len(res.subscores)
    print(f"Best program accuracy on val: {res.score:.2%} ({hits}/{total})")

    context = {
        "model": model,
        "vendor": vendor,
        "train_size": len(trainset),
        "val_size": len(valset),
    }
    out = {"context": context, "trials": records}

    # The file name carries the current Unix time in whole seconds.
    out_path = Path(__file__).parent / f"mipro_banking77_{int(time.time())}.json"
    fname = str(out_path)
    with open(fname, "w") as f:
        json.dump(out, f, indent=2)
    print(f"Saved trial records to {fname}")


if __name__ == "__main__":
    main()
|