PyPI - synth-ai - Versions diffs - 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl - Mend

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (353)

examples/__init__.py +16 -0
examples/crafter_debug_render.py +23 -17
examples/qwen_coder/README.md +102 -0
examples/qwen_coder/_shared.py +113 -0
examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
examples/qwen_coder/configs/coder_lora_small.toml +58 -0
examples/qwen_coder/generate_dataset.py +98 -0
examples/qwen_coder/infer_ft_smoke.py +64 -0
examples/qwen_coder/infer_prod_proxy.py +73 -0
examples/qwen_coder/infer_via_synth.py +87 -0
examples/qwen_coder/scripts/infer_coder.sh +18 -0
examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
examples/qwen_coder/sft_full_17b.py +103 -0
examples/qwen_coder/sft_lora_30b.py +110 -0
examples/qwen_coder/subset_jsonl.py +38 -0
examples/qwen_coder/validate_jsonl.py +59 -0
examples/rl/configs/eval_base_qwen.toml +1 -1
examples/rl/configs/rl_from_base_qwen17.toml +1 -1
examples/rl/download_dataset.py +26 -10
examples/rl/run_eval.py +53 -52
examples/rl/run_rl_and_save.py +29 -12
examples/rl/task_app/math_single_step.py +180 -41
examples/rl/task_app/math_task_app.py +14 -6
examples/sft/README.md +139 -0
examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
examples/sft/evaluate.py +117 -0
examples/sft/export_dataset.py +117 -0
examples/sft/generate_traces.py +162 -0
examples/swe/__init__.py +12 -0
examples/swe/task_app/README.md +105 -0
examples/swe/task_app/__init__.py +2 -0
examples/swe/task_app/grpo_swe_mini.py +571 -0
examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
examples/swe/task_app/hosted/README.md +173 -0
examples/swe/task_app/hosted/__init__.py +5 -0
examples/swe/task_app/hosted/branching.py +143 -0
examples/swe/task_app/hosted/environment_routes.py +1289 -0
examples/swe/task_app/hosted/envs/__init__.py +1 -0
examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
examples/swe/task_app/hosted/hosted_app.py +204 -0
examples/swe/task_app/hosted/inference/__init__.py +5 -0
examples/swe/task_app/hosted/inference/openai_client.py +618 -0
examples/swe/task_app/hosted/main.py +100 -0
examples/swe/task_app/hosted/policy_routes.py +1079 -0
examples/swe/task_app/hosted/registry.py +195 -0
examples/swe/task_app/hosted/rollout.py +1869 -0
examples/swe/task_app/hosted/storage/__init__.py +5 -0
examples/swe/task_app/hosted/storage/volume.py +211 -0
examples/swe/task_app/hosted/test_agents.py +161 -0
examples/swe/task_app/hosted/test_service.py +137 -0
examples/swe/task_app/hosted/utils.py +62 -0
examples/vlm/README.md +68 -0
examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
examples/vlm/crafter_image_only_agent.py +207 -0
examples/vlm/crafter_openai_vlm_agent.py +277 -0
examples/vlm/filter_image_rows.py +63 -0
examples/vlm/run_crafter_vlm_benchmark.py +316 -0
examples/warming_up_to_rl/analyze_trace_db.py +12 -10
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
examples/warming_up_to_rl/export_trace_sft.py +218 -36
examples/warming_up_to_rl/groq_test.py +15 -8
examples/warming_up_to_rl/manage_secrets.py +29 -25
examples/warming_up_to_rl/readme.md +9 -2
examples/warming_up_to_rl/run_eval.py +137 -61
examples/warming_up_to_rl/run_fft_and_save.py +131 -60
examples/warming_up_to_rl/run_local_rollout.py +88 -39
examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
examples/warming_up_to_rl/run_rl_and_save.py +35 -12
examples/warming_up_to_rl/run_rollout_remote.py +44 -19
examples/warming_up_to_rl/task_app/README.md +6 -2
examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
synth/__init__.py +14 -0
synth_ai/__init__.py +20 -4
synth_ai/api/models/supported.py +376 -0
synth_ai/api/train/builders.py +157 -26
synth_ai/api/train/cli.py +213 -57
synth_ai/api/train/config_finder.py +65 -5
synth_ai/api/train/env_resolver.py +33 -15
synth_ai/api/train/pollers.py +13 -4
synth_ai/api/train/supported_algos.py +139 -0
synth_ai/api/train/task_app.py +5 -3
synth_ai/api/train/utils.py +33 -48
synth_ai/cli/__init__.py +19 -4
synth_ai/cli/_modal_wrapper.py +28 -0
synth_ai/cli/_typer_patch.py +49 -0
synth_ai/cli/balance.py +2 -3
synth_ai/cli/calc.py +1 -1
synth_ai/cli/demo.py +21 -6
synth_ai/cli/recent.py +2 -2
synth_ai/cli/rl_demo.py +77 -17
synth_ai/cli/root.py +116 -39
synth_ai/cli/status.py +2 -2
synth_ai/cli/task_apps.py +1709 -243
synth_ai/cli/traces.py +7 -4
synth_ai/cli/turso.py +73 -0
synth_ai/cli/watch.py +12 -18
synth_ai/core/experiment.py +0 -2
synth_ai/demo_registry.py +68 -31
synth_ai/demos/core/cli.py +516 -194
synth_ai/demos/demo_task_apps/__init__.py +3 -3
synth_ai/demos/demo_task_apps/core.py +64 -28
synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
synth_ai/demos/demo_task_apps/math/_common.py +1 -2
synth_ai/demos/demo_task_apps/math/app.py +2 -1
synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
synth_ai/environments/examples/bandit/engine.py +12 -4
synth_ai/environments/examples/bandit/taskset.py +4 -4
synth_ai/environments/examples/crafter_classic/environment.py +76 -1
synth_ai/environments/reproducibility/tree.py +5 -6
synth_ai/environments/service/app.py +11 -12
synth_ai/environments/service/core_routes.py +10 -9
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/core.py +1 -0
synth_ai/environments/tasks/filters.py +5 -6
synth_ai/environments/tasks/utils.py +4 -5
synth_ai/evals/base.py +0 -2
synth_ai/handshake.py +11 -9
synth_ai/http.py +1 -1
synth_ai/http_client.py +43 -11
synth_ai/inference/__init__.py +0 -2
synth_ai/inference/client.py +20 -6
synth_ai/jobs/client.py +103 -78
synth_ai/learning/__init__.py +41 -6
synth_ai/learning/algorithms.py +14 -0
synth_ai/learning/client.py +121 -29
synth_ai/learning/config.py +2 -40
synth_ai/learning/constants.py +0 -2
synth_ai/learning/ft_client.py +4 -56
synth_ai/learning/health.py +13 -7
synth_ai/learning/jobs.py +43 -47
synth_ai/{rl → learning/rl}/__init__.py +14 -5
synth_ai/learning/rl/client.py +267 -0
synth_ai/learning/rl/config.py +31 -0
synth_ai/{rl → learning/rl}/contracts.py +5 -10
synth_ai/{rl → learning/rl}/env_keys.py +45 -16
synth_ai/learning/rl/secrets.py +13 -0
synth_ai/learning/rl_client.py +2 -253
synth_ai/learning/sft/__init__.py +29 -0
synth_ai/learning/sft/client.py +68 -0
synth_ai/learning/sft/config.py +270 -0
synth_ai/learning/sft/data.py +295 -0
synth_ai/learning/sse.py +25 -26
synth_ai/learning/validators.py +25 -24
synth_ai/lm/__init__.py +21 -47
synth_ai/task/__init__.py +26 -27
synth_ai/task/apps/__init__.py +18 -19
synth_ai/task/auth.py +35 -23
synth_ai/task/client.py +15 -13
synth_ai/task/contracts.py +37 -35
synth_ai/task/datasets.py +9 -6
synth_ai/task/errors.py +11 -10
synth_ai/task/health.py +17 -11
synth_ai/task/json.py +58 -24
synth_ai/task/proxy.py +15 -14
synth_ai/task/rubrics.py +22 -15
synth_ai/task/server.py +43 -17
synth_ai/task/tracing_utils.py +12 -7
synth_ai/task/validators.py +0 -1
synth_ai/task/vendors.py +5 -7
synth_ai/tracing_v3/__init__.py +2 -0
synth_ai/tracing_v3/abstractions.py +21 -4
synth_ai/tracing_v3/db_config.py +26 -1
synth_ai/tracing_v3/decorators.py +18 -15
synth_ai/tracing_v3/examples/basic_usage.py +3 -2
synth_ai/tracing_v3/hooks.py +6 -4
synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
synth_ai/tracing_v3/replica_sync.py +1 -0
synth_ai/tracing_v3/session_tracer.py +63 -16
synth_ai/tracing_v3/storage/base.py +89 -1
synth_ai/tracing_v3/storage/config.py +21 -8
synth_ai/tracing_v3/storage/factory.py +10 -8
synth_ai/tracing_v3/storage/utils.py +4 -2
synth_ai/tracing_v3/turso/daemon.py +7 -2
synth_ai/tracing_v3/turso/models.py +5 -2
synth_ai/tracing_v3/turso/native_manager.py +1173 -0
synth_ai/tracing_v3/utils.py +4 -3
synth_ai/v0/api/__init__.py +8 -0
synth_ai/v0/api/models/__init__.py +8 -0
synth_ai/v0/api/models/supported.py +8 -0
synth_ai/v0/config/__init__.py +15 -0
synth_ai/v0/config/base_url.py +12 -0
synth_ai/v0/lm/__init__.py +51 -0
synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
synth_ai/{lm → v0/lm}/config.py +6 -1
synth_ai/{lm → v0/lm}/core/all.py +9 -9
synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
synth_ai/{lm → v0/lm}/core/main.py +19 -7
synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
synth_ai/{lm → v0/lm}/overrides.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
synth_ai/v0/tracing/upload.py +32 -135
synth_ai/v0/tracing_v3/__init__.py +10 -0
synth_ai/v0/tracing_v3/abstractions.py +3 -0
synth_ai/v0/tracing_v3/decorators.py +3 -0
synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
synth_ai/v0/tracing_v3/session_tracer.py +3 -0
synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
examples/common_old/backend.py +0 -21
examples/evals_old/README.md +0 -98
examples/evals_old/__init__.py +0 -6
examples/evals_old/compare_models.py +0 -1037
examples/evals_old/example_log.md +0 -145
examples/evals_old/run_demo.sh +0 -126
examples/evals_old/trace_analysis.py +0 -270
examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
examples/finetuning_old/synth_qwen_v1/README.md +0 -68
examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
examples/finetuning_old/synth_qwen_v1/util.py +0 -147
examples/rl_old/task_app.py +0 -962
examples/warming_up_to_rl/old/event_rewards.md +0 -234
examples/warming_up_to_rl/old/notes.md +0 -73
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
synth_ai/experimental/synth_oss.py +0 -446
synth_ai/install_sqld.sh +0 -40
synth_ai/learning/filtering.py +0 -0
synth_ai/learning/offline/dpo.py +0 -0
synth_ai/learning/offline/providers.py +0 -7
synth_ai/learning/offline/sft.py +0 -0
synth_ai/learning/offline/shared.py +0 -0
synth_ai/learning/online/grpo.py +0 -0
synth_ai/learning/online/irft.py +0 -0
synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
synth_ai/learning/prompts/gepa.py +0 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
synth_ai/learning/prompts/mipro.py +0 -289
synth_ai/learning/prompts/random_search.py +0 -246
synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
synth_ai/rl/secrets.py +0 -19
synth_ai/scripts/verify_rewards.py +0 -100
synth_ai/tracing/__init__.py +0 -30
synth_ai/tracing_v1/__init__.py +0 -33
synth_ai/tracing_v3/turso/__init__.py +0 -25
synth_ai/tracing_v3/turso/manager.py +0 -774
synth_ai/zyk/__init__.py +0 -30
synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
/synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
/synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
/synth_ai/{lm → v0/lm}/constants.py +0 -0
/synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
/synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
/synth_ai/{lm → v0/lm}/injection.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
/synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/tools/base.py +0 -0
/synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/warmup.py +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0

synth_ai/learning/prompts/run_random_search_banking77.py (DELETED)

@@ -1,324 +0,0 @@
-"""
-Example: Random Search optimizer on Banking77 using Groq gpt-oss-20b.
-Requires:
-- .env with GROQ_API_KEY
-- datasets (`uv add datasets` if needed)
-Run:
-- uv run -q python -m synth_ai.learning.prompts.run_random_search_banking77
-"""
-from __future__ import annotations
-import asyncio
-import json
-import os
-import random
-import time
-from collections.abc import Sequence
-from dataclasses import dataclass, replace
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-from datasets import load_dataset
-from dotenv import load_dotenv
-from synth_ai.learning.prompts.random_search import random_search_compile
-from synth_ai.lm.core.main_v3 import LM, build_messages
-from tqdm import tqdm
-def choose_label(pred: str, label_names: list[str]) -> str:
-    norm = (pred or "").strip().lower()
-    d = {ln.lower(): ln for ln in label_names}
-    if norm in d:
-        return d[norm]
-    def score(cand: str) -> int:
-        c = cand.lower()
-        return sum(1 for w in c.split() if w in norm)
-    return max(label_names, key=score)
-def accuracy(pred: str, gold: str, labels: list[str]) -> float:
-    return 1.0 if choose_label(pred, labels) == gold else 0.0
-@dataclass
-class StudentProgram:
-    lm: LM
-    label_names: list[str]
-    instruction: str
-    demos: list[tuple[str, str]]
-    def reset_copy(self):
-        return replace(self, instruction=self.instruction, demos=list(self.demos))
-    def deepcopy(self):
-        return replace(self, instruction=str(self.instruction), demos=list(self.demos))
-    def with_demos(self, demos: list[tuple[str, str]]):
-        return replace(self, demos=list(demos))
-    def run(self, x: str) -> str:
-        # Build a prompt with optional demos
-        examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
-        sys = self.instruction or "You are an intent classifier for Banking77."
-        user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
-        messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
-        # Call LM synchronously via asyncio
-        async def _call():
-            resp = await self.lm.respond_async(messages=messages)
-            return (resp.raw_response or "").strip()
-        return asyncio.run(_call())
-    async def _apredict(self, x: str):
-        examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
-        sys = self.instruction or "You are an intent classifier for Banking77."
-        user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
-        messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
-        resp = await self.lm.respond_async(messages=messages)
-        return (resp.raw_response or "").strip(), (resp.usage or {})
-def main():
-    load_dotenv()
-    random.seed(0)
-    model = os.getenv("MODEL", "openai/gpt-oss-20b")
-    vendor = os.getenv("VENDOR", "groq")
-    lm = LM(model=model, vendor=vendor, temperature=0.0)
-    print("Loading Banking77 dataset (train/dev split of test for demo)...")
-    ds = load_dataset("banking77")
-    label_names: list[str] = ds["test"].features["label"].names  # type: ignore
-    # Create small train/val from the test split for speed
-    all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
-    random.shuffle(all_items)
-    trainset: Sequence[tuple[str, str]] = all_items[:40]
-    valset: Sequence[tuple[str, str]] = all_items[40:60]  # 20 examples
-    student = StudentProgram(
-        lm=lm,
-        label_names=label_names,
-        instruction="You are an intent classifier for the Banking77 dataset. Return exactly one label.",
-        demos=[],
-    )
-    def metric(yhat: str, y: str) -> float:
-        return accuracy(yhat, y, label_names)
-    total_candidates = 3 + 3  # zero-shot, labeled few-shot, bootstrapped + 3 random seeds
-    print(
-        f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)..."
-    )
-    def eval_parallel(program: StudentProgram, dataset: Sequence[tuple[str, str]], metric_fn):
-        async def _run():
-            xs = [x for x, _ in dataset]
-            ys = [y for _, y in dataset]
-            preds: list[Optional[str]] = [None] * len(xs)
-            sem = asyncio.Semaphore(int(os.getenv("CONCURRENCY", "5")))
-            async def worker(i: int, x: str, y: str):
-                import time
-                t_start = time.monotonic()
-                try:
-                    async with sem:
-                        pred, usage = await asyncio.wait_for(
-                            program._apredict(x),
-                            timeout=float(os.getenv("TIMEOUT_S", "45")),
-                        )
-                        t_end = time.monotonic()
-                        return i, y, pred, t_start, t_end, usage or {}
-                except asyncio.CancelledError:
-                    # Respect cancellation but return a placeholder record so scheduler can proceed
-                    t_end = time.monotonic()
-                    return i, y, "", t_start, t_end, {}
-                except Exception:
-                    t_end = time.monotonic()
-                    return i, y, "", t_start, t_end, {}
-            tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys, strict=False))]
-            correct_sum = 0.0
-            processed = 0
-            import statistics
-            import time
-            durations: list[float] = []
-            in_tok_sum = 0
-            out_tok_sum = 0
-            in_tok_count = 0
-            out_tok_count = 0
-            details: list[dict[str, Any]] = []
-            t_batch_start = time.monotonic()
-            deadline = float(os.getenv("BATCH_DEADLINE_S", "20"))
-            with tqdm(total=len(tasks), desc="Rollouts", leave=False) as pbar:
-                pending = set(tasks)
-                # Process completions until all done or deadline reached
-                while pending:
-                    elapsed = time.monotonic() - t_batch_start
-                    remaining = max(0.0, deadline - elapsed)
-                    if remaining <= 0.0:
-                        # Cancel any remaining
-                        for t in pending:
-                            t.cancel()
-                        done, _ = await asyncio.wait(pending, return_when=asyncio.ALL_COMPLETED)
-                        # Record canceled as zeros
-                        for task in done:
-                            try:
-                                i, y_true, pred, t_start, t_end, usage = task.result()
-                            except Exception:
-                                # Unknown index: we can't recover; skip as it's canceled before start
-                                continue
-                            # Already processed ones shouldn't be in pending; skip
-                        break
-                    # Wait for at least one completion within remaining time (polling granularity <= 1s)
-                    timeout = min(1.0, remaining)
-                    done, pending = await asyncio.wait(
-                        pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
-                    )
-                    import contextlib
-                    for task in done:
-                        try:
-                            i, y_true, pred, t_start, t_end, usage = task.result()
-                        except BaseException:
-                            # Treat as failure/cancelled
-                            continue
-                        durations.append(max(0.0, t_end - t_start))
-                        preds[i] = pred
-                        processed += 1
-                        with contextlib.suppress(Exception):
-                            correct_sum += float(metric_fn(pred, y_true))
-                        with contextlib.suppress(Exception):
-                            pt = usage.get("prompt_tokens") or usage.get("input_tokens")
-                            ct = usage.get("completion_tokens") or usage.get("output_tokens")
-                            if isinstance(pt, (int, float)):
-                                in_tok_sum += int(pt)
-                                in_tok_count += 1
-                            if isinstance(ct, (int, float)):
-                                out_tok_sum += int(ct)
-                                out_tok_count += 1
-                        details.append(
-                            {
-                                "index": i,
-                                "seconds": max(0.0, t_end - t_start),
-                                "score": float(metric_fn(pred, y_true)),
-                                "usage": {
-                                    "prompt_tokens": usage.get("prompt_tokens")
-                                    or usage.get("input_tokens"),
-                                    "completion_tokens": usage.get("completion_tokens")
-                                    or usage.get("output_tokens"),
-                                },
-                            }
-                        )
-                        pbar.update(1)
-                        med = statistics.median(durations) if durations else 0.0
-                        mx = max(durations) if durations else 0.0
-                        avg_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
-                        avg_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
-                        pbar.set_postfix(
-                            {
-                                "acc": f"{(correct_sum / processed):.2f}",
-                                "done": f"{processed}/{len(tasks)}",
-                                "med_s": f"{med:.1f}",
-                                "max_s": f"{mx:.1f}",
-                                "tin": f"{avg_in:.1f}",
-                                "tout": f"{avg_out:.1f}",
-                            }
-                        )
-            # Compute score only from completed/successful rollouts (drop timeouts/cancelled)
-            subs = [float(d.get("score", 0.0)) for d in details]
-            result = SimpleNamespace(score=(sum(subs) / max(1, len(subs))), subscores=subs)
-            result.details = details
-            result.mean_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
-            result.mean_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
-            return result
-        return asyncio.run(_run())
-    pbar = tqdm(total=total_candidates, desc="Candidates")
-    candidate_eval_details: dict[int, Any] = {}
-    def on_cand(idx: int, score: float, res, intervention):
-        pbar.update(1)
-        pbar.set_postfix({"score": f"{score:.2f}"})
-        # store per-instance details (for apples-to-apples)
-        import contextlib
-        with contextlib.suppress(Exception):
-            candidate_eval_details[idx] = {
-                "score": score,
-                "mean_in": getattr(res, "mean_in", None),
-                "mean_out": getattr(res, "mean_out", None),
-                "instances": getattr(res, "details", None),
-            }
-        # visible summary line per candidate
-        kind = (
-            intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
-        )
-        label = intervention.get("label") if isinstance(intervention, dict) else None
-        seed = intervention.get("seed") if isinstance(intervention, dict) else None
-        processed = len(getattr(res, "details", []) or [])
-        from tqdm import tqdm as _tqdm
-        _tqdm.write(
-            f"Candidate {idx}/{total_candidates} [{kind}{'' if label is None else f', label={label}'}{'' if seed is None else f', seed={seed}'}]: "
-            f"score={score:.2f} | mean tin/tout={getattr(res, 'mean_in', 0):.1f}/{getattr(res, 'mean_out', 0):.1f} | N={processed}"
-        )
-    best, records = random_search_compile(
-        student=student,
-        trainset=trainset,
-        valset=valset,
-        metric=metric,
-        evaluate_fn=eval_parallel,
-        max_bootstrapped_demos=0,
-        max_labeled_demos=4,
-        max_rounds=2,
-        num_candidate_programs=3,
-        on_candidate_evaluated=on_cand,
-    )
-    pbar.close()
-    # Evaluate best on holdout (valset) with parallel rollouts
-    print("Evaluating best program on val (parallel rollouts)...")
-    best_res = eval_parallel(best, valset, metric)
-    correct = int(round(best_res.score * max(1, len(best_res.subscores))))
-    print(
-        "Best program accuracy on val: "
-        f"{correct}/{len(valset)} ({best_res.score:.2%}) "
-        f"| mean tokens in/out: {getattr(best_res, 'mean_in', 0):.1f}/{getattr(best_res, 'mean_out', 0):.1f}"
-    )
-    # Save per-candidate scores and interventions
-    out = {
-        "context": {
-            "model": model,
-            "vendor": vendor,
-            "train_size": len(trainset),
-            "val_size": len(valset),
-        },
-        "candidates": records,
-        "candidate_eval_details": candidate_eval_details,
-        "best_eval_details": {
-            "score": best_res.score,
-            "mean_in": getattr(best_res, "mean_in", None),
-            "mean_out": getattr(best_res, "mean_out", None),
-            "instances": getattr(best_res, "details", None),
-        },
-    }
-    out_dir = Path(__file__).parent
-    fname = str(out_dir / f"random_search_banking77_{int(time.time())}.json")
-    with open(fname, "w") as f:
-        json.dump(out, f, indent=2)
-    print(f"Saved candidate records to {fname}")
-if __name__ == "__main__":
-    main()

synth_ai/rl/secrets.py (DELETED)

@@ -1,19 +0,0 @@
-from __future__ import annotations
-"""Helpers for generating RL environment credentials."""
-import secrets
-__all__ = ["mint_environment_api_key"]
-def mint_environment_api_key() -> str:
-    """Mint a random ENVIRONMENT_API_KEY value.
-    The current format is 64 hexadecimal characters (256 bits of entropy), which
-    matches the shell helpers used by the RL examples. This keeps the token easy
-    to copy while remaining suitably strong for authentication.
-    """
-    # secrets.token_hex(32) → 32 random bytes rendered as 64 hex characters.
-    return secrets.token_hex(32)

synth_ai/scripts/verify_rewards.py DELETED Viewed

@@ -1,100 +0,0 @@
-#!/usr/bin/env python3
-"""
-Verify reward persistence in a traces database.
-Usage:
-  uv run python -m synth_ai.scripts.verify_rewards --db /path/to/db.sqlite --min-reward 1
-"""
-import argparse
-import asyncio
-import os
-from typing import Dict
-from sqlalchemy import text
-from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
-async def verify(db_path: str, min_reward: int) -> int:
-    db_url = db_path
-    if not db_url.startswith("sqlite+aiosqlite:///"):
-        db_url = f"sqlite+aiosqlite:///{os.path.abspath(db_path)}"
-    mgr = AsyncSQLTraceManager(db_url=db_url)
-    await mgr.initialize()
-    try:
-        async with mgr.session() as session:
-            # Sessions with outcome_rewards
-            q_good = text(
-                """
-                SELECT session_id, MAX(total_reward) as total_reward
-                FROM outcome_rewards
-                GROUP BY session_id
-                """
-            )
-            res = await session.execute(q_good)
-            outcomes = {row[0]: int(row[1]) for row in res.fetchall()}
-            # Sessions without outcome_rewards
-            q_missing = text(
-                """
-                SELECT s.session_id
-                FROM session_traces s
-                LEFT JOIN outcome_rewards o ON s.session_id = o.session_id
-                WHERE o.session_id IS NULL
-                """
-            )
-            res2 = await session.execute(q_missing)
-            missing = [row[0] for row in res2.fetchall()]
-            # Aggregate event_rewards per session (informational)
-            q_event = text(
-                """
-                SELECT session_id, COALESCE(SUM(reward_value), 0.0) as sum_rewards
-                FROM event_rewards
-                GROUP BY session_id
-                """
-            )
-            res3 = await session.execute(q_event)
-            event_sums: Dict[str, float] = {row[0]: float(row[1]) for row in res3.fetchall()}
-        print(f"Sessions with outcome_rewards: {len(outcomes)}")
-        print(f"Sessions missing outcome_rewards: {len(missing)}")
-        if missing:
-            print("Missing session_ids:", ", ".join(missing[:10]) + (" ..." if len(missing) > 10 else ""))
-        # Threshold check
-        qualifying = {sid: r for sid, r in outcomes.items() if r >= min_reward}
-        print(f"Sessions with total_reward >= {min_reward}: {len(qualifying)}")
-        # Show a small comparison snapshot
-        sample = list(qualifying.items())[:5]
-        for sid, tot in sample:
-            er = event_sums.get(sid, 0.0)
-            print(f"  {sid}: outcome={tot}, sum(event_rewards)={er:.2f}")
-        # Exit non-zero if any sessions are missing outcome rewards
-        if missing:
-            return 2
-        if min_reward > 0 and not qualifying:
-            return 3
-        return 0
-    finally:
-        await mgr.close()
-def main() -> int:
-    ap = argparse.ArgumentParser(description="Verify reward persistence in traces DB")
-    ap.add_argument("--db", required=True, help="Path to traces SQLite DB (aiosqlite)")
-    ap.add_argument("--min-reward", type=int, default=0, help="Minimum total_reward to consider qualifying")
-    args = ap.parse_args()
-    return asyncio.run(verify(args.db, args.min_reward))
-if __name__ == "__main__":
-    raise SystemExit(main())

synth_ai/tracing/__init__.py DELETED Viewed

@@ -1,30 +0,0 @@
-import importlib as _importlib
-import sys as _sys
-_pkg = _importlib.import_module("synth_ai.v0.tracing")
-_sys.modules[__name__] = _pkg
-_SUBMODULES = [
-    "abstractions",
-    "base_client",
-    "client_manager",
-    "config",
-    "context",
-    "decorators",
-    "immediate_client",
-    "local",
-    "log_client_base",
-    "retry_queue",
-    "trackers",
-    "upload",
-    "utils",
-]
-for _m in _SUBMODULES:
-    _sys.modules[f"{__name__}.{_m}"] = _importlib.import_module(f"synth_ai.v0.tracing.{_m}")
-_events_pkg = _importlib.import_module("synth_ai.v0.tracing.events")
-_sys.modules[f"{__name__}.events"] = _events_pkg
-for _m in ["manage", "scope", "store"]:
-    _sys.modules[f"{__name__}.events.{_m}"] = _importlib.import_module(
-        f"synth_ai.v0.tracing.events.{_m}"
-    )

synth_ai/tracing_v1/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-import importlib as _importlib
-import sys as _sys
-# Forward top-level package
-_pkg = _importlib.import_module("synth_ai.v0.tracing_v1")
-_sys.modules[__name__] = _pkg
-# Explicitly forward submodules so `synth_ai.tracing_v1.X` works
-_SUBMODULES = [
-    "abstractions",
-    "base_client",
-    "client_manager",
-    "config",
-    "context",
-    "decorators",
-    "immediate_client",
-    "local",
-    "log_client_base",
-    "retry_queue",
-    "trackers",
-    "upload",
-    "utils",
-]
-for _m in _SUBMODULES:
-    _sys.modules[f"{__name__}.{_m}"] = _importlib.import_module(f"synth_ai.v0.tracing_v1.{_m}")
-# Forward events package and its submodules
-_events_pkg = _importlib.import_module("synth_ai.v0.tracing_v1.events")
-_sys.modules[f"{__name__}.events"] = _events_pkg
-for _m in ["manage", "scope", "store"]:
-    _sys.modules[f"{__name__}.events.{_m}"] = _importlib.import_module(
-        f"synth_ai.v0.tracing_v1.events.{_m}"
-    )

synth_ai/tracing_v3/turso/__init__.py DELETED Viewed

@@ -1,25 +0,0 @@
-"""Turso/sqld implementation for tracing v3."""
-from .manager import AsyncSQLTraceManager
-from .models import (
-    Base,
-    Event,
-    Experiment,
-    Message,
-    SessionTimestep,
-    SessionTrace,
-    System,
-    SystemVersion,
-)
-__all__ = [
-    "AsyncSQLTraceManager",
-    "Base",
-    "SessionTrace",
-    "SessionTimestep",
-    "Event",
-    "Message",
-    "Experiment",
-    "System",
-    "SystemVersion",
-]

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl