synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Trace hooks for Pokemon Red environment - v3 version.
|
|
3
|
+
Captures reward information and saves to Turso database.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from synth_ai.tracing_v3.abstractions import BaseEvent, EnvironmentEvent
|
|
10
|
+
from synth_ai.tracing_v3.hooks import HookManager
|
|
11
|
+
|
|
12
|
+
# Pokemon Red achievement categories by reward value
|
|
13
|
+
EXPLORATION_ACHIEVEMENTS = {
|
|
14
|
+
0.02: "explore_new_area",
|
|
15
|
+
0.04: "explore_multiple_areas",
|
|
16
|
+
1.0: "leave_starting_area",
|
|
17
|
+
1.5: "enter_new_city",
|
|
18
|
+
2.0: "explore_new_route",
|
|
19
|
+
5.0: "enter_gym_building",
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
TRAINING_ACHIEVEMENTS = {
|
|
23
|
+
0.2: "pokemon_level_up",
|
|
24
|
+
0.3: "reach_power_level",
|
|
25
|
+
3.0: "pokemon_ready_for_battle",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
BATTLE_ACHIEVEMENTS = {
|
|
29
|
+
0.1: "encounter_wild_pokemon",
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
RESOURCE_ACHIEVEMENTS = {
|
|
33
|
+
0.05: "keep_pokemon_healthy",
|
|
34
|
+
0.5: "find_valuable_item",
|
|
35
|
+
0.8: "visit_pokemon_center",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
MAJOR_ACHIEVEMENTS = {
|
|
39
|
+
50.0: "defeat_brock_win_badge",
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def track_pokemon_rewards(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
|
|
44
|
+
"""Hook that captures detailed Pokemon Red reward information."""
|
|
45
|
+
# Only process EnvironmentEvents
|
|
46
|
+
if not isinstance(event_obj, EnvironmentEvent):
|
|
47
|
+
return None
|
|
48
|
+
|
|
49
|
+
reward = event_obj.reward
|
|
50
|
+
if reward is None or reward == 0.0:
|
|
51
|
+
return None
|
|
52
|
+
|
|
53
|
+
# Determine achievement type based on reward value
|
|
54
|
+
achievement_type = "unknown"
|
|
55
|
+
achievement_category = "other"
|
|
56
|
+
|
|
57
|
+
# Check each category
|
|
58
|
+
if reward in EXPLORATION_ACHIEVEMENTS:
|
|
59
|
+
achievement_type = EXPLORATION_ACHIEVEMENTS[reward]
|
|
60
|
+
achievement_category = "exploration"
|
|
61
|
+
elif reward in TRAINING_ACHIEVEMENTS:
|
|
62
|
+
achievement_type = TRAINING_ACHIEVEMENTS[reward]
|
|
63
|
+
achievement_category = "training"
|
|
64
|
+
elif reward in BATTLE_ACHIEVEMENTS:
|
|
65
|
+
achievement_type = BATTLE_ACHIEVEMENTS[reward]
|
|
66
|
+
achievement_category = "battle"
|
|
67
|
+
elif reward in RESOURCE_ACHIEVEMENTS:
|
|
68
|
+
achievement_type = RESOURCE_ACHIEVEMENTS[reward]
|
|
69
|
+
achievement_category = "resource"
|
|
70
|
+
elif reward in MAJOR_ACHIEVEMENTS:
|
|
71
|
+
achievement_type = MAJOR_ACHIEVEMENTS[reward]
|
|
72
|
+
achievement_category = "major"
|
|
73
|
+
|
|
74
|
+
return {
|
|
75
|
+
"reward_value": reward,
|
|
76
|
+
"achievement_type": achievement_type,
|
|
77
|
+
"achievement_category": achievement_category,
|
|
78
|
+
"timestamp": datetime.now().isoformat(),
|
|
79
|
+
"system_state_before": event_obj.system_state_before,
|
|
80
|
+
"system_state_after": event_obj.system_state_after,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def track_pokemon_milestones(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
|
|
85
|
+
"""Hook that tracks significant Pokemon Red milestones."""
|
|
86
|
+
# Only process EnvironmentEvents
|
|
87
|
+
if not isinstance(event_obj, EnvironmentEvent):
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
reward = event_obj.reward
|
|
91
|
+
if reward is None:
|
|
92
|
+
return None
|
|
93
|
+
|
|
94
|
+
# Track major milestones
|
|
95
|
+
if reward >= 1.0: # Significant progress rewards
|
|
96
|
+
return {
|
|
97
|
+
"milestone": "major_progress",
|
|
98
|
+
"reward": reward,
|
|
99
|
+
"timestamp": datetime.now().isoformat(),
|
|
100
|
+
}
|
|
101
|
+
elif reward >= 0.5: # Moderate rewards
|
|
102
|
+
return {
|
|
103
|
+
"milestone": "moderate_progress",
|
|
104
|
+
"reward": reward,
|
|
105
|
+
"timestamp": datetime.now().isoformat(),
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def track_pokemon_outcomes(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
|
|
112
|
+
"""Hook that tracks episode outcomes for Pokemon Red."""
|
|
113
|
+
# Only process EnvironmentEvents
|
|
114
|
+
if not isinstance(event_obj, EnvironmentEvent):
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
# Check for termination conditions
|
|
118
|
+
if event_obj.terminated or event_obj.truncated:
|
|
119
|
+
total_reward = getattr(event_obj, 'total_reward', 0.0)
|
|
120
|
+
steps_taken = getattr(event_obj, 'step_count', 0)
|
|
121
|
+
|
|
122
|
+
# Extract achievement information from system state
|
|
123
|
+
achievements_count = 0
|
|
124
|
+
if event_obj.system_state_after:
|
|
125
|
+
# Count positive rewards as achievements
|
|
126
|
+
# This is a simplified count - in practice you'd track actual achievements
|
|
127
|
+
achievements_count = max(1, int(total_reward / 0.1)) # Rough estimate
|
|
128
|
+
|
|
129
|
+
return {
|
|
130
|
+
"outcome_type": "episode_end",
|
|
131
|
+
"total_reward": total_reward,
|
|
132
|
+
"steps_taken": steps_taken,
|
|
133
|
+
"achievements_count": achievements_count,
|
|
134
|
+
"terminated": event_obj.terminated,
|
|
135
|
+
"truncated": event_obj.truncated,
|
|
136
|
+
"timestamp": datetime.now().isoformat(),
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Create the global POKEMON_RED_HOOKS instance
|
|
143
|
+
POKEMON_RED_HOOKS = HookManager()
|
|
144
|
+
|
|
145
|
+
# Register all hooks
|
|
146
|
+
POKEMON_RED_HOOKS.register(
|
|
147
|
+
"event_recorded",
|
|
148
|
+
track_pokemon_rewards,
|
|
149
|
+
name="pokemon_rewards",
|
|
150
|
+
priority=10,
|
|
151
|
+
event_types=["environment"],
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
POKEMON_RED_HOOKS.register(
|
|
155
|
+
"event_recorded",
|
|
156
|
+
track_pokemon_milestones,
|
|
157
|
+
name="pokemon_milestones",
|
|
158
|
+
priority=5,
|
|
159
|
+
event_types=["environment"],
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
POKEMON_RED_HOOKS.register(
|
|
163
|
+
"event_recorded",
|
|
164
|
+
track_pokemon_outcomes,
|
|
165
|
+
name="pokemon_outcomes",
|
|
166
|
+
priority=5,
|
|
167
|
+
event_types=["environment"],
|
|
168
|
+
)
|
synth_ai/evals/base.py
CHANGED
|
@@ -1,13 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
1
7
|
class Judgement:
|
|
2
8
|
def __init__(
|
|
3
|
-
self,
|
|
4
|
-
|
|
9
|
+
self,
|
|
10
|
+
criteria: str,
|
|
11
|
+
score: float,
|
|
12
|
+
reasoning: str = "",
|
|
13
|
+
evidence: list[str] | None = None,
|
|
14
|
+
) -> None:
|
|
5
15
|
self.criteria = criteria
|
|
6
16
|
self.score = score
|
|
7
17
|
self.reasoning = reasoning
|
|
8
18
|
self.evidence = evidence or []
|
|
9
19
|
|
|
10
20
|
|
|
11
|
-
class BaseEval:
|
|
12
|
-
|
|
13
|
-
|
|
21
|
+
class BaseEval(ABC):
|
|
22
|
+
@abstractmethod
|
|
23
|
+
async def run(self, data: Any) -> list[Judgement]:
|
|
24
|
+
"""Execute the evaluation and return a list of judgements."""
|
synth_ai/evals/client.py
CHANGED
|
@@ -10,7 +10,7 @@ import os
|
|
|
10
10
|
import warnings
|
|
11
11
|
from typing import Any, Literal, TypedDict
|
|
12
12
|
|
|
13
|
-
from synth_ai.
|
|
13
|
+
from synth_ai.http_client import AsyncHttpClient, HTTPError
|
|
14
14
|
from synth_ai.tracing_v3.serialization import normalize_for_json
|
|
15
15
|
|
|
16
16
|
Provider = Literal["groq", "gemini"]
|
synth_ai/http.py
CHANGED
|
@@ -1,26 +1,12 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
Backward-compatible HTTP client exports.
|
|
3
|
+
|
|
4
|
+
Historically, some modules imported ``synth_ai.http``. The canonical location
|
|
5
|
+
is ``synth_ai.http_client``; this module simply re-exports the same symbols so
|
|
6
|
+
legacy imports keep working.
|
|
5
7
|
"""
|
|
6
8
|
|
|
7
|
-
try:
|
|
8
|
-
from synth_ai.http_client import * # type: ignore F401,F403
|
|
9
|
-
except Exception:
|
|
10
|
-
try:
|
|
11
|
-
from .http_client import * # type: ignore F401,F403
|
|
12
|
-
except Exception:
|
|
13
|
-
import importlib.util as _ilu
|
|
14
|
-
import sys as _sys
|
|
15
|
-
from pathlib import Path as _Path
|
|
16
9
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
if not _spec or not _spec.loader:
|
|
21
|
-
raise ImportError("Could not load http_client module") from None
|
|
22
|
-
_mod = _ilu.module_from_spec(_spec)
|
|
23
|
-
_spec.loader.exec_module(_mod)
|
|
24
|
-
_sys.modules["synth_ai.http_client"] = _mod
|
|
25
|
-
for _name in ("HTTPError", "AsyncHttpClient", "sleep"):
|
|
26
|
-
globals()[_name] = getattr(_mod, _name)
|
|
10
|
+
from synth_ai.http_client import AsyncHttpClient, HTTPError, sleep
|
|
11
|
+
|
|
12
|
+
__all__ = ["AsyncHttpClient", "HTTPError", "sleep"]
|
synth_ai/inference/client.py
CHANGED
synth_ai/judge_schemas.py
CHANGED
|
@@ -9,7 +9,7 @@ This is the canonical contract that the backend MUST conform to.
|
|
|
9
9
|
|
|
10
10
|
from __future__ import annotations
|
|
11
11
|
|
|
12
|
-
from typing import Any,
|
|
12
|
+
from typing import Any, Literal, Optional
|
|
13
13
|
|
|
14
14
|
from pydantic import BaseModel, Field
|
|
15
15
|
|
|
@@ -63,7 +63,7 @@ class JudgeScoreResponse(BaseModel):
|
|
|
63
63
|
description="Request metadata (provider, options, etc.)"
|
|
64
64
|
)
|
|
65
65
|
|
|
66
|
-
def aggregate_event_reward(self) -> float
|
|
66
|
+
def aggregate_event_reward(self) -> Optional[float]:
|
|
67
67
|
"""
|
|
68
68
|
Aggregate all event totals into a single reward.
|
|
69
69
|
|
|
@@ -74,7 +74,7 @@ class JudgeScoreResponse(BaseModel):
|
|
|
74
74
|
return None
|
|
75
75
|
return sum(self.event_totals)
|
|
76
76
|
|
|
77
|
-
def aggregate_outcome_reward(self) -> float
|
|
77
|
+
def aggregate_outcome_reward(self) -> Optional[float]:
|
|
78
78
|
"""
|
|
79
79
|
Extract outcome reward from outcome_review.
|
|
80
80
|
|
|
@@ -123,5 +123,4 @@ class JudgeScoreRequest(BaseModel):
|
|
|
123
123
|
task_app: JudgeTaskApp = Field(..., description="Task application metadata")
|
|
124
124
|
trace: JudgeTracePayload = Field(..., description="Trajectory trace to evaluate")
|
|
125
125
|
options: JudgeOptions = Field(default_factory=lambda: JudgeOptions(), description="Judge options")
|
|
126
|
-
rubric: Optional[
|
|
127
|
-
|
|
126
|
+
rubric: Optional[dict[str, Any]] = Field(None, description="Optional explicit rubric criteria")
|
synth_ai/learning/client.py
CHANGED
|
@@ -11,7 +11,7 @@ from synth_ai.api.models.supported import (
|
|
|
11
11
|
)
|
|
12
12
|
from synth_ai.learning.sft.config import prepare_sft_job_payload
|
|
13
13
|
|
|
14
|
-
from ..http import AsyncHttpClient, HTTPError, sleep
|
|
14
|
+
from .._utils.http import AsyncHttpClient, HTTPError, sleep
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class LearningClient:
|
synth_ai/learning/health.py
CHANGED
synth_ai/learning/jobs.py
CHANGED
|
@@ -5,7 +5,7 @@ from collections.abc import Callable
|
|
|
5
5
|
from contextlib import suppress
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
-
from ..http import AsyncHttpClient, sleep
|
|
8
|
+
from .._utils.http import AsyncHttpClient, sleep
|
|
9
9
|
from .constants import TERMINAL_EVENT_FAILURE, TERMINAL_EVENT_SUCCESS, TERMINAL_STATUSES
|
|
10
10
|
|
|
11
11
|
|
synth_ai/learning/rl/client.py
CHANGED
|
@@ -10,7 +10,7 @@ from synth_ai.api.models.supported import (
|
|
|
10
10
|
normalize_model_identifier,
|
|
11
11
|
)
|
|
12
12
|
|
|
13
|
-
from ...http import AsyncHttpClient, HTTPError, sleep
|
|
13
|
+
from ..._utils.http import AsyncHttpClient, HTTPError, sleep
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def _api_base(b: str) -> str:
|
|
@@ -107,7 +107,9 @@ class RlClient:
|
|
|
107
107
|
async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
|
|
108
108
|
try:
|
|
109
109
|
js = await http.get(
|
|
110
|
-
f"{_api_base(self._base_url)}/learning/jobs/{job_id}/events",
|
|
110
|
+
f"{_api_base(self._base_url)}/learning/jobs/{job_id}/events",
|
|
111
|
+
params=params,
|
|
112
|
+
headers={"accept": "application/json"},
|
|
111
113
|
)
|
|
112
114
|
except HTTPError as he:
|
|
113
115
|
with suppress(Exception):
|
synth_ai/learning/rl/env_keys.py
CHANGED
synth_ai/learning/rl/secrets.py
CHANGED
synth_ai/learning/sft/client.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from ...http import AsyncHttpClient, HTTPError
|
|
6
|
+
from ..._utils.http import AsyncHttpClient, HTTPError
|
|
7
7
|
from .config import prepare_sft_job_payload
|
|
8
8
|
from .data import validate_jsonl_or_raise
|
|
9
9
|
|