synth-ai 0.2.14-py3-none-any.whl → 0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/__init__.py +19 -40
- synth_ai/__main__.py +30 -3
- synth_ai/cli/__init__.py +105 -70
- synth_ai/cli/__main__.py +42 -0
- synth_ai/cli/_internal/__init__.py +5 -0
- synth_ai/cli/_internal/modal_wrapper.py +31 -0
- synth_ai/cli/_internal/storage.py +20 -0
- synth_ai/cli/_internal/typer_patch.py +47 -0
- synth_ai/cli/_internal/validate_task_app.py +29 -0
- synth_ai/cli/agents/__init__.py +17 -0
- synth_ai/cli/agents/claude.py +77 -0
- synth_ai/cli/agents/codex.py +265 -0
- synth_ai/cli/agents/opencode.py +253 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/artifacts/__init__.py +13 -0
- synth_ai/cli/commands/artifacts/client.py +119 -0
- synth_ai/cli/commands/artifacts/config.py +57 -0
- synth_ai/cli/commands/artifacts/core.py +24 -0
- synth_ai/cli/commands/artifacts/download.py +188 -0
- synth_ai/cli/commands/artifacts/export.py +186 -0
- synth_ai/cli/commands/artifacts/list.py +156 -0
- synth_ai/cli/commands/artifacts/parsing.py +250 -0
- synth_ai/cli/commands/artifacts/show.py +336 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +636 -0
- synth_ai/cli/commands/baseline/list.py +94 -0
- synth_ai/cli/commands/demo/__init__.py +3 -0
- synth_ai/cli/commands/demo/core.py +153 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1113 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/scan/__init__.py +19 -0
- synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
- synth_ai/cli/commands/scan/core.py +344 -0
- synth_ai/cli/commands/scan/health_checker.py +242 -0
- synth_ai/cli/commands/scan/local_scanner.py +278 -0
- synth_ai/cli/commands/scan/models.py +83 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1438 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +182 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +22 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +201 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
- synth_ai/cli/commands/train/validation.py +392 -0
- synth_ai/cli/demo_apps/__init__.py +10 -0
- synth_ai/cli/demo_apps/core/__init__.py +28 -0
- synth_ai/cli/demo_apps/core/cli.py +1735 -0
- synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
- synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/cli/demo_apps/demo_registry.py +176 -0
- synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
- synth_ai/cli/demo_apps/math/__init__.py +1 -0
- synth_ai/cli/demo_apps/math/_common.py +16 -0
- synth_ai/cli/demo_apps/math/app.py +38 -0
- synth_ai/cli/demo_apps/math/config.toml +76 -0
- synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
- synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
- synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
- synth_ai/cli/demo_apps/mipro/main.py +271 -0
- synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
- synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
- synth_ai/cli/demos/__init__.py +12 -0
- synth_ai/cli/demos/demo.py +32 -0
- synth_ai/cli/demos/rl_demo.py +254 -0
- synth_ai/cli/deploy.py +216 -0
- synth_ai/cli/infra/__init__.py +14 -0
- synth_ai/cli/infra/balance.py +216 -0
- synth_ai/cli/infra/mcp.py +35 -0
- synth_ai/cli/infra/modal_app.py +36 -0
- synth_ai/cli/infra/setup.py +69 -0
- synth_ai/cli/infra/status.py +16 -0
- synth_ai/cli/infra/turso.py +77 -0
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/agents.py +76 -0
- synth_ai/cli/lib/apps/modal_app.py +101 -0
- synth_ai/cli/lib/apps/task_app.py +643 -0
- synth_ai/cli/lib/bin.py +39 -0
- synth_ai/cli/lib/env.py +375 -0
- synth_ai/cli/lib/errors.py +85 -0
- synth_ai/cli/lib/modal.py +315 -0
- synth_ai/cli/lib/plotting.py +126 -0
- synth_ai/cli/lib/prompt_args.py +39 -0
- synth_ai/cli/lib/prompts.py +284 -0
- synth_ai/cli/lib/sqld.py +122 -0
- synth_ai/cli/lib/task_app_discovery.py +884 -0
- synth_ai/cli/lib/task_app_env.py +295 -0
- synth_ai/cli/lib/train_cfgs.py +300 -0
- synth_ai/cli/lib/tunnel_records.py +207 -0
- synth_ai/cli/local/__init__.py +14 -0
- synth_ai/cli/local/experiment_queue/__init__.py +72 -0
- synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
- synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
- synth_ai/cli/local/experiment_queue/config.py +128 -0
- synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
- synth_ai/cli/local/experiment_queue/database.py +175 -0
- synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
- synth_ai/cli/local/experiment_queue/models.py +231 -0
- synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
- synth_ai/cli/local/experiment_queue/results.py +373 -0
- synth_ai/cli/local/experiment_queue/schemas.py +131 -0
- synth_ai/cli/local/experiment_queue/service.py +344 -0
- synth_ai/cli/local/experiment_queue/status.py +372 -0
- synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
- synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
- synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
- synth_ai/cli/local/experiment_queue/validation.py +157 -0
- synth_ai/cli/local/session/__init__.py +92 -0
- synth_ai/cli/local/session/client.py +383 -0
- synth_ai/cli/local/session/constants.py +63 -0
- synth_ai/cli/local/session/exceptions.py +105 -0
- synth_ai/cli/local/session/manager.py +139 -0
- synth_ai/cli/local/session/models.py +89 -0
- synth_ai/cli/local/session/query.py +110 -0
- synth_ai/cli/root.py +30 -6
- synth_ai/cli/task_apps/__init__.py +26 -0
- synth_ai/cli/task_apps/commands.py +3153 -0
- synth_ai/cli/task_apps/deploy.py +7 -0
- synth_ai/cli/task_apps/list.py +26 -0
- synth_ai/cli/task_apps/main.py +36 -0
- synth_ai/cli/task_apps/modal_serve.py +11 -0
- synth_ai/cli/task_apps/serve.py +11 -0
- synth_ai/cli/training/__init__.py +8 -0
- synth_ai/cli/training/train.py +5 -0
- synth_ai/cli/training/train_cfg.py +34 -0
- synth_ai/cli/training/watch.py +506 -0
- synth_ai/cli/turso.py +34 -55
- synth_ai/cli/usage.py +159 -0
- synth_ai/cli/utils/__init__.py +8 -0
- synth_ai/cli/utils/experiments.py +235 -0
- synth_ai/cli/utils/queue.py +504 -0
- synth_ai/cli/utils/recent.py +133 -0
- synth_ai/cli/utils/traces.py +164 -0
- synth_ai/contracts/__init__.py +67 -0
- synth_ai/core/__init__.py +100 -0
- synth_ai/core/_utils/__init__.py +54 -0
- synth_ai/core/_utils/base_url.py +10 -0
- synth_ai/core/_utils/http.py +10 -0
- synth_ai/core/_utils/prompts.py +14 -0
- synth_ai/core/_utils/task_app_state.py +12 -0
- synth_ai/core/_utils/user_config.py +10 -0
- synth_ai/core/apps/common.py +116 -0
- synth_ai/core/auth.py +95 -0
- synth_ai/core/cfgs.py +240 -0
- synth_ai/core/config/__init__.py +16 -0
- synth_ai/core/config/base.py +168 -0
- synth_ai/core/config/resolver.py +89 -0
- synth_ai/core/env.py +220 -0
- synth_ai/core/errors.py +126 -0
- synth_ai/core/http.py +230 -0
- synth_ai/core/integrations/__init__.py +11 -0
- synth_ai/core/integrations/cloudflare.py +1710 -0
- synth_ai/core/integrations/mcp/__init__.py +6 -0
- synth_ai/core/integrations/mcp/__main__.py +8 -0
- synth_ai/core/integrations/mcp/claude.py +36 -0
- synth_ai/core/integrations/mcp/main.py +254 -0
- synth_ai/core/integrations/mcp/setup.py +100 -0
- synth_ai/core/integrations/modal.py +277 -0
- synth_ai/core/json.py +72 -0
- synth_ai/core/log_filter.py +99 -0
- synth_ai/core/logging.py +82 -0
- synth_ai/core/paths.py +107 -0
- synth_ai/core/pricing.py +109 -0
- synth_ai/core/process.py +233 -0
- synth_ai/core/ssl.py +25 -0
- synth_ai/core/storage/__init__.py +71 -0
- synth_ai/core/task_app_state.py +318 -0
- synth_ai/core/telemetry.py +282 -0
- synth_ai/core/tracing_v3/__init__.py +99 -0
- synth_ai/core/tracing_v3/abstractions.py +302 -0
- synth_ai/core/tracing_v3/config.py +229 -0
- synth_ai/core/tracing_v3/constants.py +21 -0
- synth_ai/core/tracing_v3/db_config.py +182 -0
- synth_ai/core/tracing_v3/decorators.py +401 -0
- synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
- synth_ai/core/tracing_v3/migration_helper.py +119 -0
- synth_ai/core/tracing_v3/session_tracer.py +542 -0
- synth_ai/core/tracing_v3/storage/base.py +211 -0
- synth_ai/core/tracing_v3/storage/config.py +109 -0
- synth_ai/core/tracing_v3/storage/factory.py +39 -0
- synth_ai/core/tracing_v3/trace_utils.py +326 -0
- synth_ai/core/tracing_v3/turso/daemon.py +278 -0
- synth_ai/core/tracing_v3/turso/models.py +470 -0
- synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
- synth_ai/core/tracing_v3/utils.py +108 -0
- synth_ai/core/urls.py +18 -0
- synth_ai/core/user_config.py +137 -0
- synth_ai/core/uvicorn.py +222 -0
- synth_ai/data/__init__.py +110 -0
- synth_ai/data/enums.py +141 -0
- synth_ai/data/rewards.py +152 -0
- synth_ai/data/specs.py +36 -0
- synth_ai/data/traces.py +35 -0
- synth_ai/products/__init__.py +6 -0
- synth_ai/products/graph_evolve/__init__.py +46 -0
- synth_ai/products/graph_evolve/client.py +226 -0
- synth_ai/products/graph_evolve/config.py +591 -0
- synth_ai/products/graph_evolve/converters/__init__.py +42 -0
- synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
- synth_ai/products/graph_evolve/run.py +222 -0
- synth_ai/sdk/__init__.py +119 -0
- synth_ai/sdk/api/__init__.py +1 -0
- synth_ai/sdk/api/models/supported.py +514 -0
- synth_ai/sdk/api/research_agent/__init__.py +86 -0
- synth_ai/sdk/api/research_agent/cli.py +428 -0
- synth_ai/sdk/api/research_agent/config.py +357 -0
- synth_ai/sdk/api/research_agent/job.py +717 -0
- synth_ai/sdk/api/train/__init__.py +85 -0
- synth_ai/sdk/api/train/builders.py +895 -0
- synth_ai/sdk/api/train/cli.py +2188 -0
- synth_ai/sdk/api/train/config_finder.py +267 -0
- synth_ai/sdk/api/train/configs/__init__.py +65 -0
- synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
- synth_ai/sdk/api/train/configs/rl.py +188 -0
- synth_ai/sdk/api/train/configs/sft.py +99 -0
- synth_ai/sdk/api/train/configs/shared.py +81 -0
- synth_ai/sdk/api/train/context_learning.py +312 -0
- synth_ai/sdk/api/train/env_resolver.py +418 -0
- synth_ai/sdk/api/train/graph_validators.py +216 -0
- synth_ai/sdk/api/train/graphgen.py +984 -0
- synth_ai/sdk/api/train/graphgen_models.py +823 -0
- synth_ai/sdk/api/train/graphgen_validators.py +109 -0
- synth_ai/sdk/api/train/pollers.py +124 -0
- synth_ai/sdk/api/train/progress/__init__.py +97 -0
- synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
- synth_ai/sdk/api/train/progress/events.py +326 -0
- synth_ai/sdk/api/train/progress/results.py +428 -0
- synth_ai/sdk/api/train/progress/tracker.py +641 -0
- synth_ai/sdk/api/train/prompt_learning.py +470 -0
- synth_ai/sdk/api/train/rl.py +442 -0
- synth_ai/sdk/api/train/sft.py +396 -0
- synth_ai/sdk/api/train/summary.py +522 -0
- synth_ai/sdk/api/train/supported_algos.py +147 -0
- synth_ai/sdk/api/train/task_app.py +331 -0
- synth_ai/sdk/api/train/utils.py +279 -0
- synth_ai/sdk/api/train/validators.py +2424 -0
- synth_ai/sdk/baseline/__init__.py +25 -0
- synth_ai/sdk/baseline/config.py +209 -0
- synth_ai/sdk/baseline/discovery.py +216 -0
- synth_ai/sdk/baseline/execution.py +154 -0
- synth_ai/sdk/graphs/__init__.py +15 -0
- synth_ai/sdk/graphs/completions.py +570 -0
- synth_ai/sdk/inference/__init__.py +6 -0
- synth_ai/sdk/inference/client.py +128 -0
- synth_ai/sdk/jobs/__init__.py +16 -0
- synth_ai/sdk/jobs/client.py +371 -0
- synth_ai/sdk/judging/__init__.py +15 -0
- synth_ai/sdk/judging/base.py +24 -0
- synth_ai/sdk/judging/client.py +191 -0
- synth_ai/sdk/judging/schemas.py +222 -0
- synth_ai/sdk/learning/__init__.py +69 -0
- synth_ai/sdk/learning/client.py +240 -0
- synth_ai/sdk/learning/ft_client.py +7 -0
- synth_ai/sdk/learning/health.py +49 -0
- synth_ai/sdk/learning/jobs.py +202 -0
- synth_ai/sdk/learning/prompt_extraction.py +334 -0
- synth_ai/sdk/learning/prompt_learning_client.py +455 -0
- synth_ai/sdk/learning/prompt_learning_types.py +185 -0
- synth_ai/sdk/learning/rl/client.py +268 -0
- synth_ai/sdk/learning/rl/contracts.py +27 -0
- synth_ai/sdk/learning/rl/env_keys.py +166 -0
- synth_ai/sdk/learning/rl/secrets.py +13 -0
- synth_ai/sdk/learning/sft/client.py +95 -0
- synth_ai/sdk/learning/sft/config.py +270 -0
- synth_ai/sdk/learning/sft/data.py +698 -0
- synth_ai/sdk/learning/validators.py +52 -0
- synth_ai/sdk/research_agent/__init__.py +34 -0
- synth_ai/sdk/research_agent/container_builder.py +328 -0
- synth_ai/sdk/research_agent/container_spec.py +198 -0
- synth_ai/sdk/research_agent/defaults.py +34 -0
- synth_ai/sdk/research_agent/results_collector.py +69 -0
- synth_ai/sdk/specs/__init__.py +46 -0
- synth_ai/sdk/specs/dataclasses.py +149 -0
- synth_ai/sdk/specs/loader.py +144 -0
- synth_ai/sdk/specs/serializer.py +199 -0
- synth_ai/sdk/specs/validation.py +250 -0
- synth_ai/sdk/streaming/__init__.py +35 -0
- synth_ai/sdk/streaming/config.py +94 -0
- synth_ai/sdk/streaming/handlers.py +1997 -0
- synth_ai/sdk/streaming/streamer.py +704 -0
- synth_ai/sdk/streaming/types.py +112 -0
- synth_ai/sdk/task/__init__.py +151 -0
- synth_ai/sdk/task/apps/__init__.py +133 -0
- synth_ai/sdk/task/config.py +261 -0
- synth_ai/sdk/task/contracts.py +298 -0
- synth_ai/sdk/task/datasets.py +108 -0
- synth_ai/sdk/task/in_process.py +1190 -0
- synth_ai/sdk/task/in_process_runner.py +309 -0
- synth_ai/sdk/task/inference_api.py +299 -0
- synth_ai/sdk/task/proxy.py +287 -0
- synth_ai/sdk/task/rubrics/__init__.py +55 -0
- synth_ai/sdk/task/rubrics/loaders.py +156 -0
- synth_ai/sdk/task/rubrics.py +219 -0
- synth_ai/sdk/task/server.py +580 -0
- synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
- synth_ai/sdk/task/tracing_utils.py +95 -0
- synth_ai/sdk/task/validators.py +456 -0
- synth_ai/sdk/tracing/__init__.py +39 -0
- synth_ai/sdk/training/__init__.py +102 -0
- synth_ai/sdk/usage/__init__.py +37 -0
- synth_ai/sdk/usage/client.py +171 -0
- synth_ai/sdk/usage/models.py +261 -0
- synth_ai/utils/__init__.py +213 -0
- synth_ai-0.4.1.dist-info/METADATA +195 -0
- synth_ai-0.4.1.dist-info/RECORD +379 -0
- synth_ai-0.4.1.dist-info/top_level.txt +1 -0
- examples/__init__.py +0 -16
- examples/analyze_semantic_words.sh +0 -17
- examples/crafter_debug_render.py +0 -186
- examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
- examples/multi_step/configs/README_verilog_rl.md +0 -77
- examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
- examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
- examples/multi_step/configs/crafter_synth_backend.md +0 -40
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
- examples/multi_step/configs/verilog_rl_lora.toml +0 -190
- examples/multi_step/crafter_rl_lora.md +0 -70
- examples/multi_step/judges/crafter_backend_judge.py +0 -220
- examples/multi_step/judges/verilog_backend_judge.py +0 -234
- examples/multi_step/readme.md +0 -48
- examples/multi_step/sse_metrics_streaming_notes.md +0 -357
- examples/multi_step/task_app_config_notes.md +0 -494
- examples/multi_step/verilog_rl_lora.md +0 -218
- examples/qwen_coder/README.md +0 -102
- examples/qwen_coder/_shared.py +0 -113
- examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
- examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
- examples/qwen_coder/configs/coder_lora_small.toml +0 -58
- examples/qwen_coder/generate_dataset.py +0 -98
- examples/qwen_coder/infer_ft_smoke.py +0 -65
- examples/qwen_coder/infer_prod_proxy.py +0 -73
- examples/qwen_coder/infer_via_synth.py +0 -87
- examples/qwen_coder/scripts/infer_coder.sh +0 -19
- examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
- examples/qwen_coder/sft_full_17b.py +0 -103
- examples/qwen_coder/sft_lora_30b.py +0 -110
- examples/qwen_coder/subset_jsonl.py +0 -39
- examples/qwen_coder/todos.md +0 -38
- examples/qwen_coder/validate_jsonl.py +0 -60
- examples/rl/README.md +0 -169
- examples/rl/download_dataset.py +0 -80
- examples/run_crafter_demo.sh +0 -10
- examples/sft/README.md +0 -139
- examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
- examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
- examples/sft/evaluate.py +0 -119
- examples/sft/export_dataset.py +0 -117
- examples/sft/generate_traces.py +0 -164
- examples/swe/__init__.py +0 -12
- examples/swe/task_app/README.md +0 -105
- examples/swe/task_app/__init__.py +0 -2
- examples/swe/task_app/grpo_swe_mini.py +0 -601
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
- examples/swe/task_app/hosted/README.md +0 -173
- examples/swe/task_app/hosted/__init__.py +0 -5
- examples/swe/task_app/hosted/branching.py +0 -143
- examples/swe/task_app/hosted/environment_routes.py +0 -1289
- examples/swe/task_app/hosted/envs/__init__.py +0 -1
- examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
- examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
- examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
- examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
- examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
- examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
- examples/swe/task_app/hosted/hosted_app.py +0 -204
- examples/swe/task_app/hosted/inference/__init__.py +0 -5
- examples/swe/task_app/hosted/inference/openai_client.py +0 -618
- examples/swe/task_app/hosted/main.py +0 -100
- examples/swe/task_app/hosted/policy_routes.py +0 -1079
- examples/swe/task_app/hosted/registry.py +0 -195
- examples/swe/task_app/hosted/rollout.py +0 -1911
- examples/swe/task_app/hosted/storage/__init__.py +0 -5
- examples/swe/task_app/hosted/storage/volume.py +0 -211
- examples/swe/task_app/hosted/test_agents.py +0 -161
- examples/swe/task_app/hosted/test_service.py +0 -136
- examples/swe/task_app/hosted/utils.py +0 -62
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
- examples/task_apps/TESTING.md +0 -275
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
- examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
- examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
- examples/task_apps/crafter/task_app/README.md +0 -42
- examples/task_apps/crafter/task_app/__init__.py +0 -5
- examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
- examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
- examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
- examples/task_apps/enron/__init__.py +0 -1
- examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
- examples/task_apps/enron/filter_sft.toml +0 -5
- examples/task_apps/enron/task_app/README.md +0 -14
- examples/task_apps/enron/task_app/__init__.py +0 -1
- examples/task_apps/enron/task_app/grpo_enron.py +0 -906
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
- examples/task_apps/enron/tests/__init__.py +0 -4
- examples/task_apps/enron/tests/conftest.py +0 -115
- examples/task_apps/enron/tests/integration/__init__.py +0 -4
- examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
- examples/task_apps/enron/tests/unit/__init__.py +0 -4
- examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
- examples/task_apps/math/README.md +0 -22
- examples/task_apps/math/__init__.py +0 -0
- examples/task_apps/math/math_single_step.py +0 -1000
- examples/task_apps/math/math_task_app.py +0 -115
- examples/task_apps/pokemon_battle/__init__.py +0 -2
- examples/task_apps/pokemon_battle/modal_app.py +0 -104
- examples/task_apps/pokemon_battle/task_app/README.md +0 -68
- examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
- examples/task_apps/pokemon_red/README.md +0 -357
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
- examples/task_apps/pokemon_red/__init__.py +0 -3
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
- examples/task_apps/pokemon_red/task_app.py +0 -799
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
- examples/task_apps/sokoban/README.md +0 -307
- examples/task_apps/sokoban/__init__.py +0 -3
- examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
- examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
- examples/task_apps/sokoban/filter_sft.toml +0 -5
- examples/task_apps/sokoban/task_app.py +0 -1058
- examples/task_apps/sokoban/tests/__init__.py +0 -4
- examples/task_apps/sokoban/tests/conftest.py +0 -113
- examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
- examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
- examples/task_apps/verilog/__init__.py +0 -1
- examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
- examples/task_apps/verilog/filter_sft.toml +0 -5
- examples/task_apps/verilog/task_app/README.md +0 -12
- examples/task_apps/verilog/task_app/__init__.py +0 -1
- examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
- examples/task_apps/verilog/tests/__init__.py +0 -4
- examples/task_apps/verilog/tests/conftest.py +0 -115
- examples/task_apps/verilog/tests/integration/__init__.py +0 -4
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
- examples/task_apps/verilog/tests/unit/__init__.py +0 -4
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
- examples/vlm/PROPOSAL.md +0 -53
- examples/vlm/README.md +0 -68
- examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
- examples/vlm/crafter_image_only_agent.py +0 -207
- examples/vlm/crafter_openai_vlm_agent.py +0 -277
- examples/vlm/filter_image_rows.py +0 -63
- examples/vlm/run_crafter_vlm_benchmark.py +0 -316
- examples/warming_up_to_rl/analyze_trace_db.py +0 -422
- examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
- examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
- examples/warming_up_to_rl/export_trace_sft.py +0 -723
- examples/warming_up_to_rl/groq_test.py +0 -97
- examples/warming_up_to_rl/manage_secrets.py +0 -131
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/readme.md +0 -179
- examples/warming_up_to_rl/run_eval.py +0 -736
- examples/warming_up_to_rl/run_fft_and_save.py +0 -380
- examples/warming_up_to_rl/run_local_rollout.py +0 -239
- examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
- examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
- examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
- examples/warming_up_to_rl/run_rl_and_save.py +0 -124
- examples/warming_up_to_rl/run_rollout_remote.py +0 -156
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
- examples/workflows/math_rl/download_dataset.py +0 -80
- examples/workflows/math_rl/run_eval.py +0 -436
- examples/workflows/math_rl/run_rl_and_save.py +0 -111
- synth_ai/api/models/supported.py +0 -377
- synth_ai/api/train/__init__.py +0 -5
- synth_ai/api/train/builders.py +0 -351
- synth_ai/api/train/cli.py +0 -635
- synth_ai/api/train/config_finder.py +0 -228
- synth_ai/api/train/configs/__init__.py +0 -44
- synth_ai/api/train/configs/rl.py +0 -134
- synth_ai/api/train/configs/sft.py +0 -95
- synth_ai/api/train/configs/shared.py +0 -24
- synth_ai/api/train/env_resolver.py +0 -349
- synth_ai/api/train/pollers.py +0 -75
- synth_ai/api/train/supported_algos.py +0 -147
- synth_ai/api/train/task_app.py +0 -195
- synth_ai/api/train/utils.py +0 -225
- synth_ai/cli/_modal_wrapper.py +0 -29
- synth_ai/cli/_storage.py +0 -20
- synth_ai/cli/_typer_patch.py +0 -49
- synth_ai/cli/_validate_task_app.py +0 -11
- synth_ai/cli/balance.py +0 -216
- synth_ai/cli/calc.py +0 -84
- synth_ai/cli/demo.py +0 -165
- synth_ai/cli/legacy_root_backup.py +0 -468
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/recent.py +0 -132
- synth_ai/cli/rl_demo.py +0 -254
- synth_ai/cli/status.py +0 -134
- synth_ai/cli/task_apps.py +0 -4523
- synth_ai/cli/traces.py +0 -164
- synth_ai/cli/tui.py +0 -57
- synth_ai/cli/watch.py +0 -506
- synth_ai/compound/cais.py +0 -0
- synth_ai/config/base_url.py +0 -107
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/demos/core/__init__.py +0 -1
- synth_ai/demos/core/cli.py +0 -1718
- synth_ai/demos/demo_task_apps/core.py +0 -440
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
- synth_ai/environments/__init__.py +0 -31
- synth_ai/environments/environment/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/base.py +0 -52
- synth_ai/environments/environment/core.py +0 -67
- synth_ai/environments/environment/db/__init__.py +0 -1
- synth_ai/environments/environment/db/sqlite.py +0 -45
- synth_ai/environments/environment/registry.py +0 -233
- synth_ai/environments/environment/resources/sqlite.py +0 -45
- synth_ai/environments/environment/results.py +0 -1
- synth_ai/environments/environment/rewards/__init__.py +0 -1
- synth_ai/environments/environment/rewards/core.py +0 -29
- synth_ai/environments/environment/shared_engine.py +0 -26
- synth_ai/environments/environment/tools/__init__.py +0 -200
- synth_ai/environments/examples/__init__.py +0 -1
- synth_ai/environments/examples/bandit/__init__.py +0 -33
- synth_ai/environments/examples/bandit/engine.py +0 -302
- synth_ai/environments/examples/bandit/environment.py +0 -194
- synth_ai/environments/examples/bandit/taskset.py +0 -200
- synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
- synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +0 -579
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
- synth_ai/environments/examples/crafter_classic/environment.py +0 -495
- synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
- synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
- synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
- synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
- synth_ai/environments/examples/crafter_custom/environment.py +0 -312
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
- synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
- synth_ai/environments/examples/enron/engine.py +0 -300
- synth_ai/environments/examples/enron/environment.py +0 -234
- synth_ai/environments/examples/enron/taskset.py +0 -112
- synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
- synth_ai/environments/examples/minigrid/__init__.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
- synth_ai/environments/examples/minigrid/engine.py +0 -589
- synth_ai/environments/examples/minigrid/environment.py +0 -274
- synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
- synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
- synth_ai/environments/examples/minigrid/taskset.py +0 -583
- synth_ai/environments/examples/nethack/__init__.py +0 -7
- synth_ai/environments/examples/nethack/achievements.py +0 -337
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
- synth_ai/environments/examples/nethack/engine.py +0 -739
- synth_ai/environments/examples/nethack/environment.py +0 -256
- synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
- synth_ai/environments/examples/nethack/taskset.py +0 -323
- synth_ai/environments/examples/red/__init__.py +0 -7
- synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/red/config_logging.py +0 -110
- synth_ai/environments/examples/red/engine.py +0 -721
- synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
- synth_ai/environments/examples/red/environment.py +0 -298
- synth_ai/environments/examples/red/taskset.py +0 -79
- synth_ai/environments/examples/red/units/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
- synth_ai/environments/examples/sokoban/engine.py +0 -678
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
- synth_ai/environments/examples/sokoban/environment.py +0 -229
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
- synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
- synth_ai/environments/examples/sokoban/taskset.py +0 -544
- synth_ai/environments/examples/tictactoe/__init__.py +0 -1
- synth_ai/environments/examples/tictactoe/engine.py +0 -368
- synth_ai/environments/examples/tictactoe/environment.py +0 -240
- synth_ai/environments/examples/tictactoe/taskset.py +0 -215
- synth_ai/environments/examples/verilog/__init__.py +0 -10
- synth_ai/environments/examples/verilog/engine.py +0 -421
- synth_ai/environments/examples/verilog/environment.py +0 -350
- synth_ai/environments/examples/verilog/taskset.py +0 -420
- synth_ai/environments/examples/wordle/__init__.py +0 -29
- synth_ai/environments/examples/wordle/engine.py +0 -398
- synth_ai/environments/examples/wordle/environment.py +0 -159
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
- synth_ai/environments/examples/wordle/taskset.py +0 -230
- synth_ai/environments/reproducibility/core.py +0 -42
- synth_ai/environments/reproducibility/helpers.py +0 -0
- synth_ai/environments/reproducibility/tree.py +0 -363
- synth_ai/environments/service/app.py +0 -97
- synth_ai/environments/service/core_routes.py +0 -1021
- synth_ai/environments/service/external_registry.py +0 -56
- synth_ai/environments/service/registry.py +0 -9
- synth_ai/environments/stateful/__init__.py +0 -1
- synth_ai/environments/stateful/core.py +0 -163
- synth_ai/environments/stateful/engine.py +0 -21
- synth_ai/environments/stateful/state.py +0 -7
- synth_ai/environments/tasks/api.py +0 -19
- synth_ai/environments/tasks/core.py +0 -81
- synth_ai/environments/tasks/filters.py +0 -40
- synth_ai/environments/tasks/utils.py +0 -90
- synth_ai/environments/v0_observability/history.py +0 -3
- synth_ai/environments/v0_observability/log.py +0 -2
- synth_ai/evals/__init__.py +0 -15
- synth_ai/evals/base.py +0 -13
- synth_ai/evals/client.py +0 -82
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- synth_ai/http_client.py +0 -136
- synth_ai/inference/__init__.py +0 -5
- synth_ai/inference/client.py +0 -34
- synth_ai/jobs/client.py +0 -295
- synth_ai/judge_schemas.py +0 -127
- synth_ai/learning/__init__.py +0 -59
- synth_ai/learning/client.py +0 -241
- synth_ai/learning/ft_client.py +0 -7
- synth_ai/learning/health.py +0 -49
- synth_ai/learning/jobs.py +0 -201
- synth_ai/learning/rl/client.py +0 -267
- synth_ai/learning/rl/contracts.py +0 -27
- synth_ai/learning/rl/env_keys.py +0 -166
- synth_ai/learning/rl/secrets.py +0 -13
- synth_ai/learning/sft/client.py +0 -68
- synth_ai/learning/sft/config.py +0 -270
- synth_ai/learning/sft/data.py +0 -295
- synth_ai/learning/validators.py +0 -49
- synth_ai/lm/__init__.py +0 -25
- synth_ai/task/__init__.py +0 -121
- synth_ai/task/apps/__init__.py +0 -129
- synth_ai/task/config.py +0 -257
- synth_ai/task/contracts.py +0 -236
- synth_ai/task/datasets.py +0 -108
- synth_ai/task/proxy.py +0 -251
- synth_ai/task/rubrics/__init__.py +0 -56
- synth_ai/task/rubrics/loaders.py +0 -152
- synth_ai/task/server.py +0 -432
- synth_ai/task/trace_correlation_helpers.py +0 -315
- synth_ai/task/tracing_utils.py +0 -84
- synth_ai/task/validators.py +0 -418
- synth_ai/tracing_v3/__init__.py +0 -97
- synth_ai/tracing_v3/abstractions.py +0 -302
- synth_ai/tracing_v3/config.py +0 -84
- synth_ai/tracing_v3/db_config.py +0 -194
- synth_ai/tracing_v3/decorators.py +0 -398
- synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
- synth_ai/tracing_v3/migration_helper.py +0 -120
- synth_ai/tracing_v3/session_tracer.py +0 -540
- synth_ai/tracing_v3/storage/base.py +0 -210
- synth_ai/tracing_v3/storage/config.py +0 -75
- synth_ai/tracing_v3/storage/factory.py +0 -39
- synth_ai/tracing_v3/trace_utils.py +0 -317
- synth_ai/tracing_v3/turso/daemon.py +0 -151
- synth_ai/tracing_v3/turso/models.py +0 -469
- synth_ai/tracing_v3/turso/native_manager.py +0 -1209
- synth_ai/tracing_v3/utils.py +0 -108
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- synth_ai/v0/api/__init__.py +0 -8
- synth_ai/v0/api/models/__init__.py +0 -8
- synth_ai/v0/api/models/supported.py +0 -8
- synth_ai/v0/config/__init__.py +0 -15
- synth_ai/v0/config/base_url.py +0 -12
- synth_ai/v0/lm/__init__.py +0 -51
- synth_ai/v0/lm/caching/__init__.py +0 -0
- synth_ai/v0/lm/caching/constants.py +0 -6
- synth_ai/v0/lm/caching/dbs.py +0 -0
- synth_ai/v0/lm/caching/ephemeral.py +0 -100
- synth_ai/v0/lm/caching/handler.py +0 -137
- synth_ai/v0/lm/caching/initialize.py +0 -11
- synth_ai/v0/lm/caching/persistent.py +0 -114
- synth_ai/v0/lm/config.py +0 -115
- synth_ai/v0/lm/constants.py +0 -32
- synth_ai/v0/lm/core/__init__.py +0 -8
- synth_ai/v0/lm/core/all.py +0 -73
- synth_ai/v0/lm/core/exceptions.py +0 -5
- synth_ai/v0/lm/core/main.py +0 -331
- synth_ai/v0/lm/core/main_v3.py +0 -594
- synth_ai/v0/lm/core/synth_models.py +0 -35
- synth_ai/v0/lm/core/vendor_clients.py +0 -190
- synth_ai/v0/lm/cost/__init__.py +0 -0
- synth_ai/v0/lm/cost/monitor.py +0 -1
- synth_ai/v0/lm/cost/statefulness.py +0 -1
- synth_ai/v0/lm/injection.py +0 -80
- synth_ai/v0/lm/overrides.py +0 -206
- synth_ai/v0/lm/provider_support/__init__.py +0 -8
- synth_ai/v0/lm/provider_support/anthropic.py +0 -972
- synth_ai/v0/lm/provider_support/openai.py +0 -1139
- synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
- synth_ai/v0/lm/structured_outputs/handler.py +0 -440
- synth_ai/v0/lm/structured_outputs/inject.py +0 -297
- synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/v0/lm/tools/__init__.py +0 -3
- synth_ai/v0/lm/tools/base.py +0 -172
- synth_ai/v0/lm/unified_interface.py +0 -202
- synth_ai/v0/lm/vendors/__init__.py +0 -0
- synth_ai/v0/lm/vendors/base.py +0 -81
- synth_ai/v0/lm/vendors/core/__init__.py +0 -0
- synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
- synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/v0/lm/vendors/local/__init__.py +0 -0
- synth_ai/v0/lm/vendors/local/ollama.py +0 -0
- synth_ai/v0/lm/vendors/openai_standard.py +0 -782
- synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
- synth_ai/v0/lm/vendors/retries.py +0 -22
- synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
- synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
- synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/v0/lm/vendors/supported/grok.py +0 -75
- synth_ai/v0/lm/vendors/supported/groq.py +0 -16
- synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
- synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/v0/lm/vendors/supported/together.py +0 -11
- synth_ai/v0/lm/vendors/synth_client.py +0 -835
- synth_ai/v0/lm/warmup.py +0 -186
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -409
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/v0/tracing_v3/__init__.py +0 -10
- synth_ai/v0/tracing_v3/abstractions.py +0 -3
- synth_ai/v0/tracing_v3/decorators.py +0 -3
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
- synth_ai/v0/tracing_v3/session_tracer.py +0 -3
- synth_ai-0.2.14.dist-info/METADATA +0 -139
- synth_ai-0.2.14.dist-info/RECORD +0 -762
- synth_ai-0.2.14.dist-info/top_level.txt +0 -2
- /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
- {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
- /synth_ai/{evals → sdk/judging}/types.py +0 -0
- /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
- /synth_ai/{learning → sdk/learning}/config.py +0 -0
- /synth_ai/{learning → sdk/learning}/constants.py +0 -0
- /synth_ai/{learning → sdk/learning}/core.py +0 -0
- /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
- /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
- /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
- /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
- /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
- /synth_ai/{learning → sdk/learning}/sse.py +0 -0
- /synth_ai/{task → sdk/task}/auth.py +0 -0
- /synth_ai/{task → sdk/task}/client.py +0 -0
- /synth_ai/{task → sdk/task}/errors.py +0 -0
- /synth_ai/{task → sdk/task}/health.py +0 -0
- /synth_ai/{task → sdk/task}/json.py +0 -0
- /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
- /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
- /synth_ai/{task → sdk/task}/rubrics/strict.py +0 -0
- /synth_ai/{task → sdk/task}/vendors.py +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Validate that a JSONL file parses and contains chat-like records.
|
|
3
|
-
|
|
4
|
-
Checks first N lines (default 50) for objects with `messages` including an
|
|
5
|
-
assistant response (role == "assistant").
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
import argparse
|
|
11
|
-
import json
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def main() -> None:
    """Sample the head of a JSONL file and check it holds chat-style records.

    Command-line arguments:
        path: JSONL file to inspect.
        --n:  number of leading physical lines to sample (default 50). Blank
              lines count toward this limit but are not validated.

    On success, prints how many sampled records contain an assistant message.

    Raises:
        SystemExit: if the path does not exist or is not a regular file, if a
            sampled line is not valid JSON or has no ``messages`` list, if no
            non-blank line was sampled, or if no sampled record contains a
            message with ``role == "assistant"`` and truthy ``content``.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("path", help="Path to JSONL file")
    p.add_argument("--n", type=int, default=50, help="Number of lines to sample")
    args = p.parse_args()

    src = Path(args.path)
    if not src.exists():
        raise SystemExit(f"No such file: {src}")
    if not src.is_file():
        # A directory passes exists() but would otherwise blow up inside open()
        # with an unhandled traceback instead of a clean CLI error.
        raise SystemExit(f"Not a regular file: {src}")

    checked = 0
    ok = 0
    with src.open("r", encoding="utf-8") as f:
        for i, line in enumerate(f, start=1):
            if i > args.n:
                break
            line = line.strip()
            if not line:
                continue
            checked += 1
            try:
                obj = json.loads(line)
            except ValueError as exc:
                # json.JSONDecodeError is a ValueError; keep the cause chained
                # so the parser's position info survives for debugging.
                raise SystemExit(f"Line {i} is not valid JSON: {exc}") from exc
            msgs = obj.get("messages") if isinstance(obj, dict) else None
            if not isinstance(msgs, list):
                raise SystemExit(f"Line {i} missing 'messages' list")
            has_assistant = any(
                isinstance(m, dict) and m.get("role") == "assistant" and m.get("content")
                for m in msgs
            )
            if has_assistant:
                ok += 1

    if checked == 0:
        raise SystemExit("No lines checked; file empty?")
    if ok == 0:
        raise SystemExit("No assistant messages found in sampled lines")
    print(f"Validated: {ok}/{checked} sampled lines contain assistant messages")
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if __name__ == "__main__":
|
|
57
|
-
main()
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
examples/rl/README.md
DELETED
|
@@ -1,169 +0,0 @@
|
|
|
1
|
-
# Math RL Demo (Single Step)
|
|
2
|
-
|
|
3
|
-
This example trains a reinforcement learning policy on single-step math problems sourced from the [EleutherAI/math](https://huggingface.co/datasets/EleutherAI/math) dataset. Episodes consist of a single tool call: the model must emit a `math_submit` function call whose `answer` field contains the final solution. Missing or malformed tool calls receive negative reward; correct answers earn positive reward.
|
|
4
|
-
|
|
5
|
-
## Quick Commands
|
|
6
|
-
|
|
7
|
-
```bash
|
|
8
|
-
# Serve locally with tracing
|
|
9
|
-
uvx synth-ai serve math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
|
|
10
|
-
|
|
11
|
-
# Modal deployment
|
|
12
|
-
uvx synth-ai deploy --name synth-math-single-step --env-file examples/rl/.env
|
|
13
|
-
|
|
14
|
-
# Evaluate base Qwen policy (validation split)
|
|
15
|
-
uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
|
|
16
|
-
|
|
17
|
-
# Launch RL job from base model
|
|
18
|
-
uvx synth-ai train --type rl --config examples/rl/configs/rl_from_base_qwen.toml
|
|
19
|
-
|
|
20
|
-
# Evaluate RL checkpoint on held-out test split
|
|
21
|
-
uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_rl_qwen.toml
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
## 1. Prerequisites
|
|
25
|
-
|
|
26
|
-
- Python 3.11+
|
|
27
|
-
- `uv`/`uvx`
|
|
28
|
-
- Modal CLI (`modal token new`) for deployment
|
|
29
|
-
- `.env` at `examples/rl/.env` containing at least:
|
|
30
|
-
- `SYNTH_API_KEY`
|
|
31
|
-
- `ENVIRONMENT_API_KEY`
|
|
32
|
-
- Optional: `TASK_APP_URL` (Modal URL), `GROQ_API_KEY`, `OPENAI_API_KEY`
|
|
33
|
-
|
|
34
|
-
Run `uvx synth-ai setup` to populate the `.env` if you have not paired the SDK before.
|
|
35
|
-
|
|
36
|
-
## 2. Task App
|
|
37
|
-
|
|
38
|
-
The task app is defined in `synth_ai/task/apps/math_single_step.py` and registered as `math-single-step`. It loads problems from the Hugging Face dataset (configurable via `MATH_DATASET_*` env vars) and manages per-episode state with an in-memory environment manager.
|
|
39
|
-
|
|
40
|
-
- **Observation**: single math problem (string) plus dataset metadata.
|
|
41
|
-
- **Actions**: exactly one `math_submit` tool call with an `answer` string.
|
|
42
|
-
- **Rewards**:
|
|
43
|
-
- `+1.0` for correct answer
|
|
44
|
-
- `0.0` for incorrect answer
|
|
45
|
-
- `-0.5` if the tool call omits an answer or uses the wrong tool
|
|
46
|
-
- `-1.0` when no tool call is provided
|
|
47
|
-
|
|
48
|
-
Serve locally with tracing to capture trajectories:
|
|
49
|
-
|
|
50
|
-
```bash
|
|
51
|
-
uvx synth-ai serve math-single-step \
|
|
52
|
-
--port 8101 \
|
|
53
|
-
--env-file examples/rl/.env \
|
|
54
|
-
--trace traces/math \
|
|
55
|
-
--trace-db traces/math/synth_ai.db
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
Deploy or serve on Modal using the same env file; the registration includes a `ModalDeploymentConfig` that installs the `datasets` package automatically.
|
|
59
|
-
|
|
60
|
-
## 3. Evaluation
|
|
61
|
-
|
|
62
|
-
`examples/rl/run_eval.py` evaluates a policy by sampling deterministic seeds from the dataset splits. TOML configuration controls the model, split, and number of episodes. Example config (`eval_base_qwen.toml`):
|
|
63
|
-
|
|
64
|
-
```toml
|
|
65
|
-
provider = "synth"
|
|
66
|
-
task_app_url = "http://localhost:8101"
|
|
67
|
-
model = "Qwen/Qwen3-4B"
|
|
68
|
-
split = "validation"
|
|
69
|
-
num_episodes = 50
|
|
70
|
-
seed_start = 0
|
|
71
|
-
|
|
72
|
-
[policy]
|
|
73
|
-
inference_url = "http://localhost:8000/api/inference"
|
|
74
|
-
max_tokens = 128
|
|
75
|
-
temperature = 0.0
|
|
76
|
-
# Optional: override headers for inference requests
|
|
77
|
-
# [policy.extra_headers]
|
|
78
|
-
# Authorization = "Bearer ..."
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
The `[policy]` table maps directly to the inference payload; add `[policy.headers]` if you need to forward custom HTTP headers (e.g., `Authorization`). If `SYNTH_API_KEY` is present, the evaluator automatically sends `Authorization: Bearer <key>`.
|
|
82
|
-
|
|
83
|
-
Set `--use-rollout` to exercise the server-side rollout endpoint instead of the per-step API.
|
|
84
|
-
|
|
85
|
-
The script reports accuracy and a breakdown of failure modes (`missing_tool_call`, `blank_answer`, etc.).
|
|
86
|
-
|
|
87
|
-
## 4. RL Training
|
|
88
|
-
|
|
89
|
-
Example RL config (`configs/rl_from_base_qwen.toml`):
|
|
90
|
-
|
|
91
|
-
```toml
|
|
92
|
-
[services]
|
|
93
|
-
task_url = "https://your-app.modal.run"
|
|
94
|
-
|
|
95
|
-
[model]
|
|
96
|
-
base = "Qwen/Qwen3-4B"
|
|
97
|
-
|
|
98
|
-
[data]
|
|
99
|
-
split = "train"
|
|
100
|
-
seed_start = 0
|
|
101
|
-
episodes_per_iteration = 2048
|
|
102
|
-
|
|
103
|
-
[training]
|
|
104
|
-
max_turns = 1
|
|
105
|
-
ops = ["agent", "env"]
|
|
106
|
-
batch_size = 128
|
|
107
|
-
group_size = 1024
|
|
108
|
-
reward_positive = 1.0
|
|
109
|
-
reward_negative_no_tool = -1.0
|
|
110
|
-
reward_negative_no_answer = -0.5
|
|
111
|
-
|
|
112
|
-
[policy]
|
|
113
|
-
model = "Qwen/Qwen3-4B"
|
|
114
|
-
inference_url = "https://your-inference-host"
|
|
115
|
-
max_tokens = 128
|
|
116
|
-
temperature = 0.0
|
|
117
|
-
|
|
118
|
-
[tags]
|
|
119
|
-
experiment = "math_single_step"
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
Submit jobs interactively with:
|
|
123
|
-
|
|
124
|
-
```bash
|
|
125
|
-
uvx synth-ai train --type rl --config examples/rl/configs/rl_from_base_qwen.toml
|
|
126
|
-
```
|
|
127
|
-
|
|
128
|
-
The CLI ensures the task app is reachable (`/health`, `/task_info`), prompts for missing secrets, and polls job status until completion. For scripted automation, use `run_rl_and_save.py`:
|
|
129
|
-
|
|
130
|
-
```bash
|
|
131
|
-
uv run python examples/rl/run_rl_and_save.py \
|
|
132
|
-
--config examples/rl/configs/rl_from_base_qwen.toml \
|
|
133
|
-
--backend https://backend.synth.ai/api
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
## 5. Evaluating RL Outputs
|
|
137
|
-
|
|
138
|
-
After training completes, set `model = "rl:<job_or_model_id>"` in `configs/eval_rl_qwen.toml` (and update `split = "test"` for a held-out set). Re-run `run_eval.py` to compare:
|
|
139
|
-
|
|
140
|
-
```bash
|
|
141
|
-
uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_rl_qwen.toml
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
Record both validation (pre-RL) and test (post-RL) accuracy to quantify improvements.
|
|
145
|
-
|
|
146
|
-
## 6. Dataset Notes
|
|
147
|
-
|
|
148
|
-
- By default the task app loads the [Hendrycks MATH benchmark](https://huggingface.co/datasets/nlile/hendrycks-MATH-benchmark). Override via `MATH_DATASET_NAME` / `MATH_DATASET_CONFIG` env vars if you want a different variant. The dataset is public and automatically downloaded when the task app starts; the server will fail fast with a clear error if it cannot be fetched.
|
|
149
|
-
- For offline use, run `uv run python examples/rl/download_dataset.py --output-dir examples/rl/data --dataset nlile/hendrycks-MATH-benchmark --config algebra --limit 2000`. Then start the task app with `MATH_DATASET_LOCAL_DIR=examples/rl/data` (or set `MATH_DATASET_LOCAL_<SPLIT>_FILE`).
|
|
150
|
-
- Hugging Face downloads occur at runtime; pre-fetch locally or mount a Modal volume if you need offline access.
|
|
151
|
-
- Hugging Face downloads occur at runtime; pre-fetch locally or mount a Modal volume if you need offline access.
|
|
152
|
-
- Seeds map directly to dataset indices. Use `seed_start` to control determinism in configs and evaluations.
|
|
153
|
-
|
|
154
|
-
## 7. Additional Utilities
|
|
155
|
-
|
|
156
|
-
- `examples/rl/task_app/math_task_app.py` – legacy runner (`python .../math_task_app.py --reload`).
|
|
157
|
-
- `examples/rl/run_eval.py` – CLI evaluation helper (supports proxying Groq or hitting arbitrary inference URLs).
|
|
158
|
-
- `examples/rl/run_rl_and_save.py` – thin wrapper around the Synth `/rl/jobs` API.
|
|
159
|
-
|
|
160
|
-
For broader background on Synth task apps, CLI commands, and tracing, see the new documentation under `docs/`.
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
|
|
165
|
-
uvx synth-ai serve math-single-step \
|
|
166
|
-
--port 8101 \
|
|
167
|
-
--env-file examples/rl/.env \
|
|
168
|
-
--trace traces/math \
|
|
169
|
-
--force
|
examples/rl/download_dataset.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Download subsets of the MATH dataset to local JSONL files."""
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
import argparse
|
|
7
|
-
import json
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
10
|
-
|
|
11
|
-
from datasets import load_dataset
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def extract_examples(dataset: Any, *, limit: int | None) -> list[dict[str, str]]:
    """Convert dataset rows into ``{"problem", "solution"}`` string records.

    Args:
        dataset: Any iterable of mapping-like rows exposing ``.get``. It does
            not need ``.select`` or ``len``, so plain lists, generators and
            streaming datasets all work.
        limit: Maximum number of leading rows to convert. ``None`` converts
            every row; zero or a negative value yields an empty list.

    Returns:
        One dict per row. ``problem`` is the stripped problem text, and
        ``solution`` is the solution text with list-valued solutions joined
        by newlines. Missing or falsy fields become ``""``, and non-string
        values are coerced with ``str`` so the declared return type holds.
    """
    from itertools import islice

    # Clamp so a negative limit means "nothing" rather than an islice error.
    rows = dataset if limit is None else islice(dataset, max(limit, 0))

    examples: list[dict[str, str]] = []
    for item in rows:
        problem = str(item.get("problem") or "").strip()
        solution = item.get("solution") or ""
        if isinstance(solution, list):
            solution = "\n".join(str(part) for part in solution)
        examples.append(
            {
                "problem": problem,
                "solution": str(solution),
            }
        )
    return examples
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def write_jsonl(path: Path, rows: list[dict[str, str]]) -> None:
    """Serialize *rows* to *path* as JSON Lines, one object per line.

    Missing parent directories are created first. The file is opened in write
    mode as UTF-8, so any existing content is replaced, and non-ASCII
    characters are written verbatim rather than escaped.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in rows
        )
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def main() -> None:
    """Download the requested dataset splits and write each one as JSONL.

    Parses ``--output-dir``, ``--dataset``, ``--config``, ``--splits`` and
    ``--limit`` from the command line. Each split is loaded with
    ``load_dataset``, reduced to problem/solution records, and written to
    ``<output-dir>/<split>.jsonl``. Progress is printed per split.
    """
    cli = argparse.ArgumentParser(
        description="Download MATH dataset splits to JSONL for offline use"
    )
    # (flag, add_argument options), registered in this order.
    option_table = (
        (
            "--output-dir",
            {"default": "examples/rl/data", "help": "Directory to write <split>.jsonl files"},
        ),
        (
            "--dataset",
            {
                "default": "nlile/hendrycks-MATH-benchmark",
                "help": "Hugging Face dataset identifier",
            },
        ),
        (
            "--config",
            {"default": "algebra", "help": "Hugging Face dataset config (if required)"},
        ),
        (
            "--splits",
            {
                "nargs": "*",
                "default": ["train", "validation", "test"],
                "help": "Splits to download",
            },
        ),
        (
            "--limit",
            {"type": int, "default": None, "help": "Optional cap on examples per split"},
        ),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    args = cli.parse_args()

    target_dir = Path(args.output_dir).expanduser()
    target_dir.mkdir(parents=True, exist_ok=True)

    for split in args.splits:
        print(f"[INFO] Downloading {args.dataset} ({args.config}) split={split}")
        # The config name is only passed positionally when one was supplied.
        positional = (args.dataset, args.config) if args.config else (args.dataset,)
        dataset = load_dataset(*positional, split=split)
        rows = extract_examples(dataset, limit=args.limit)
        out_path = target_dir / f"{split}.jsonl"
        write_jsonl(out_path, rows)
        print(f"[INFO] Wrote {len(rows)} examples to {out_path}")

    print("Done. Set MATH_DATASET_LOCAL_DIR to the output directory when serving the task app.")
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
if __name__ == "__main__":
|
|
80
|
-
main()
|
examples/run_crafter_demo.sh
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
#!/bin/bash

# Crafter demo launcher.
# Prints a short banner, then runs the reactive Crafter agent demo module
# with the gemini-1.5-flash model via `uv run`.

printf '%s\n' \
    "🚀 Starting Crafter agent demo with Gemini 1.5 Flash..." \
    "Make sure the synth-ai service is running: uvx synth-ai serve" \
    ""

uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
|
examples/sft/README.md
DELETED
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
### Supervised Fine-Tuning for Crafter
|
|
2
|
-
|
|
3
|
-
This folder provides a minimal, reusable SFT workflow that pulls out the SFT step from `examples/warming_up_to_rl/` and focuses it on LoRA/QLoRA. We've also added guidance for running full finetuning (FFT) so you can compare adapters against end-to-end weight updates.
|
|
4
|
-
|
|
5
|
-
It supports distilling Groq (or other vendor) rollouts into JSONL using tracing and then training a small base model like `Qwen/Qwen3-0.6B`.
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
### 0) Load environment from .env.dev (recommended)
|
|
10
|
-
|
|
11
|
-
Use your dev env file so keys/URLs are sourced consistently:
|
|
12
|
-
|
|
13
|
-
```bash
|
|
14
|
-
# Example path; update to your actual dev env
|
|
15
|
-
set -a && source /path/to/your/backend/.env.dev && set +a
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
This ensures `ENVIRONMENT_API_KEY`, `GROQ_API_KEY`, and (optionally) `BACKEND_BASE_URL` are available to the steps below.
|
|
19
|
-
|
|
20
|
-
---
|
|
21
|
-
|
|
22
|
-
### 1) Collect traces and export SFT JSONL
|
|
23
|
-
|
|
24
|
-
You can generate traces with the Crafter task app and then export them to SFT JSONL using the existing exporter:
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
# Serve the task app locally with tracing enabled (example)
|
|
28
|
-
uvx synth-ai serve grpo-crafter \
|
|
29
|
-
--trace traces/v3 \
|
|
30
|
-
--trace-db traces/v3/synth_ai.db \
|
|
31
|
-
--port 8001
|
|
32
|
-
|
|
33
|
-
# Or run traced local rollouts to accumulate data
|
|
34
|
-
uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
|
|
35
|
-
--episodes 50 --max-turns 10
|
|
36
|
-
|
|
37
|
-
# Export SFT dataset from the trace DB
|
|
38
|
-
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
39
|
-
--db traces/v3/synth_ai.db \
|
|
40
|
-
--min-unique 0 \
|
|
41
|
-
--output examples/sft/ft_data/crafter_traces.jsonl
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
Notes:
|
|
45
|
-
- The exporter uses achievements and event rewards to filter high-signal steps. Combine `--min-unique`, `--min-outcome-reward`, `--event-reward`, and `--require-achievement` to control data quality.
|
|
46
|
-
- You can restrict to sessions from certain providers/models with `--provider`/`--model`.
|
|
47
|
-
- Use `--limit` while debugging to reduce dataset size quickly.
|
|
48
|
-
|
|
49
|
-
---
|
|
50
|
-
|
|
51
|
-
### 2a) Train LoRA (QLoRA) on Qwen/Qwen3-0.6B
|
|
52
|
-
|
|
53
|
-
Use the standard CLI. Do not use a custom Python finetuning script. Point the CLI at your `.env.dev` so it picks up keys automatically:
|
|
54
|
-
|
|
55
|
-
```bash
|
|
56
|
-
uvx synth-ai train \
|
|
57
|
-
--type sft \
|
|
58
|
-
--config examples/sft/configs/crafter_lora_qwen0p6b.toml \
|
|
59
|
-
--dataset examples/sft/ft_data/crafter_traces.jsonl \
|
|
60
|
-
--env-file /path/to/your/backend/.env.dev
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
The config sets `training.use_qlora = true` and `hyperparameters.train_kind = "peft"` to request LoRA adapters.
|
|
64
|
-
|
|
65
|
-
Experiment tips:
|
|
66
|
-
- The backend currently defaults to a LoRA rank of 16. If you need other ranks, generate the payload with `--dry-run`, add `"lora_rank": <value>` (and optional `"lora_alpha"`, `"lora_dropout"`) under `hyperparameters`, and submit it via the API until the CLI exposes these knobs directly.
|
|
67
|
-
- Duplicate the TOML and adjust `hyperparameters.warmup_ratio`, `learning_rate`, or `gradient_accumulation_steps` to keep the global batch size comparable across datasets.
|
|
68
|
-
|
|
69
|
-
---
|
|
70
|
-
|
|
71
|
-
### 2b) Train Full Finetune (FFT) on Qwen/Qwen3-0.6B
|
|
72
|
-
|
|
73
|
-
Full finetuning updates all weights and uses a near-identical CLI flow with the LoRA toggle disabled. The helper config lives alongside the LoRA sample:
|
|
74
|
-
|
|
75
|
-
```bash
|
|
76
|
-
uvx synth-ai train \
|
|
77
|
-
--type sft \
|
|
78
|
-
--config examples/sft/configs/crafter_fft_qwen0p6b.toml \
|
|
79
|
-
--dataset examples/sft/ft_data/crafter_traces.jsonl \
|
|
80
|
-
--env-file /path/to/your/backend/.env.dev
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
Key differences vs LoRA:
|
|
84
|
-
- `training.use_qlora = false` and `hyperparameters.train_kind = "fft"` request a full-weight update.
|
|
85
|
-
- `per_device_batch` defaults to 1 to keep memory use comfortable on a single H100; raise gradually as you confirm headroom.
|
|
86
|
-
- FFT runs slower per step. Consider trimming the dataset with `--examples` or the exporter filters for quick baselines.
|
|
87
|
-
|
|
88
|
-
If you want the 4B Crafter FFT baseline from the RL examples, reuse `examples/warming_up_to_rl/configs/crafter_fft_4b.toml` with the same CLI command.
|
|
89
|
-
|
|
90
|
-
---
|
|
91
|
-
|
|
92
|
-
### 3) Evaluate the fine-tuned models
|
|
93
|
-
|
|
94
|
-
After the job completes, list your fine-tuned models and evaluate them in the Crafter loop:
|
|
95
|
-
|
|
96
|
-
```bash
|
|
97
|
-
# List models
|
|
98
|
-
uv run python - <<'PY'
|
|
99
|
-
import asyncio
|
|
100
|
-
import os
|
|
101
|
-
from synth_ai.learning.client import LearningClient
|
|
102
|
-
|
|
103
|
-
backend = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api")
|
|
104
|
-
api_key = os.getenv("SYNTH_API_KEY", "")
|
|
105
|
-
async def main():
|
|
106
|
-
client = LearningClient(backend, api_key)
|
|
107
|
-
models = await client.list_fine_tuned_models()
|
|
108
|
-
for m in models:
|
|
109
|
-
print(m)
|
|
110
|
-
asyncio.run(main())
|
|
111
|
-
PY
|
|
112
|
-
|
|
113
|
-
# Evaluate in the Crafter eval loop (example via warming_up_to_rl)
|
|
114
|
-
TASK_APP_URL=http://localhost:8001 \
|
|
115
|
-
uv run python examples/warming_up_to_rl/run_eval.py \
|
|
116
|
-
--toml examples/warming_up_to_rl/configs/eval_local_vllm.toml \
|
|
117
|
-
--model ft:YOUR_FT_MODEL_ID \
|
|
118
|
-
--use-rollout
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
---
|
|
122
|
-
|
|
123
|
-
### 4) Plan comparison runs
|
|
124
|
-
|
|
125
|
-
Keep runs comparable by adjusting one axis at a time and logging the settings in your experiment tracker (spreadsheet, Weights & Biases, etc.).
|
|
126
|
-
|
|
127
|
-
- **LoRA rank sweeps:** start from `crafter_lora_qwen0p6b.toml`, clone it per rank (e.g., `r=4,8,16,64`). For now add the desired `lora_rank` in the job payload manually (see note above) and include it in the run name.
|
|
128
|
-
- **Dataset size:** duplicate the exported JSONL and slice with `head -n`, or pass `--examples N` to the CLI for quick subsamples. Track the effective token count using the exporter logs.
|
|
129
|
-
- **Data quality:** increase `--min-unique`, require specific achievements, or exclude low-reward sessions with `export_trace_sft.py`. Capture the filter tuple in your run metadata so evaluations stay reproducible.
|
|
130
|
-
- **FFT vs LoRA:** run both configs on the same dataset/cardinality so differences reflect the training method rather than the data.
|
|
131
|
-
|
|
132
|
-
For each sweep, use consistent evaluation seeds and write down throughput (tokens/sec) so you can weigh quality vs cost.
|
|
133
|
-
|
|
134
|
-
---
|
|
135
|
-
|
|
136
|
-
### Files
|
|
137
|
-
- `configs/crafter_lora_qwen0p6b.toml`: LoRA/QLoRA SFT config for `Qwen/Qwen3-0.6B`.
|
|
138
|
-
- `configs/crafter_fft_qwen0p6b.toml`: Full-finetune SFT config for `Qwen/Qwen3-0.6B`.
|
|
139
|
-
- `ft_data/`: place your exported JSONL here (ignored by VCS).
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
[job]
|
|
2
|
-
model = "Qwen/Qwen3-0.6B"
|
|
3
|
-
# Prefer passing --dataset at runtime for repeatability
|
|
4
|
-
# data = "examples/sft/ft_data/crafter_traces.jsonl"
|
|
5
|
-
|
|
6
|
-
[compute]
|
|
7
|
-
gpu_type = "H100"
|
|
8
|
-
gpu_count = 1
|
|
9
|
-
nodes = 1
|
|
10
|
-
|
|
11
|
-
[data]
|
|
12
|
-
topology = {}
|
|
13
|
-
# Optional validation set if you have one locally
|
|
14
|
-
# validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
|
|
15
|
-
|
|
16
|
-
[training]
|
|
17
|
-
mode = "sft_offline"
|
|
18
|
-
use_qlora = false
|
|
19
|
-
|
|
20
|
-
[training.validation]
|
|
21
|
-
enabled = true
|
|
22
|
-
evaluation_strategy = "steps"
|
|
23
|
-
eval_steps = 50
|
|
24
|
-
save_best_model_at_end = true
|
|
25
|
-
metric_for_best_model = "val.loss"
|
|
26
|
-
greater_is_better = false
|
|
27
|
-
|
|
28
|
-
[hyperparameters]
|
|
29
|
-
n_epochs = 1
|
|
30
|
-
train_kind = "fft"
|
|
31
|
-
per_device_batch = 1
|
|
32
|
-
gradient_accumulation_steps = 32
|
|
33
|
-
sequence_length = 4096
|
|
34
|
-
learning_rate = 1e-5
|
|
35
|
-
warmup_ratio = 0.03
|
|
36
|
-
weight_decay = 0.01
|
|
37
|
-
|
|
38
|
-
[hyperparameters.parallelism]
|
|
39
|
-
use_deepspeed = true
|
|
40
|
-
deepspeed_stage = 2
|
|
41
|
-
fsdp = false
|
|
42
|
-
bf16 = true
|
|
43
|
-
fp16 = false
|
|
44
|
-
activation_checkpointing = true
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
[job]
|
|
2
|
-
model = "Qwen/Qwen3-0.6B"
|
|
3
|
-
# Optionally set here, but prefer passing --dataset at runtime
|
|
4
|
-
# data = "examples/sft/ft_data/crafter_traces.jsonl"
|
|
5
|
-
|
|
6
|
-
[compute]
|
|
7
|
-
gpu_type = "H100"
|
|
8
|
-
gpu_count = 1
|
|
9
|
-
nodes = 1
|
|
10
|
-
|
|
11
|
-
[data]
|
|
12
|
-
# Forwarded into metadata.effective_config
|
|
13
|
-
topology = {}
|
|
14
|
-
# Optional validation set if you have one locally
|
|
15
|
-
# validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
|
|
16
|
-
|
|
17
|
-
[training]
|
|
18
|
-
mode = "lora"
|
|
19
|
-
use_qlora = true
|
|
20
|
-
|
|
21
|
-
[training.validation]
|
|
22
|
-
enabled = true
|
|
23
|
-
evaluation_strategy = "steps"
|
|
24
|
-
eval_steps = 50
|
|
25
|
-
save_best_model_at_end = true
|
|
26
|
-
metric_for_best_model = "val.loss"
|
|
27
|
-
greater_is_better = false
|
|
28
|
-
|
|
29
|
-
[hyperparameters]
|
|
30
|
-
n_epochs = 1
|
|
31
|
-
train_kind = "peft"
|
|
32
|
-
per_device_batch = 2
|
|
33
|
-
gradient_accumulation_steps = 32
|
|
34
|
-
sequence_length = 4096
|
|
35
|
-
learning_rate = 5e-6
|
|
36
|
-
warmup_ratio = 0.03
|
|
37
|
-
|
|
38
|
-
[hyperparameters.parallelism]
|
|
39
|
-
use_deepspeed = true
|
|
40
|
-
deepspeed_stage = 2
|
|
41
|
-
fsdp = false
|
|
42
|
-
bf16 = true
|
|
43
|
-
fp16 = false
|
|
44
|
-
activation_checkpointing = true
|
|
45
|
-
|
examples/sft/evaluate.py
DELETED
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Evaluate a base or fine-tuned model on Crafter via the Task App rollout.
|
|
3
|
-
|
|
4
|
-
This mirrors the minimal evaluation loop: call `/rollout` for a set of seeds
|
|
5
|
-
and report outcome/step metrics. If tracing is enabled server-side, you can
|
|
6
|
-
use the exported sqlite DB for further analysis.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import argparse
|
|
12
|
-
import asyncio
|
|
13
|
-
import os
|
|
14
|
-
from dataclasses import dataclass
|
|
15
|
-
from typing import Any
|
|
16
|
-
|
|
17
|
-
from synth_ai.task import (
|
|
18
|
-
RolloutEnvSpec,
|
|
19
|
-
RolloutPolicySpec,
|
|
20
|
-
RolloutRecordConfig,
|
|
21
|
-
RolloutRequest,
|
|
22
|
-
TaskAppClient,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@dataclass(slots=True)
class EvalArgs:
    """Normalized settings for one evaluation run, built from the CLI arguments."""

    # Task app base URL; handed to TaskAppClient as its first argument.
    base_url: str
    # API key passed to TaskAppClient (CLI default: ENVIRONMENT_API_KEY).
    api_key: str
    # Model identifier placed in the rollout policy config (base name or ft:<id>).
    model: str
    # Inference endpoint placed in the rollout policy config.
    inference_url: str
    # Key placed in the policy config as "api_key" (CLI default: GROQ_API_KEY).
    inference_api_key: str
    # Seeds to evaluate; one rollout request is issued per seed.
    seeds: list[int]
    # Number of agent/env op pairs requested per rollout (see _ops).
    max_llm_calls: int
    # Passed to TaskAppClient as `timeout` — presumably seconds; TODO confirm.
    timeout: float
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _ops(n: int) -> list[str]:
    """Return the rollout op sequence: ``n`` repetitions of ``"agent", "env"``.

    Each repetition is one LLM step followed by one env step. ``n`` is
    clamped to a minimum of 1, so at least one pair is always returned.
    """
    rounds = n if n > 1 else 1
    return ["agent", "env"] * rounds
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def _request(seed: int, a: EvalArgs) -> RolloutRequest:
    """Build the eval-mode rollout request for a single Crafter seed.

    The run id is ``eval-<seed>``. The policy config carries the model,
    inference URL and inference API key from *a*, and the op list is sized
    from ``a.max_llm_calls``.
    """
    from synth_ai.task.contracts import RolloutMode

    policy_config = {
        "model": a.model,
        "inference_url": a.inference_url,
        "api_key": a.inference_api_key,
    }
    env_spec = RolloutEnvSpec(env_name="crafter", seed=seed, config={})
    policy_spec = RolloutPolicySpec(policy_name="crafter-react", config=policy_config)
    op_sequence = _ops(a.max_llm_calls)
    record_cfg = RolloutRecordConfig(
        trajectories=True,
        return_trace=False,
        trace_format="compact",
    )
    return RolloutRequest(
        run_id=f"eval-{seed}",
        env=env_spec,
        policy=policy_spec,
        ops=op_sequence,
        record=record_cfg,
        mode=RolloutMode.EVAL,
    )
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
async def _eval_seed(client: TaskAppClient, seed: int, a: EvalArgs) -> dict[str, Any]:
    """Run one rollout for *seed* and return its headline metrics.

    Returns a dict with ``seed`` plus ``num_steps``, ``episode_returns`` and
    ``outcome_score`` copied from the response's ``metrics`` object.
    """
    response = await client.rollout(_request(seed, a))
    metrics = response.metrics
    summary: dict[str, Any] = {"seed": seed}
    for field in ("num_steps", "episode_returns", "outcome_score"):
        summary[field] = getattr(metrics, field)
    return summary
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
async def main() -> None:
    """Evaluate a model on Crafter over a list of seeds and print the returns.

    Parses the CLI (task app URL/key, model, inference URL/key, seeds,
    max LLM calls, timeout), runs one rollout per seed sequentially through
    ``TaskAppClient``, prints each seed's episode returns, and finally prints
    the mean of the first return of every episode that reported one.

    Raises:
        SystemExit: if ``--seeds`` is not a comma-separated list of integers,
            or if the task-app key or the inference key is missing.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--base-url", default=os.getenv("TASK_APP_URL", "http://localhost:8001"))
    p.add_argument("--api-key", default=os.getenv("ENVIRONMENT_API_KEY"))
    p.add_argument("--model", required=True, help="Base or ft:<id> to evaluate")
    p.add_argument("--inference-url", default=os.getenv("INFERENCE_URL", "https://api.groq.com/openai"))
    p.add_argument("--inference-api-key", default=os.getenv("GROQ_API_KEY"))
    p.add_argument("--seeds", default="0,1,2,3,4,5,6,7,8,9")
    p.add_argument("--max-llm-calls", type=int, default=10)
    p.add_argument("--timeout", type=float, default=60.0)
    args = p.parse_args()

    try:
        seeds = [int(s) for s in str(args.seeds).split(",") if s.strip()]
    except ValueError as exc:
        # Report malformed seed lists as a CLI error rather than a traceback.
        raise SystemExit(f"--seeds must be a comma-separated list of integers: {exc}") from exc

    a = EvalArgs(
        base_url=str(args.base_url).strip(),
        api_key=str(args.api_key or "").strip(),
        model=str(args.model).strip(),
        inference_url=str(args.inference_url).strip(),
        inference_api_key=str(args.inference_api_key or "").strip(),
        seeds=seeds,
        max_llm_calls=int(args.max_llm_calls),
        timeout=float(args.timeout),
    )
    if not a.api_key:
        raise SystemExit("ENVIRONMENT_API_KEY is required")
    if not a.inference_api_key:
        raise SystemExit("Inference API key (e.g., GROQ_API_KEY) is required")

    results: list[dict[str, Any]] = []
    async with TaskAppClient(a.base_url, api_key=a.api_key, timeout=a.timeout) as client:
        for seed in a.seeds:
            r = await _eval_seed(client, seed, a)
            results.append(r)
            print(f"seed={seed} return={r.get('episode_returns')}")

    # Simple aggregate: mean of the first return reported by each episode.
    flat_returns: list[float] = []
    skipped = 0
    for r in results:
        ers = r.get("episode_returns") or []
        if not (isinstance(ers, list) and ers):
            skipped += 1
            continue
        try:
            flat_returns.append(float(ers[0]))
        except (TypeError, ValueError):
            # Still best-effort, but count the episode instead of hiding it.
            skipped += 1

    if flat_returns:
        mean_ret = sum(flat_returns) / len(flat_returns)
        print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
    else:
        print("mean_return unavailable: no numeric episode returns were collected")
    if skipped:
        print(f"skipped {skipped} episode(s) without a usable numeric return")
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
if __name__ == "__main__":
|
|
117
|
-
asyncio.run(main())
|
|
118
|
-
|
|
119
|
-
|