synth-ai: synth_ai-0.2.14-py3-none-any.whl → synth_ai-0.4.4-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of synth-ai might be problematic.
- synth_ai/__init__.py +25 -46
- synth_ai/__main__.py +30 -3
- synth_ai/cli/__init__.py +98 -72
- synth_ai/cli/__main__.py +42 -0
- synth_ai/cli/_internal/__init__.py +5 -0
- synth_ai/cli/_internal/modal_wrapper.py +31 -0
- synth_ai/cli/_internal/storage.py +20 -0
- synth_ai/cli/_internal/typer_patch.py +47 -0
- synth_ai/cli/_internal/validate_task_app.py +29 -0
- synth_ai/cli/agents/__init__.py +17 -0
- synth_ai/cli/agents/claude.py +77 -0
- synth_ai/cli/agents/codex.py +265 -0
- synth_ai/cli/agents/opencode.py +253 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/artifacts/__init__.py +13 -0
- synth_ai/cli/commands/artifacts/client.py +119 -0
- synth_ai/cli/commands/artifacts/config.py +57 -0
- synth_ai/cli/commands/artifacts/core.py +24 -0
- synth_ai/cli/commands/artifacts/download.py +188 -0
- synth_ai/cli/commands/artifacts/export.py +186 -0
- synth_ai/cli/commands/artifacts/list.py +156 -0
- synth_ai/cli/commands/artifacts/parsing.py +250 -0
- synth_ai/cli/commands/artifacts/show.py +336 -0
- synth_ai/cli/commands/demo/__init__.py +3 -0
- synth_ai/cli/commands/demo/core.py +153 -0
- synth_ai/cli/commands/eval/__init__.py +10 -0
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +258 -0
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +60 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/scan/__init__.py +19 -0
- synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
- synth_ai/cli/commands/scan/core.py +344 -0
- synth_ai/cli/commands/scan/health_checker.py +242 -0
- synth_ai/cli/commands/scan/local_scanner.py +278 -0
- synth_ai/cli/commands/scan/models.py +83 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1428 -0
- synth_ai/cli/commands/status/__init__.py +3 -0
- synth_ai/cli/commands/status/client.py +91 -0
- synth_ai/cli/commands/status/config.py +12 -0
- synth_ai/cli/commands/status/errors.py +11 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +34 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
- synth_ai/cli/commands/status/subcommands/models.py +35 -0
- synth_ai/cli/commands/status/subcommands/runs.py +34 -0
- synth_ai/cli/commands/status/subcommands/session.py +77 -0
- synth_ai/cli/commands/status/subcommands/summary.py +39 -0
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +23 -0
- synth_ai/cli/commands/train/__init__.py +51 -0
- synth_ai/cli/commands/train/core.py +22 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
- synth_ai/cli/commands/train/validation.py +392 -0
- synth_ai/cli/commands/train/verifier_schemas.py +200 -0
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/__init__.py +10 -0
- synth_ai/cli/demo_apps/core/__init__.py +28 -0
- synth_ai/cli/demo_apps/core/cli.py +1735 -0
- synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
- synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/cli/demo_apps/demo_registry.py +176 -0
- synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
- synth_ai/cli/demo_apps/math/__init__.py +1 -0
- synth_ai/cli/demo_apps/math/_common.py +16 -0
- synth_ai/cli/demo_apps/math/app.py +38 -0
- synth_ai/cli/demo_apps/math/config.toml +75 -0
- synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
- synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
- synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
- synth_ai/cli/demo_apps/mipro/main.py +271 -0
- synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
- synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
- synth_ai/cli/demos/__init__.py +12 -0
- synth_ai/cli/demos/demo.py +32 -0
- synth_ai/cli/demos/rl_demo.py +254 -0
- synth_ai/cli/deploy.py +216 -0
- synth_ai/cli/infra/__init__.py +14 -0
- synth_ai/cli/infra/balance.py +216 -0
- synth_ai/cli/infra/mcp.py +35 -0
- synth_ai/cli/infra/modal_app.py +36 -0
- synth_ai/cli/infra/setup.py +69 -0
- synth_ai/cli/infra/status.py +16 -0
- synth_ai/cli/infra/turso.py +77 -0
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/agents.py +76 -0
- synth_ai/cli/lib/apps/modal_app.py +101 -0
- synth_ai/cli/lib/apps/task_app.py +642 -0
- synth_ai/cli/lib/bin.py +39 -0
- synth_ai/cli/lib/env.py +375 -0
- synth_ai/cli/lib/errors.py +85 -0
- synth_ai/cli/lib/modal.py +315 -0
- synth_ai/cli/lib/plotting.py +126 -0
- synth_ai/cli/lib/prompt_args.py +39 -0
- synth_ai/cli/lib/prompts.py +284 -0
- synth_ai/cli/lib/sqld.py +122 -0
- synth_ai/cli/lib/task_app_discovery.py +884 -0
- synth_ai/cli/lib/task_app_env.py +295 -0
- synth_ai/cli/lib/train_cfgs.py +300 -0
- synth_ai/cli/lib/tunnel_records.py +207 -0
- synth_ai/cli/local/__init__.py +14 -0
- synth_ai/cli/local/experiment_queue/__init__.py +72 -0
- synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
- synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
- synth_ai/cli/local/experiment_queue/config.py +128 -0
- synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
- synth_ai/cli/local/experiment_queue/database.py +175 -0
- synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
- synth_ai/cli/local/experiment_queue/models.py +231 -0
- synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
- synth_ai/cli/local/experiment_queue/results.py +373 -0
- synth_ai/cli/local/experiment_queue/schemas.py +131 -0
- synth_ai/cli/local/experiment_queue/service.py +344 -0
- synth_ai/cli/local/experiment_queue/status.py +372 -0
- synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
- synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
- synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
- synth_ai/cli/local/experiment_queue/validation.py +157 -0
- synth_ai/cli/local/session/__init__.py +92 -0
- synth_ai/cli/local/session/client.py +383 -0
- synth_ai/cli/local/session/constants.py +63 -0
- synth_ai/cli/local/session/exceptions.py +105 -0
- synth_ai/cli/local/session/manager.py +139 -0
- synth_ai/cli/local/session/models.py +89 -0
- synth_ai/cli/local/session/query.py +110 -0
- synth_ai/cli/root.py +30 -6
- synth_ai/cli/task_apps/__init__.py +37 -0
- synth_ai/cli/task_apps/commands.py +3145 -0
- synth_ai/cli/task_apps/deploy.py +7 -0
- synth_ai/cli/task_apps/list.py +26 -0
- synth_ai/cli/task_apps/main.py +36 -0
- synth_ai/cli/task_apps/modal_serve.py +11 -0
- synth_ai/cli/task_apps/serve.py +11 -0
- synth_ai/cli/training/__init__.py +8 -0
- synth_ai/cli/training/train.py +5 -0
- synth_ai/cli/training/train_cfg.py +34 -0
- synth_ai/cli/training/watch.py +506 -0
- synth_ai/cli/turso.py +34 -55
- synth_ai/cli/utils/__init__.py +8 -0
- synth_ai/cli/utils/experiments.py +235 -0
- synth_ai/cli/utils/queue.py +504 -0
- synth_ai/cli/utils/recent.py +133 -0
- synth_ai/cli/utils/traces.py +164 -0
- synth_ai/contracts/__init__.py +67 -0
- synth_ai/core/__init__.py +100 -0
- synth_ai/core/_utils/__init__.py +54 -0
- synth_ai/core/_utils/base_url.py +10 -0
- synth_ai/core/_utils/http.py +10 -0
- synth_ai/core/_utils/prompts.py +14 -0
- synth_ai/core/_utils/task_app_state.py +12 -0
- synth_ai/core/_utils/user_config.py +10 -0
- synth_ai/core/apps/common.py +116 -0
- synth_ai/core/auth.py +95 -0
- synth_ai/core/cfgs.py +240 -0
- synth_ai/core/config/__init__.py +16 -0
- synth_ai/core/config/base.py +168 -0
- synth_ai/core/config/resolver.py +89 -0
- synth_ai/core/env.py +231 -0
- synth_ai/core/errors.py +125 -0
- synth_ai/core/http.py +230 -0
- synth_ai/core/integrations/__init__.py +11 -0
- synth_ai/core/integrations/cloudflare.py +1886 -0
- synth_ai/core/integrations/mcp/__init__.py +6 -0
- synth_ai/core/integrations/mcp/__main__.py +8 -0
- synth_ai/core/integrations/mcp/claude.py +36 -0
- synth_ai/core/integrations/mcp/main.py +254 -0
- synth_ai/core/integrations/mcp/setup.py +100 -0
- synth_ai/core/integrations/modal.py +277 -0
- synth_ai/core/json.py +72 -0
- synth_ai/core/log_filter.py +99 -0
- synth_ai/core/logging.py +82 -0
- synth_ai/core/paths.py +107 -0
- synth_ai/core/pricing.py +109 -0
- synth_ai/core/process.py +233 -0
- synth_ai/core/ssl.py +25 -0
- synth_ai/core/storage/__init__.py +71 -0
- synth_ai/core/task_app_state.py +318 -0
- synth_ai/core/telemetry.py +282 -0
- synth_ai/core/tracing_v3/__init__.py +99 -0
- synth_ai/core/tracing_v3/abstractions.py +348 -0
- synth_ai/core/tracing_v3/config.py +229 -0
- synth_ai/core/tracing_v3/constants.py +21 -0
- synth_ai/core/tracing_v3/db_config.py +182 -0
- synth_ai/core/tracing_v3/decorators.py +401 -0
- synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
- synth_ai/core/tracing_v3/migration_helper.py +119 -0
- synth_ai/core/tracing_v3/session_tracer.py +542 -0
- synth_ai/core/tracing_v3/storage/base.py +211 -0
- synth_ai/core/tracing_v3/storage/config.py +109 -0
- synth_ai/core/tracing_v3/storage/factory.py +39 -0
- synth_ai/core/tracing_v3/trace_utils.py +326 -0
- synth_ai/core/tracing_v3/turso/daemon.py +278 -0
- synth_ai/core/tracing_v3/turso/models.py +470 -0
- synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
- synth_ai/core/tracing_v3/utils.py +108 -0
- synth_ai/core/urls.py +18 -0
- synth_ai/core/user_config.py +137 -0
- synth_ai/core/uvicorn.py +222 -0
- synth_ai/data/__init__.py +83 -0
- synth_ai/data/enums.py +122 -0
- synth_ai/data/rewards.py +249 -0
- synth_ai/data/traces.py +35 -0
- synth_ai/products/__init__.py +6 -0
- synth_ai/products/graph_evolve/__init__.py +45 -0
- synth_ai/products/graph_evolve/client.py +226 -0
- synth_ai/products/graph_evolve/config.py +591 -0
- synth_ai/products/graph_evolve/converters/__init__.py +42 -0
- synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
- synth_ai/products/graph_evolve/run.py +222 -0
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +129 -0
- synth_ai/sdk/api/__init__.py +1 -0
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/models/supported.py +514 -0
- synth_ai/sdk/api/research_agent/__init__.py +296 -0
- synth_ai/sdk/api/train/__init__.py +85 -0
- synth_ai/sdk/api/train/builders.py +1076 -0
- synth_ai/sdk/api/train/cli.py +2196 -0
- synth_ai/sdk/api/train/config_finder.py +267 -0
- synth_ai/sdk/api/train/configs/__init__.py +67 -0
- synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
- synth_ai/sdk/api/train/configs/rl.py +436 -0
- synth_ai/sdk/api/train/configs/sft.py +263 -0
- synth_ai/sdk/api/train/configs/shared.py +81 -0
- synth_ai/sdk/api/train/context_learning.py +312 -0
- synth_ai/sdk/api/train/env_resolver.py +418 -0
- synth_ai/sdk/api/train/graph_validators.py +216 -0
- synth_ai/sdk/api/train/graphgen.py +1102 -0
- synth_ai/sdk/api/train/graphgen_models.py +873 -0
- synth_ai/sdk/api/train/graphgen_validators.py +109 -0
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +160 -0
- synth_ai/sdk/api/train/progress/__init__.py +97 -0
- synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
- synth_ai/sdk/api/train/progress/events.py +326 -0
- synth_ai/sdk/api/train/progress/results.py +428 -0
- synth_ai/sdk/api/train/progress/tracker.py +641 -0
- synth_ai/sdk/api/train/prompt_learning.py +800 -0
- synth_ai/sdk/api/train/rl.py +478 -0
- synth_ai/sdk/api/train/sft.py +398 -0
- synth_ai/sdk/api/train/summary.py +522 -0
- synth_ai/sdk/api/train/supported_algos.py +147 -0
- synth_ai/sdk/api/train/task_app.py +351 -0
- synth_ai/sdk/api/train/utils.py +279 -0
- synth_ai/sdk/api/train/validators.py +2424 -0
- synth_ai/sdk/graphs/__init__.py +15 -0
- synth_ai/sdk/graphs/completions.py +776 -0
- synth_ai/sdk/graphs/verifier_schemas.py +222 -0
- synth_ai/sdk/inference/__init__.py +6 -0
- synth_ai/sdk/inference/client.py +128 -0
- synth_ai/sdk/jobs/__init__.py +16 -0
- synth_ai/sdk/jobs/client.py +371 -0
- synth_ai/sdk/learning/__init__.py +99 -0
- synth_ai/sdk/learning/client.py +240 -0
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/ft_client.py +7 -0
- synth_ai/sdk/learning/health.py +49 -0
- synth_ai/sdk/learning/jobs.py +202 -0
- synth_ai/sdk/learning/prompt_extraction.py +334 -0
- synth_ai/sdk/learning/prompt_learning_client.py +455 -0
- synth_ai/sdk/learning/prompt_learning_types.py +186 -0
- synth_ai/sdk/learning/rl/__init__.py +35 -0
- synth_ai/sdk/learning/rl/client.py +268 -0
- synth_ai/sdk/learning/rl/contracts.py +23 -0
- synth_ai/sdk/learning/rl/env_keys.py +166 -0
- synth_ai/sdk/learning/rl/secrets.py +13 -0
- synth_ai/sdk/learning/sft/client.py +95 -0
- synth_ai/sdk/learning/sft/config.py +270 -0
- synth_ai/sdk/learning/sft/data.py +698 -0
- synth_ai/sdk/learning/validators.py +52 -0
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/__init__.py +35 -0
- synth_ai/sdk/streaming/config.py +94 -0
- synth_ai/sdk/streaming/handlers.py +1997 -0
- synth_ai/sdk/streaming/streamer.py +708 -0
- synth_ai/sdk/streaming/types.py +112 -0
- synth_ai/sdk/task/__init__.py +164 -0
- synth_ai/sdk/task/apps/__init__.py +169 -0
- synth_ai/sdk/task/client.py +175 -0
- synth_ai/sdk/task/config.py +256 -0
- synth_ai/sdk/task/contracts.py +340 -0
- synth_ai/sdk/task/datasets.py +108 -0
- synth_ai/sdk/task/in_process.py +1200 -0
- synth_ai/sdk/task/in_process_runner.py +314 -0
- synth_ai/sdk/task/inference_api.py +299 -0
- synth_ai/sdk/task/proxy.py +287 -0
- synth_ai/sdk/task/rubrics/__init__.py +54 -0
- synth_ai/sdk/task/rubrics/loaders.py +156 -0
- synth_ai/sdk/task/rubrics/strict.py +148 -0
- synth_ai/sdk/task/rubrics.py +219 -0
- synth_ai/sdk/task/server.py +640 -0
- synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
- synth_ai/sdk/task/tracing_utils.py +95 -0
- synth_ai/sdk/task/validators.py +441 -0
- synth_ai/sdk/training/__init__.py +93 -0
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- synth_ai/utils/__init__.py +213 -0
- synth_ai-0.4.4.dist-info/METADATA +262 -0
- synth_ai-0.4.4.dist-info/RECORD +369 -0
- synth_ai-0.4.4.dist-info/top_level.txt +1 -0
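The additions above outline the 0.4.4 package layout: the public SDK surface is consolidated under `synth_ai.sdk`, the command-line interface under `synth_ai.cli`, and shared infrastructure (tracing, config, integrations) under `synth_ai.core`. Below is a minimal smoke-check sketch of that layout, assuming synth-ai 0.4.4 is installed; the module paths are taken from the file list above, and nothing is assumed about the functions or classes those modules export.

```python
# Sketch: verify the new 0.4.4 subpackage layout is importable.
# Module paths mirror the added files listed above (e.g. synth_ai/sdk/__init__.py);
# no symbols from inside these modules are assumed.
import importlib

NEW_LAYOUT_MODULES = [
    "synth_ai.sdk",              # SDK surface (api, learning, task, streaming, ...)
    "synth_ai.sdk.api.train",    # training configs and job builders
    "synth_ai.sdk.task.server",  # task app server module
    "synth_ai.cli",              # command-line interface package
    "synth_ai.core.tracing_v3",  # tracing, now under core/
    "synth_ai.data",             # shared data enums/rewards/traces
    "synth_ai.products.graph_evolve",
]

for name in NEW_LAYOUT_MODULES:
    importlib.import_module(name)
    print(f"ok: {name}")
```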
- examples/__init__.py +0 -16
- examples/analyze_semantic_words.sh +0 -17
- examples/crafter_debug_render.py +0 -186
- examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
- examples/multi_step/configs/README_verilog_rl.md +0 -77
- examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
- examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
- examples/multi_step/configs/crafter_synth_backend.md +0 -40
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
- examples/multi_step/configs/verilog_rl_lora.toml +0 -190
- examples/multi_step/crafter_rl_lora.md +0 -70
- examples/multi_step/judges/crafter_backend_judge.py +0 -220
- examples/multi_step/judges/verilog_backend_judge.py +0 -234
- examples/multi_step/readme.md +0 -48
- examples/multi_step/sse_metrics_streaming_notes.md +0 -357
- examples/multi_step/task_app_config_notes.md +0 -494
- examples/multi_step/verilog_rl_lora.md +0 -218
- examples/qwen_coder/README.md +0 -102
- examples/qwen_coder/_shared.py +0 -113
- examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
- examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
- examples/qwen_coder/configs/coder_lora_small.toml +0 -58
- examples/qwen_coder/generate_dataset.py +0 -98
- examples/qwen_coder/infer_ft_smoke.py +0 -65
- examples/qwen_coder/infer_prod_proxy.py +0 -73
- examples/qwen_coder/infer_via_synth.py +0 -87
- examples/qwen_coder/scripts/infer_coder.sh +0 -19
- examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
- examples/qwen_coder/sft_full_17b.py +0 -103
- examples/qwen_coder/sft_lora_30b.py +0 -110
- examples/qwen_coder/subset_jsonl.py +0 -39
- examples/qwen_coder/todos.md +0 -38
- examples/qwen_coder/validate_jsonl.py +0 -60
- examples/rl/README.md +0 -169
- examples/rl/download_dataset.py +0 -80
- examples/run_crafter_demo.sh +0 -10
- examples/sft/README.md +0 -139
- examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
- examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
- examples/sft/evaluate.py +0 -119
- examples/sft/export_dataset.py +0 -117
- examples/sft/generate_traces.py +0 -164
- examples/swe/__init__.py +0 -12
- examples/swe/task_app/README.md +0 -105
- examples/swe/task_app/__init__.py +0 -2
- examples/swe/task_app/grpo_swe_mini.py +0 -601
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
- examples/swe/task_app/hosted/README.md +0 -173
- examples/swe/task_app/hosted/__init__.py +0 -5
- examples/swe/task_app/hosted/branching.py +0 -143
- examples/swe/task_app/hosted/environment_routes.py +0 -1289
- examples/swe/task_app/hosted/envs/__init__.py +0 -1
- examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
- examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
- examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
- examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
- examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
- examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
- examples/swe/task_app/hosted/hosted_app.py +0 -204
- examples/swe/task_app/hosted/inference/__init__.py +0 -5
- examples/swe/task_app/hosted/inference/openai_client.py +0 -618
- examples/swe/task_app/hosted/main.py +0 -100
- examples/swe/task_app/hosted/policy_routes.py +0 -1079
- examples/swe/task_app/hosted/registry.py +0 -195
- examples/swe/task_app/hosted/rollout.py +0 -1911
- examples/swe/task_app/hosted/storage/__init__.py +0 -5
- examples/swe/task_app/hosted/storage/volume.py +0 -211
- examples/swe/task_app/hosted/test_agents.py +0 -161
- examples/swe/task_app/hosted/test_service.py +0 -136
- examples/swe/task_app/hosted/utils.py +0 -62
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
- examples/task_apps/TESTING.md +0 -275
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
- examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
- examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
- examples/task_apps/crafter/task_app/README.md +0 -42
- examples/task_apps/crafter/task_app/__init__.py +0 -5
- examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
- examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
- examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
- examples/task_apps/enron/__init__.py +0 -1
- examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
- examples/task_apps/enron/filter_sft.toml +0 -5
- examples/task_apps/enron/task_app/README.md +0 -14
- examples/task_apps/enron/task_app/__init__.py +0 -1
- examples/task_apps/enron/task_app/grpo_enron.py +0 -906
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
- examples/task_apps/enron/tests/__init__.py +0 -4
- examples/task_apps/enron/tests/conftest.py +0 -115
- examples/task_apps/enron/tests/integration/__init__.py +0 -4
- examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
- examples/task_apps/enron/tests/unit/__init__.py +0 -4
- examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
- examples/task_apps/math/README.md +0 -22
- examples/task_apps/math/__init__.py +0 -0
- examples/task_apps/math/math_single_step.py +0 -1000
- examples/task_apps/math/math_task_app.py +0 -115
- examples/task_apps/pokemon_battle/__init__.py +0 -2
- examples/task_apps/pokemon_battle/modal_app.py +0 -104
- examples/task_apps/pokemon_battle/task_app/README.md +0 -68
- examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
- examples/task_apps/pokemon_red/README.md +0 -357
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
- examples/task_apps/pokemon_red/__init__.py +0 -3
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
- examples/task_apps/pokemon_red/task_app.py +0 -799
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
- examples/task_apps/sokoban/README.md +0 -307
- examples/task_apps/sokoban/__init__.py +0 -3
- examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
- examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
- examples/task_apps/sokoban/filter_sft.toml +0 -5
- examples/task_apps/sokoban/task_app.py +0 -1058
- examples/task_apps/sokoban/tests/__init__.py +0 -4
- examples/task_apps/sokoban/tests/conftest.py +0 -113
- examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
- examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
- examples/task_apps/verilog/__init__.py +0 -1
- examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
- examples/task_apps/verilog/filter_sft.toml +0 -5
- examples/task_apps/verilog/task_app/README.md +0 -12
- examples/task_apps/verilog/task_app/__init__.py +0 -1
- examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
- examples/task_apps/verilog/tests/__init__.py +0 -4
- examples/task_apps/verilog/tests/conftest.py +0 -115
- examples/task_apps/verilog/tests/integration/__init__.py +0 -4
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
- examples/task_apps/verilog/tests/unit/__init__.py +0 -4
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
- examples/vlm/PROPOSAL.md +0 -53
- examples/vlm/README.md +0 -68
- examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
- examples/vlm/crafter_image_only_agent.py +0 -207
- examples/vlm/crafter_openai_vlm_agent.py +0 -277
- examples/vlm/filter_image_rows.py +0 -63
- examples/vlm/run_crafter_vlm_benchmark.py +0 -316
- examples/warming_up_to_rl/analyze_trace_db.py +0 -422
- examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
- examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
- examples/warming_up_to_rl/export_trace_sft.py +0 -723
- examples/warming_up_to_rl/groq_test.py +0 -97
- examples/warming_up_to_rl/manage_secrets.py +0 -131
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/readme.md +0 -179
- examples/warming_up_to_rl/run_eval.py +0 -736
- examples/warming_up_to_rl/run_fft_and_save.py +0 -380
- examples/warming_up_to_rl/run_local_rollout.py +0 -239
- examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
- examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
- examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
- examples/warming_up_to_rl/run_rl_and_save.py +0 -124
- examples/warming_up_to_rl/run_rollout_remote.py +0 -156
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
- examples/workflows/math_rl/download_dataset.py +0 -80
- examples/workflows/math_rl/run_eval.py +0 -436
- examples/workflows/math_rl/run_rl_and_save.py +0 -111
- synth_ai/api/models/supported.py +0 -377
- synth_ai/api/train/__init__.py +0 -5
- synth_ai/api/train/builders.py +0 -351
- synth_ai/api/train/cli.py +0 -635
- synth_ai/api/train/config_finder.py +0 -228
- synth_ai/api/train/configs/__init__.py +0 -44
- synth_ai/api/train/configs/rl.py +0 -134
- synth_ai/api/train/configs/sft.py +0 -95
- synth_ai/api/train/configs/shared.py +0 -24
- synth_ai/api/train/env_resolver.py +0 -349
- synth_ai/api/train/pollers.py +0 -75
- synth_ai/api/train/supported_algos.py +0 -147
- synth_ai/api/train/task_app.py +0 -195
- synth_ai/api/train/utils.py +0 -225
- synth_ai/cli/_modal_wrapper.py +0 -29
- synth_ai/cli/_storage.py +0 -20
- synth_ai/cli/_typer_patch.py +0 -49
- synth_ai/cli/_validate_task_app.py +0 -11
- synth_ai/cli/balance.py +0 -216
- synth_ai/cli/calc.py +0 -84
- synth_ai/cli/demo.py +0 -165
- synth_ai/cli/legacy_root_backup.py +0 -468
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/recent.py +0 -132
- synth_ai/cli/rl_demo.py +0 -254
- synth_ai/cli/status.py +0 -134
- synth_ai/cli/task_apps.py +0 -4523
- synth_ai/cli/traces.py +0 -164
- synth_ai/cli/tui.py +0 -57
- synth_ai/cli/watch.py +0 -506
- synth_ai/compound/cais.py +0 -0
- synth_ai/config/base_url.py +0 -107
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/demos/core/__init__.py +0 -1
- synth_ai/demos/core/cli.py +0 -1718
- synth_ai/demos/demo_task_apps/core.py +0 -440
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
- synth_ai/demos/demo_task_apps/math/config.toml +0 -74
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
- synth_ai/environments/__init__.py +0 -31
- synth_ai/environments/environment/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/base.py +0 -52
- synth_ai/environments/environment/core.py +0 -67
- synth_ai/environments/environment/db/__init__.py +0 -1
- synth_ai/environments/environment/db/sqlite.py +0 -45
- synth_ai/environments/environment/registry.py +0 -233
- synth_ai/environments/environment/resources/sqlite.py +0 -45
- synth_ai/environments/environment/results.py +0 -1
- synth_ai/environments/environment/rewards/__init__.py +0 -1
- synth_ai/environments/environment/rewards/core.py +0 -29
- synth_ai/environments/environment/shared_engine.py +0 -26
- synth_ai/environments/environment/tools/__init__.py +0 -200
- synth_ai/environments/examples/__init__.py +0 -1
- synth_ai/environments/examples/bandit/__init__.py +0 -33
- synth_ai/environments/examples/bandit/engine.py +0 -302
- synth_ai/environments/examples/bandit/environment.py +0 -194
- synth_ai/environments/examples/bandit/taskset.py +0 -200
- synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
- synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +0 -579
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
- synth_ai/environments/examples/crafter_classic/environment.py +0 -495
- synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
- synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
- synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
- synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
- synth_ai/environments/examples/crafter_custom/environment.py +0 -312
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
- synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
- synth_ai/environments/examples/enron/engine.py +0 -300
- synth_ai/environments/examples/enron/environment.py +0 -234
- synth_ai/environments/examples/enron/taskset.py +0 -112
- synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
- synth_ai/environments/examples/minigrid/__init__.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
- synth_ai/environments/examples/minigrid/engine.py +0 -589
- synth_ai/environments/examples/minigrid/environment.py +0 -274
- synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
- synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
- synth_ai/environments/examples/minigrid/taskset.py +0 -583
- synth_ai/environments/examples/nethack/__init__.py +0 -7
- synth_ai/environments/examples/nethack/achievements.py +0 -337
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
- synth_ai/environments/examples/nethack/engine.py +0 -739
- synth_ai/environments/examples/nethack/environment.py +0 -256
- synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
- synth_ai/environments/examples/nethack/taskset.py +0 -323
- synth_ai/environments/examples/red/__init__.py +0 -7
- synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/red/config_logging.py +0 -110
- synth_ai/environments/examples/red/engine.py +0 -721
- synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
- synth_ai/environments/examples/red/environment.py +0 -298
- synth_ai/environments/examples/red/taskset.py +0 -79
- synth_ai/environments/examples/red/units/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
- synth_ai/environments/examples/sokoban/engine.py +0 -678
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
- synth_ai/environments/examples/sokoban/environment.py +0 -229
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
- synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
- synth_ai/environments/examples/sokoban/taskset.py +0 -544
- synth_ai/environments/examples/tictactoe/__init__.py +0 -1
- synth_ai/environments/examples/tictactoe/engine.py +0 -368
- synth_ai/environments/examples/tictactoe/environment.py +0 -240
- synth_ai/environments/examples/tictactoe/taskset.py +0 -215
- synth_ai/environments/examples/verilog/__init__.py +0 -10
- synth_ai/environments/examples/verilog/engine.py +0 -421
- synth_ai/environments/examples/verilog/environment.py +0 -350
- synth_ai/environments/examples/verilog/taskset.py +0 -420
- synth_ai/environments/examples/wordle/__init__.py +0 -29
- synth_ai/environments/examples/wordle/engine.py +0 -398
- synth_ai/environments/examples/wordle/environment.py +0 -159
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
- synth_ai/environments/examples/wordle/taskset.py +0 -230
- synth_ai/environments/reproducibility/core.py +0 -42
- synth_ai/environments/reproducibility/helpers.py +0 -0
- synth_ai/environments/reproducibility/tree.py +0 -363
- synth_ai/environments/service/app.py +0 -97
- synth_ai/environments/service/core_routes.py +0 -1021
- synth_ai/environments/service/external_registry.py +0 -56
- synth_ai/environments/service/registry.py +0 -9
- synth_ai/environments/stateful/__init__.py +0 -1
- synth_ai/environments/stateful/core.py +0 -163
- synth_ai/environments/stateful/engine.py +0 -21
- synth_ai/environments/stateful/state.py +0 -7
- synth_ai/environments/tasks/api.py +0 -19
- synth_ai/environments/tasks/core.py +0 -81
- synth_ai/environments/tasks/filters.py +0 -40
- synth_ai/environments/tasks/utils.py +0 -90
- synth_ai/environments/v0_observability/history.py +0 -3
- synth_ai/environments/v0_observability/log.py +0 -2
- synth_ai/evals/__init__.py +0 -15
- synth_ai/evals/base.py +0 -13
- synth_ai/evals/client.py +0 -82
- synth_ai/evals/types.py +0 -42
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- synth_ai/http_client.py +0 -136
- synth_ai/inference/__init__.py +0 -5
- synth_ai/inference/client.py +0 -34
- synth_ai/jobs/client.py +0 -295
- synth_ai/judge_schemas.py +0 -127
- synth_ai/learning/__init__.py +0 -59
- synth_ai/learning/client.py +0 -241
- synth_ai/learning/ft_client.py +0 -7
- synth_ai/learning/health.py +0 -49
- synth_ai/learning/jobs.py +0 -201
- synth_ai/learning/rl/__init__.py +0 -39
- synth_ai/learning/rl/client.py +0 -267
- synth_ai/learning/rl/contracts.py +0 -27
- synth_ai/learning/rl/env_keys.py +0 -166
- synth_ai/learning/rl/secrets.py +0 -13
- synth_ai/learning/sft/client.py +0 -68
- synth_ai/learning/sft/config.py +0 -270
- synth_ai/learning/sft/data.py +0 -295
- synth_ai/learning/validators.py +0 -49
- synth_ai/lm/__init__.py +0 -25
- synth_ai/task/__init__.py +0 -121
- synth_ai/task/apps/__init__.py +0 -129
- synth_ai/task/client.py +0 -167
- synth_ai/task/config.py +0 -257
- synth_ai/task/contracts.py +0 -236
- synth_ai/task/datasets.py +0 -108
- synth_ai/task/proxy.py +0 -251
- synth_ai/task/rubrics/__init__.py +0 -56
- synth_ai/task/rubrics/loaders.py +0 -152
- synth_ai/task/rubrics/strict.py +0 -149
- synth_ai/task/server.py +0 -432
- synth_ai/task/trace_correlation_helpers.py +0 -315
- synth_ai/task/tracing_utils.py +0 -84
- synth_ai/task/validators.py +0 -418
- synth_ai/tracing_v3/__init__.py +0 -97
- synth_ai/tracing_v3/abstractions.py +0 -302
- synth_ai/tracing_v3/config.py +0 -84
- synth_ai/tracing_v3/db_config.py +0 -194
- synth_ai/tracing_v3/decorators.py +0 -398
- synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
- synth_ai/tracing_v3/migration_helper.py +0 -120
- synth_ai/tracing_v3/session_tracer.py +0 -540
- synth_ai/tracing_v3/storage/base.py +0 -210
- synth_ai/tracing_v3/storage/config.py +0 -75
- synth_ai/tracing_v3/storage/factory.py +0 -39
- synth_ai/tracing_v3/trace_utils.py +0 -317
- synth_ai/tracing_v3/turso/daemon.py +0 -151
- synth_ai/tracing_v3/turso/models.py +0 -469
- synth_ai/tracing_v3/turso/native_manager.py +0 -1209
- synth_ai/tracing_v3/utils.py +0 -108
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- synth_ai/v0/api/__init__.py +0 -8
- synth_ai/v0/api/models/__init__.py +0 -8
- synth_ai/v0/api/models/supported.py +0 -8
- synth_ai/v0/config/__init__.py +0 -15
- synth_ai/v0/config/base_url.py +0 -12
- synth_ai/v0/lm/__init__.py +0 -51
- synth_ai/v0/lm/caching/__init__.py +0 -0
- synth_ai/v0/lm/caching/constants.py +0 -6
- synth_ai/v0/lm/caching/dbs.py +0 -0
- synth_ai/v0/lm/caching/ephemeral.py +0 -100
- synth_ai/v0/lm/caching/handler.py +0 -137
- synth_ai/v0/lm/caching/initialize.py +0 -11
- synth_ai/v0/lm/caching/persistent.py +0 -114
- synth_ai/v0/lm/config.py +0 -115
- synth_ai/v0/lm/constants.py +0 -32
- synth_ai/v0/lm/core/__init__.py +0 -8
- synth_ai/v0/lm/core/all.py +0 -73
- synth_ai/v0/lm/core/exceptions.py +0 -5
- synth_ai/v0/lm/core/main.py +0 -331
- synth_ai/v0/lm/core/main_v3.py +0 -594
- synth_ai/v0/lm/core/synth_models.py +0 -35
- synth_ai/v0/lm/core/vendor_clients.py +0 -190
- synth_ai/v0/lm/cost/__init__.py +0 -0
- synth_ai/v0/lm/cost/monitor.py +0 -1
- synth_ai/v0/lm/cost/statefulness.py +0 -1
- synth_ai/v0/lm/injection.py +0 -80
- synth_ai/v0/lm/overrides.py +0 -206
- synth_ai/v0/lm/provider_support/__init__.py +0 -8
- synth_ai/v0/lm/provider_support/anthropic.py +0 -972
- synth_ai/v0/lm/provider_support/openai.py +0 -1139
- synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
- synth_ai/v0/lm/structured_outputs/handler.py +0 -440
- synth_ai/v0/lm/structured_outputs/inject.py +0 -297
- synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/v0/lm/tools/__init__.py +0 -3
- synth_ai/v0/lm/tools/base.py +0 -172
- synth_ai/v0/lm/unified_interface.py +0 -202
- synth_ai/v0/lm/vendors/__init__.py +0 -0
- synth_ai/v0/lm/vendors/base.py +0 -81
- synth_ai/v0/lm/vendors/core/__init__.py +0 -0
- synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
- synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/v0/lm/vendors/local/__init__.py +0 -0
- synth_ai/v0/lm/vendors/local/ollama.py +0 -0
- synth_ai/v0/lm/vendors/openai_standard.py +0 -782
- synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
- synth_ai/v0/lm/vendors/retries.py +0 -22
- synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
- synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
- synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/v0/lm/vendors/supported/grok.py +0 -75
- synth_ai/v0/lm/vendors/supported/groq.py +0 -16
- synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
- synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/v0/lm/vendors/supported/together.py +0 -11
- synth_ai/v0/lm/vendors/synth_client.py +0 -835
- synth_ai/v0/lm/warmup.py +0 -186
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -409
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/v0/tracing_v3/__init__.py +0 -10
- synth_ai/v0/tracing_v3/abstractions.py +0 -3
- synth_ai/v0/tracing_v3/decorators.py +0 -3
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
- synth_ai/v0/tracing_v3/session_tracer.py +0 -3
- synth_ai-0.2.14.dist-info/METADATA +0 -139
- synth_ai-0.2.14.dist-info/RECORD +0 -762
- synth_ai-0.2.14.dist-info/top_level.txt +0 -2
- /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
- {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
- /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
- /synth_ai/{learning → sdk/learning}/config.py +0 -0
- /synth_ai/{learning → sdk/learning}/constants.py +0 -0
- /synth_ai/{learning → sdk/learning}/core.py +0 -0
- /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
- /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
- /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
- /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
- /synth_ai/{learning → sdk/learning}/sse.py +0 -0
- /synth_ai/{task → sdk/task}/auth.py +0 -0
- /synth_ai/{task → sdk/task}/errors.py +0 -0
- /synth_ai/{task → sdk/task}/health.py +0 -0
- /synth_ai/{task → sdk/task}/json.py +0 -0
- /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
- /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
- /synth_ai/{task → sdk/task}/vendors.py +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
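The hunk below adds a streaming log-handler module: an abstract `StreamHandler` with `handle`/`should_handle`/`flush` hooks, plus concrete CLI, JSON, callback, buffered, Rich, Graph Opt, context-learning, and prompt-learning handlers that all consume `StreamMessage` objects. As a rough, illustrative sketch of that dispatch pattern (not the package's own API), a consumer fans each message out to every registered handler; the stand-in `StreamMessage`/`StreamType` definitions and field names below are assumptions inferred from the hunk, not confirmed types.

```python
# Illustrative sketch only. StreamMessage/StreamType here are local stand-ins
# whose field names are inferred from the hunk below; the real types live in
# the package's streaming module (path not shown in this hunk).
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Any


class StreamType(Enum):
    STATUS = auto()
    EVENTS = auto()
    METRICS = auto()
    TIMELINE = auto()


@dataclass
class StreamMessage:
    stream_type: StreamType
    job_id: str
    timestamp: float
    data: dict[str, Any] = field(default_factory=dict)
    seq: int | None = None
    step: int | None = None
    phase: str | None = None


class PrintHandler:
    """Minimal handler following the StreamHandler contract (should_handle/handle/flush)."""

    def should_handle(self, message: StreamMessage) -> bool:
        return True

    def handle(self, message: StreamMessage) -> None:
        print(f"[{message.stream_type.name}] {message.data}")

    def flush(self) -> None:
        return None


def dispatch(message: StreamMessage, handlers: list) -> None:
    # Fan a single message out to every registered handler.
    for handler in handlers:
        if handler.should_handle(message):
            handler.handle(message)


if __name__ == "__main__":
    msg = StreamMessage(
        stream_type=StreamType.METRICS,
        job_id="job-123",
        timestamp=0.0,
        data={"name": "train.loss", "value": 0.42, "step": 10},
    )
    dispatch(msg, [PrintHandler()])
```

With the real classes from the hunk, `PrintHandler` would be swapped for, e.g., `CLIHandler()` or `JSONHandler("events.jsonl")`, both of which accept the same `handle(message)` call.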
@@ -0,0 +1,1997 @@
+from __future__ import annotations
+
+import contextlib
+import json
+import re
+import time
+from abc import ABC, abstractmethod
+from collections import deque
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Callable
+
+import click
+
+from .types import StreamMessage, StreamType
+
+
+def _mask_sensitive_urls(text: str) -> str:
+    """Mask S3/Wasabi URLs and sensitive paths in log messages.
+
+    Replaces full S3/Wasabi URLs with masked versions to prevent leaking
+    bucket names, paths, and infrastructure details in public SDK logs.
+
+    Examples:
+        s3://synth-artifacts/models/... -> s3://***/***/[masked]
+        Wasabi s3://bucket/path/file.tar.gz -> Wasabi s3://***/***/[masked]
+    """
+    if not text:
+        return text
+
+    # Pattern matches:
+    # - Optional "Wasabi " prefix
+    # - s3:// or http(s):// scheme
+    # - Any bucket/host
+    # - Any path
+    # - Common model file extensions
+    pattern = r'(Wasabi\s+)?((s3|https?)://[^\s]+\.(tar\.gz|zip|pt|pth|safetensors|ckpt|bin))'
+
+    def replace_url(match: re.Match) -> str:
+        prefix = match.group(1) or ""  # "Wasabi " or empty
+        url = match.group(2)
+        # Extract just the filename
+        filename = url.split("/")[-1] if "/" in url else "file"
+        return f'{prefix}s3://***/***/[{filename}]'
+
+    return re.sub(pattern, replace_url, text, flags=re.IGNORECASE)
+
+
+class StreamHandler(ABC):
+    """Base class for log handlers that consume ``StreamMessage`` objects."""
+
+    @abstractmethod
+    def handle(self, message: StreamMessage) -> None:
+        """Process a message produced by the streamer."""
+
+    def should_handle(self, message: StreamMessage) -> bool:  # pragma: no cover - trivial
+        """Predicate allowing handlers to filter messages before processing."""
+        return True
+
+    def flush(self) -> None:  # pragma: no cover - optional
+        """Flush buffered output."""
+        return None
+
+
+class CLIHandler(StreamHandler):
+    """Simple CLI output mirroring current poller behaviour."""
+
+    def __init__(
+        self,
+        *,
+        hidden_event_types: set[str] | None = None,
+        hidden_event_substrings: set[str] | None = None,
+    ) -> None:
+        self._hidden_event_types = set(hidden_event_types or set())
+        self._hidden_event_substrings = {s.lower() for s in (hidden_event_substrings or set())}
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        if message.stream_type is StreamType.STATUS:
+            status = str(message.data.get("status") or message.data.get("state") or "unknown")
+            click.echo(f"[{timestamp}] status={status}")
+            return
+
+        if message.stream_type is StreamType.EVENTS:
+            event_type = message.data.get("type", "event")
+            if event_type in self._hidden_event_types:
+                return
+            level = message.data.get("level")
+            msg = message.data.get("message") or ""
+            # Evaluate substring filters against lower-cased concatenated text
+            if self._hidden_event_substrings:
+                blob = " ".join(
+                    [
+                        event_type or "",
+                        str(msg),
+                        json.dumps(message.data.get("data", "")),
+                    ]
+                ).lower()
+                if any(sub in blob for sub in self._hidden_event_substrings):
+                    return
+            prefix = f"[{timestamp}] [{message.seq}] {event_type}"
+            if level:
+                prefix += f" ({level})"
+            # Mask sensitive URLs before displaying
+            sanitized_msg = _mask_sensitive_urls(msg)
+
+            # For error events, show full details including underlying errors
+            if level == "error" or event_type.endswith(".failed"):
+                click.echo(f"{prefix}: {sanitized_msg}")
+                # Show error details from data field if available
+                data = message.data.get("data", {})
+                if isinstance(data, dict):
+                    error_detail = data.get("detail") or data.get("error") or data.get("error_detail")
+                    if error_detail and str(error_detail) != sanitized_msg:
+                        # Show underlying error if different from main message
+                        click.echo(f"    Error details: {error_detail}")
+                    # Show traceback or stack if available
+                    traceback_info = data.get("traceback") or data.get("stack")
+                    if traceback_info:
+                        lines = str(traceback_info).split("\n")
+                        # Show last few lines of traceback (most relevant)
+                        for line in lines[-5:]:
+                            if line.strip():
+                                click.echo(f"    {line}")
+            else:
+                click.echo(f"{prefix}: {sanitized_msg}".rstrip(": "))
+
+            data = message.data.get("data") if isinstance(message.data.get("data"), dict) else {}
+            if event_type == "prompt.learning.mipro.complete" and data:
+                best_prompt = data.get("best_prompt")
+                if isinstance(best_prompt, dict):
+                    sections = best_prompt.get("sections")
+                    if isinstance(sections, list) and sections:
+                        click.echo("  --- BEST PROMPT ---")
+                        for section in sections:
+                            if not isinstance(section, dict):
+                                continue
+                            role = section.get("role", "unknown").upper()
+                            name = section.get("name")
+                            header = f"  [{role}]"
+                            if name:
+                                header += f" {name}"
+                            click.echo(header)
+                            content = section.get("content", "")
+                            if isinstance(content, str) and content:
+                                click.echo(f"    {content}")
+                        click.echo("  -------------------")
+
+            if event_type == "mipro.topk.evaluated" and data:
+                rank = data.get("rank")
+                train_score = data.get("train_score")
+                test_score = data.get("test_score")
+                instruction_text = data.get("instruction_text", "")
+                demo_indices = data.get("demo_indices", [])
+                lift_abs = data.get("lift_absolute")
+                lift_pct = data.get("lift_percent")
+                stage_payloads = data.get("stage_payloads", {})
+                details: list[str] = []
+                if rank is not None:
+                    details.append(f"Rank {rank}")
+                if isinstance(train_score, int | float):
+                    train_score_float = float(train_score)
+                    details.append(f"train={train_score_float:.3f} ({train_score_float*100:.1f}%)")
+                if isinstance(test_score, int | float):
+                    test_score_float = float(test_score)
+                    details.append(f"test={test_score_float:.3f} ({test_score_float*100:.1f}%)")
+                if isinstance(lift_abs, int | float) and isinstance(lift_pct, int | float):
+                    details.append(f"lift={lift_abs:+.3f} ({lift_pct:+.1f}%)")
+                if details:
+                    click.echo("  --- TOP-K CANDIDATE ---")
+                    click.echo(f"  {' | '.join(details)}")
+                if isinstance(instruction_text, str) and instruction_text.strip():
+                    snippet = instruction_text.strip()
+                    click.echo(f"  Instruction: {snippet}")
+                if isinstance(demo_indices, list) and demo_indices:
+                    click.echo(f"  Demo indices: {demo_indices}")
+
+                # Display per-stage information if available
+                if isinstance(stage_payloads, dict) and stage_payloads:
+                    click.echo("  Per-stage breakdown:")
+                    for stage_id, payload in stage_payloads.items():
+                        if isinstance(payload, dict):
+                            module_id = payload.get("module_id", stage_id)
+                            instr_ids = payload.get("instruction_indices", [])
+                            demo_ids = payload.get("demo_indices", [])
+                            click.echo(f"    [{module_id}/{stage_id}] instr_ids={instr_ids} demo_ids={demo_ids}")
+
+                seed_scores = data.get("test_seed_scores")
+                if isinstance(seed_scores, list) and seed_scores:
+                    formatted_scores = ", ".join(
+                        f"{item.get('seed')}: {item.get('score'):.2f}"
+                        for item in seed_scores
+                        if isinstance(item, dict) and isinstance(item.get("seed"), int) and isinstance(item.get("score"), int | float)
+                    )
+                    if formatted_scores:
+                        click.echo(f"  Test per-seed: {formatted_scores}")
+                click.echo("  ----------------------")
+            return
+
+        if message.stream_type is StreamType.METRICS:
+            name = message.data.get("name")
+            value = message.data.get("value")
+            step = message.data.get("step")
+            data = message.data.get("data", {})
+
+            # Format metric display
+            metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, int | float) else f"[{timestamp}] [metric] {name}={value}"
+            if step is not None:
+                metric_str += f" (step={step})"
+
+            # Add any additional context from data field
+            if isinstance(data, dict):
+                n = data.get("n")
+                if n is not None:
+                    metric_str += f" n={n}"
+
+            click.echo(metric_str)
+            return
+
+        if message.stream_type is StreamType.TIMELINE:
+            phase = message.data.get("phase", "phase")
+            click.echo(f"[{timestamp}] timeline={phase}")
+
+
+class JSONHandler(StreamHandler):
+    """Emit messages as JSON lines suitable for machine parsing."""
+
+    def __init__(self, output_file: str | None = None, *, indent: int | None = None) -> None:
+        self.output_file = Path(output_file).expanduser() if output_file else None
+        self._indent = indent
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        payload: dict[str, Any] = {
+            "stream_type": message.stream_type.name,
+            "timestamp": message.timestamp,
+            "job_id": message.job_id,
+            "data": message.data,
+        }
+        if message.seq is not None:
+            payload["seq"] = message.seq
+        if message.step is not None:
+            payload["step"] = message.step
+        if message.phase is not None:
+            payload["phase"] = message.phase
+
+        line = json.dumps(payload, indent=self._indent)
+        if self.output_file:
+            with self.output_file.open("a", encoding="utf-8") as fh:
+                fh.write(line)
+                if self._indent is None:
+                    fh.write("\n")
+        else:
+            click.echo(line)
+
+    def flush(self) -> None:
+        return None
+
+
+class CallbackHandler(StreamHandler):
+    """Invoke user-provided callbacks for specific stream types."""
+
+    def __init__(
+        self,
+        *,
+        on_status: Callable[[dict[str, Any]], None] | None = None,
+        on_event: Callable[[dict[str, Any]], None] | None = None,
+        on_metric: Callable[[dict[str, Any]], None] | None = None,
+        on_timeline: Callable[[dict[str, Any]], None] | None = None,
+    ) -> None:
+        self._on_status = on_status
+        self._on_event = on_event
+        self._on_metric = on_metric
+        self._on_timeline = on_timeline
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        if message.stream_type is StreamType.STATUS and self._on_status:
+            self._on_status(message.data)
+        elif message.stream_type is StreamType.EVENTS and self._on_event:
+            self._on_event(message.data)
+        elif message.stream_type is StreamType.METRICS and self._on_metric:
+            self._on_metric(message.data)
+        elif message.stream_type is StreamType.TIMELINE and self._on_timeline:
+            self._on_timeline(message.data)
+
+
+class BufferedHandler(StreamHandler):
+    """Collect messages and emit them in batches."""
+
+    def __init__(self, *, flush_interval: float = 5.0, max_buffer_size: int = 100) -> None:
+        self.flush_interval = flush_interval
+        self.max_buffer_size = max_buffer_size
+        self._buffer: list[StreamMessage] = []
+        self._last_flush = time.time()
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        self._buffer.append(message)
+        now = time.time()
+        if len(self._buffer) >= self.max_buffer_size or now - self._last_flush >= self.flush_interval:
+            self.flush()
+
+    def flush(self) -> None:
+        if not self._buffer:
+            return
+        self.process_batch(self._buffer)
+        self._buffer.clear()
+        self._last_flush = time.time()
+
+    def process_batch(self, messages: list[StreamMessage]) -> None:  # pragma: no cover - abstract
+        """Override to define how buffered messages should be processed."""
+
+
+class IntegrationTestHandler(StreamHandler):
+    """Collect messages for integration tests or programmatic assertions."""
+
+    def __init__(self) -> None:
+        self.messages: list[StreamMessage] = []
+
+    def handle(self, message: StreamMessage) -> None:
+        self.messages.append(message)
+
+    def clear(self) -> None:
+        self.messages.clear()
+
+
+class GraphGenHandler(StreamHandler):
+    """Handler for Graph Opt jobs that delegate child job streams to an underlying handler.
+
+    Graph Opt jobs emit events from child jobs (GEPA, MIPRO, RL, SFT, etc.). This handler
+    provides light Graph Opt-aware filtering and routing while keeping child job output
+    intact via a delegate handler. The delegate can be supplied directly or created
+    via a factory; by default we choose a prompt-learning handler for GEPA/MIPRO and
+    a basic CLI handler for other job types.
+    """
+
+    def __init__(
+        self,
+        *,
+        child_handler: StreamHandler | None = None,
+        child_handler_factory: Callable[[str | None], StreamHandler | None] | None = None,
+        show_trial_results: bool = True,
+        show_transformations: bool = False,
+        show_validation: bool = True,
+        filter_verbose_events: bool = True,
+        wrap_child_events: bool = True,
+    ) -> None:
+        # User-supplied delegate or factory; both are optional.
+        self.child_handler = child_handler
+        self._child_handler_factory = child_handler_factory
+
+        # Options for the default prompt-learning delegate
+        self._pl_show_trial_results = show_trial_results
+        self._pl_show_transformations = show_transformations
+        self._pl_show_validation = show_validation
+
+        self.filter_verbose_events = filter_verbose_events
+        # If False, skip Graph Opt-specific filtering/transformations and just pass through.
+        self.wrap_child_events = wrap_child_events
+
+        # Detected child job type (gepa/mipro/rl/sft/etc.)
+        self.child_job_type: str | None = None
+        # Track whether we created the delegate automatically (so we can swap if needed)
+        self._delegate_auto_created = False
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        if message.stream_type is StreamType.EVENTS:
+            self._detect_child_job_type(message)
+            self._maybe_reset_delegate_for_child_type()
+
+            if self.wrap_child_events and self.filter_verbose_events:
+                if self._should_filter_event(message):
+                    return
+
+            if self.wrap_child_events:
+                message = self._transform_event_message(message)
+
+        delegate = self._get_child_handler()
+        if delegate:
+            delegate.handle(message)
+
+    def _get_child_handler(self) -> StreamHandler:
+        """Return or create the delegate handler used for child job events."""
+        if self.child_handler:
+            return self.child_handler
+
+        handler: StreamHandler | None = None
+        if self._child_handler_factory:
+            handler = self._child_handler_factory(self.child_job_type)
+
+        if handler is None:
+            # Choose a sensible default based on detected child job type
+            if self._is_prompt_learning_type(self.child_job_type):
+                handler = PromptLearningHandler(
+                    show_trial_results=self._pl_show_trial_results,
+                    show_transformations=self._pl_show_transformations,
+                    show_validation=self._pl_show_validation,
+                )
+            else:
+                handler = CLIHandler()
+
+        self.child_handler = handler
+        self._delegate_auto_created = self._child_handler_factory is None and self.child_handler is not None
+        return handler
+
+    def _detect_child_job_type(self, message: StreamMessage) -> None:
+        """Infer the child job type from event types."""
+        if self.child_job_type:
+            return
+
+        event_type = str(message.data.get("type") or "").lower()
+        if not event_type:
+            return
+
+        if event_type.startswith("graph_evolve."):
+            self.child_job_type = "graph_evolve"
+        elif "mipro" in event_type:
+            self.child_job_type = "mipro"
+        elif "gepa" in event_type or event_type.startswith("prompt.learning"):
+            self.child_job_type = "prompt_learning"
+        elif event_type.startswith("rl.") or ".rl." in event_type:
+            self.child_job_type = "rl"
+        elif event_type.startswith("sft.") or ".sft." in event_type:
+            self.child_job_type = "sft"
+        else:
+            # Fall back to the first segment as a hint (e.g., "graphgen.child_type")
+            parts = event_type.split(".")
+            if parts:
+                self.child_job_type = parts[0]
+
+    def _maybe_reset_delegate_for_child_type(self) -> None:
+        """Swap out auto-created delegates when we later detect a different child type."""
+        if not self.child_handler or not self._delegate_auto_created:
+            return
+
+        # If the detected type does not match the current delegate choice, rebuild.
+        wants_prompt_learning = self._is_prompt_learning_type(self.child_job_type)
+        has_prompt_learning_handler = isinstance(self.child_handler, PromptLearningHandler)
+
+        if wants_prompt_learning and not has_prompt_learning_handler:
+            self.child_handler = None
+            self._delegate_auto_created = False
+        elif not wants_prompt_learning and has_prompt_learning_handler:
+            self.child_handler = None
+            self._delegate_auto_created = False
+
+    def _should_filter_event(self, message: StreamMessage) -> bool:
+        """Determine if an event should be hidden from output."""
+        event_type = message.data.get("type", "") or ""
+        event_type_lower = event_type.lower()
+
+        # Never filter graph_evolve events - they're important for GraphGen jobs
+        if event_type.startswith("graph_evolve."):
+            return False
+
+        # Only filter prompt-learning style events; leave other job types untouched.
+        if not any(key in event_type_lower for key in ("prompt.learning", "gepa", "mipro")):
+            return False
+
+        important_events = {
+            "prompt.learning.created",
+            "prompt.learning.gepa.start",
+            "prompt.learning.gepa.complete",
+            "prompt.learning.mipro.job.started",
+            "prompt.learning.mipro.optimization.exhausted",
+            "prompt.learning.trial.results",
+            "prompt.learning.progress",
+            "prompt.learning.gepa.new_best",
+            "prompt.learning.validation.summary",
+            "prompt.learning.candidate.evaluated",
+            "prompt.learning.candidate.evaluation.started",
+            # GraphGen/graph_evolve important events
+            "graph_evolve.job_started",
+            "graph_evolve.generation_started",
+            "graph_evolve.generation_completed",
+            "graph_evolve.candidate_evaluated",
+            "graph_evolve.archive_updated",
+            "graph_evolve.job_completed",
+            "graph_evolve.job_failed",
+        }
+        if event_type in important_events:
+            return False
+
+        verbose_patterns = [
+            "gepa.transformation.proposed",
+            "gepa.proposal.scored",
+            "prompt.learning.proposal.scored",
+            "mipro.tpe.update",
+            "prompt.learning.stream.connected",
+        ]
+        return any(pattern in event_type_lower for pattern in verbose_patterns)
+
+    def _transform_event_message(self, message: StreamMessage) -> StreamMessage:
+        """Transform event messages for Graph Opt context (currently passthrough)."""
+        return message
+
+    def flush(self) -> None:
+        # Ensure delegate flushes buffered output if needed.
+        if self.child_handler and hasattr(self.child_handler, "flush"):
+            with contextlib.suppress(Exception):
+                self.child_handler.flush()
+
+    @staticmethod
+    def _is_prompt_learning_type(job_type: str | None) -> bool:
+        """Return True if the child job type should use prompt-learning formatting."""
+        return job_type in {"gepa", "mipro", "prompt_learning", "prompt-learning", None}
+
+
+class LossCurveHandler(StreamHandler):
+    """Render a live-updating loss chart inside a fixed Rich panel."""
+
+    def __init__(
+        self,
+        *,
+        metric_name: str = "train.loss",
+        max_points: int = 200,
+        width: int = 60,
+        console: Any | None = None,
+        live: Any | None = None,
+    ) -> None:
+        try:
+            from rich.console import Console
+            from rich.live import Live
+            from rich.panel import Panel
+            from rich.text import Text
+        except ImportError as exc:  # pragma: no cover - optional dependency guard
+            raise RuntimeError(
+                "LossCurveHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
+            ) from exc
+
+        self.metric_name = metric_name
+        self.max_points = max_points
+        self.width = width
+
+        self._console_class = Console
+        self._panel_class = Panel
+        self._text_class = Text
+
+        self._console = console or Console()
+        self._live = live or Live(console=self._console, transient=False, refresh_per_second=8)
+        self._started = False
+
+        self._steps: list[int] = []
+        self._values: list[float] = []
+        self._status = "waiting"
+        self._last_event: str | None = None
+
+    def handle(self, message: StreamMessage) -> None:
+        updated = False
+
+        if message.stream_type is StreamType.STATUS:
+            status = str(message.data.get("status") or message.data.get("state") or "unknown")
+            if status != self._status:
+                self._status = status
+                updated = True
+
+        elif message.stream_type is StreamType.EVENTS:
+            event_type = message.data.get("type", "")
+            msg = message.data.get("message") or ""
+            level = message.data.get("level")
+            summary = f"{event_type}".strip()
+            if level:
+                summary += f" ({level})"
+            if msg:
+                summary += f": {msg}"
+            if summary != self._last_event:
+                self._last_event = summary
+                updated = True
+
+        elif message.stream_type is StreamType.METRICS:
+            if message.data.get("name") != self.metric_name:
+                return
+            value = message.data.get("value")
+            step = message.data.get("step")
+            if not isinstance(value, int | float) or not isinstance(step, int):
+                return
+            self._values.append(float(value))
+            self._steps.append(step)
+            if len(self._values) > self.max_points:
+                self._values = self._values[-self.max_points :]
+                self._steps = self._steps[-self.max_points :]
+            updated = True
+
+        elif message.stream_type is StreamType.TIMELINE:
+            phase = message.data.get("phase")
+            if phase:
+                self._status = str(phase)
+                updated = True
+
+        if updated:
+            self._refresh()
+
+    def flush(self) -> None:
+        if self._started:
+            with contextlib.suppress(Exception):
+                self._live.stop()
+            self._started = False
+
+    def _ensure_live(self) -> None:
+        if not self._started:
+            with contextlib.suppress(Exception):
+                self._live.start()
+                self._started = True
+
+    def _refresh(self) -> None:
+        self._ensure_live()
+        body = self._build_body()
+        title = f"{self.metric_name} | status={self._status}"
+        self._live.update(self._panel_class(body, title=title, border_style="cyan"))
+
+    def _build_body(self) -> Any:
+        if not self._values:
+            return self._text_class("Waiting for metrics…", style="yellow")
+
+        chart = self._render_sparkline()
+        last_value = self._values[-1]
+        lines = [
+            chart,
+            f"latest: {last_value:.4f} (step {self._steps[-1]})",
+        ]
+        if self._last_event:
+            lines.append(f"event: {self._last_event}")
+        return "\n".join(lines)
+
+    def _render_sparkline(self) -> str:
+        blocks = "▁▂▃▄▅▆▇█"
+        tail_len = min(self.width, len(self._values))
+        tail = self._values[-tail_len:]
+        minimum = min(tail)
+        maximum = max(tail)
+        if maximum == minimum:
+            level = blocks[0]
+            return f"{minimum:.2f} {level * tail_len} {maximum:.2f}"
+        scale = (len(blocks) - 1) / (maximum - minimum)
+        chars = "".join(blocks[int((v - minimum) * scale + 0.5)] for v in tail)
+        return f"{minimum:.2f} {chars} {maximum:.2f}"
+
+    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
+        with contextlib.suppress(Exception):
+            self.flush()
+
+class RichHandler(StreamHandler):
+    """Rich powered handler with live progress and metrics table."""
+
+    def __init__(
+        self,
+        *,
+        event_log_size: int = 20,
+        console: Any | None = None,
+    ) -> None:
+        try:
+            from rich.console import Console
+            from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
+            from rich.table import Table
+        except ImportError as exc:  # pragma: no cover - requires optional dependency
+            raise RuntimeError(
+                "RichHandler requires the 'rich' package. Install synth-ai[all] or rich>=13."
+            ) from exc
+
+        self._console_class = Console
+        self._progress_class = Progress
+        self._spinner_column = SpinnerColumn
+        self._text_column = TextColumn
+        self._bar_column = BarColumn
+        self._table_class = Table
+
+        self._console = console or Console()
+        self._progress = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("{task.completed}/{task.total}" if console else ""),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+            transient=False,
+            console=self._console,
+        )
+        self._task_id: int | None = None
+        self._current_status = "unknown"
+        self._latest_metrics: dict[str, Any] = {}
+        self._event_log: deque[str] = deque(maxlen=event_log_size)
+        self._progress_started = False
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        if message.stream_type is StreamType.STATUS:
+            self._current_status = str(message.data.get("status") or message.data.get("state"))
+            self._ensure_progress_started()
+            if self._task_id is not None:
+                description = f"Status: {self._current_status}"
+                self._progress.update(self._task_id, description=description)  # type: ignore[arg-type]
+            self._render_summary()
+            return
+
+        if message.stream_type is StreamType.EVENTS:
+            event_type = message.data.get("type", "event")
+            summary = message.data.get("message") or ""
+            level = message.data.get("level")
+            # Mask sensitive URLs before displaying
+            sanitized_summary = _mask_sensitive_urls(summary)
+            formatted = f"[{event_type}] {sanitized_summary}".strip()
+            if level:
+                formatted = f"{formatted} ({level})"
+            self._event_log.append(formatted)
+            data = message.data.get("data") or {}
+            step = data.get("step") or data.get("current_step")
+            total_steps = data.get("total_steps") or data.get("max_steps")
+            if step and total_steps:
+                self._ensure_progress_started(total_steps)
+                if self._task_id is not None:
+                    self._progress.update(self._task_id, completed=int(step), total=int(total_steps))  # type: ignore[arg-type]
+            self._render_summary()
+            return
+
+        if message.stream_type is StreamType.METRICS:
+            name = message.data.get("name", "")
+            value = message.data.get("value")
+            if name:
+                self._latest_metrics[name] = value
+            self._render_summary()
+            return
+
+        if message.stream_type is StreamType.TIMELINE:
+            phase = message.data.get("phase", "")
+            if phase and phase.lower() not in {"training", "running"}:
+                self._event_log.append(f"[timeline] {phase}")
+            self._render_summary()
+
+    def flush(self) -> None:
+        if self._progress_started:
+            self._progress.stop()
+            self._progress_started = False
+        self._render_summary(force=True)
+
+    def _ensure_progress_started(self, total: int | float | None = None) -> None:
+        if not self._progress_started:
+            self._progress.start()
+            self._progress_started = True
+        if self._task_id is None:
+            self._task_id = self._progress.add_task(
+                f"Status: {self._current_status}", total=total or 100
+            )
+        elif total is not None and self._task_id is not None:
+            self._progress.update(self._task_id, total=total)  # type: ignore[arg-type]
+
+    def _render_summary(self, force: bool = False) -> None:
+        if force and self._progress_started:
+            self._progress.refresh()
+
+        table = self._table_class(title="Latest Metrics")
+        table.add_column("Metric")
+        table.add_column("Value")
+
+        if not self._latest_metrics:
+            table.add_row("—", "—")
+        else:
+            for name, value in sorted(self._latest_metrics.items()):
+                table.add_row(str(name), str(value))
+
+        if self._progress_started:
+            self._progress.console.print(table)
+        else:
+            self._console.print(table)
+
+        if self._event_log:
+            self._console.print("\nRecent events:")
+            for entry in list(self._event_log):
+                self._console.print(f"  • {entry}")
+
+class ContextLearningHandler(StreamHandler):
+    """CLI-friendly handler for Context Learning jobs.
+
+    Emits high-signal progress similar to other infra job handlers,
+    specialized for generation-based bash context optimization.
+    """
+
+    def __init__(self) -> None:
+        self.best_score_so_far = 0.0
+        self.current_generation = 0
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+
+        if message.stream_type is StreamType.STATUS:
+            status = str(message.data.get("status") or message.data.get("state") or "unknown")
+            click.echo(f"[{timestamp}] status={status}")
+            return
+
+        if message.stream_type is StreamType.METRICS:
+            name = message.data.get("name")
+            value = message.data.get("value")
+            step = message.data.get("step")
+            if isinstance(value, int | float):
+                try:
+                    val_f = float(value)
+                    if val_f > self.best_score_so_far:
+                        self.best_score_so_far = val_f
+                    if isinstance(step, int):
+                        self.current_generation = max(self.current_generation, step)
+                    click.echo(f"[{timestamp}] gen={step} best={val_f:.3f}")
+                    return
+                except Exception:
+                    pass
+            click.echo(f"[{timestamp}] metric {name}={value}")
+            return
+
+        if message.stream_type is StreamType.EVENTS:
+            event_type = str(message.data.get("type") or "")
+            msg = message.data.get("message") or ""
+            data = message.data.get("data") or {}
+
+            if event_type == "context.learning.generation.completed":
+                gen = data.get("generation") or data.get("gen") or self.current_generation
+                score = data.get("best_score") or data.get("score") or self.best_score_so_far
+                try:
+                    score_f = float(score)
+                    if score_f > self.best_score_so_far:
+                        self.best_score_so_far = score_f
+                    click.echo(f"[{timestamp}] generation {gen} best={score_f:.3f}")
+                except Exception:
+                    click.echo(f"[{timestamp}] generation {gen} completed")
+                return
+
+            if event_type.endswith(".failed"):
+                click.echo(f"[{timestamp}] {event_type}: {msg}")
+                return
+
+            if msg:
+                click.echo(f"[{timestamp}] {event_type}: {msg}")
+            else:
+                click.echo(f"[{timestamp}] {event_type}")
+
+
class PromptLearningHandler(StreamHandler):
|
|
852
|
+
"""Enhanced handler for GEPA/MIPRO prompt optimization jobs with rich formatting and metrics tracking.
|
|
853
|
+
|
|
854
|
+
This handler processes streaming events from both GEPA (Genetic Evolutionary Prompt
|
|
855
|
+
Algorithm) and MIPRO (Meta-Instruction PROposer) optimization jobs. It provides:
|
|
856
|
+
|
|
857
|
+
- **Real-time progress tracking**: Shows trial results, rollouts, iterations, and budget usage
|
|
858
|
+
- **Optimization curve tracking**: Maintains a history of best scores over time
|
|
859
|
+
- **GEPA-specific features**: Tracks transformations, rollouts, and validation results
|
|
860
|
+
- **MIPRO-specific features**: Tracks iterations, trials, minibatch/full evaluations, and budget
|
|
861
|
+
- **Dual output**: Writes to both console (via click.echo) and optional log file
|
|
862
|
+
|
|
863
|
+
The handler filters verbose events (like TPE updates, proposed instructions) to keep
|
|
864
|
+
output readable while preserving important progress information. It formats output
|
|
865
|
+
consistently between GEPA and MIPRO for easier comparison.
|
|
866
|
+
|
|
867
|
+
Example:
|
|
868
|
+
>>> handler = PromptLearningHandler(
|
|
869
|
+
... show_trial_results=True,
|
|
870
|
+
... max_tokens=1_000_000,
|
|
871
|
+
... log_file=Path("optimization.log")
|
|
872
|
+
... )
|
|
873
|
+
>>> # Handler is used by JobStreamer to process events
|
|
874
|
+
"""
|
|
875
|
+
|
|
876
|
+
def __init__(
|
|
877
|
+
self,
|
|
878
|
+
*,
|
|
879
|
+
show_trial_results: bool = True,
|
|
880
|
+
show_transformations: bool = False,
|
|
881
|
+
show_validation: bool = True,
|
|
882
|
+
max_tokens: int | None = None,
|
|
883
|
+
max_time_seconds: float | None = None,
|
|
884
|
+
max_rollouts: int | None = None,
|
|
885
|
+
log_file: Path | None = None,
|
|
886
|
+
):
|
|
887
|
+
"""Initialize the prompt learning handler.
|
|
888
|
+
|
|
889
|
+
Args:
|
|
890
|
+
show_trial_results: Whether to display individual trial scores (default: True).
|
|
891
|
+
When True, shows each trial's score and best score so far.
|
|
892
|
+
show_transformations: Whether to display transformation/proposal details
|
|
893
|
+
(default: False). When True, shows verbose transformation events.
|
|
894
|
+
show_validation: Whether to display validation summaries (default: True).
|
|
895
|
+
Shows validation results comparing candidates against baseline.
|
|
896
|
+
max_tokens: Maximum token budget for MIPRO (from TOML termination_config).
|
|
897
|
+
Used to track progress and enforce limits.
|
|
898
|
+
max_time_seconds: Maximum time budget in seconds (from TOML termination_config).
|
|
899
|
+
Used to track elapsed time and ETA.
|
|
900
|
+
max_rollouts: Maximum rollouts budget (from TOML termination_config).
|
|
901
|
+
Used to track rollout progress for both GEPA and MIPRO.
|
|
902
|
+
log_file: Optional path to log file for persistent logging. If provided,
|
|
903
|
+
all output is written to both console and file. File is opened in
|
|
904
|
+
append mode and remains open for streaming.
|
|
905
|
+
"""
|
|
906
|
+
self.show_trial_results = show_trial_results
|
|
907
|
+
self.show_transformations = show_transformations
|
|
908
|
+
self.show_validation = show_validation
|
|
909
|
+
self.optimization_curve: list[tuple[int, float]] = []
|
|
910
|
+
self.trial_counter = 0
|
|
911
|
+
self.best_score_so_far = 0.0
|
|
912
|
+
|
|
913
|
+
# MIPRO progress tracking
|
|
914
|
+
self.mipro_start_time: float | None = None
|
|
915
|
+
self.mipro_total_trials: int | None = None
|
|
916
|
+
self.mipro_completed_trials: int = 0
|
|
917
|
+
self.mipro_total_tokens: int = 0
|
|
918
|
+
self.mipro_policy_tokens: int = 0 # Rollout tokens (policy only)
|
|
919
|
+
self.mipro_max_tokens: int | None = max_tokens # From TOML termination_config
|
|
920
|
+
self.mipro_total_cost: float = 0.0
|
|
921
|
+
self.mipro_max_cost: float | None = None
|
|
922
|
+
self.mipro_current_iteration: int = 0
|
|
923
|
+
self.mipro_num_iterations: int | None = None
|
|
924
|
+
self.mipro_trials_per_iteration: int | None = None
|
|
925
|
+
self.mipro_best_score: float = 0.0 # Track best full eval score
|
|
926
|
+
self.mipro_baseline_score: float | None = None # Track baseline for comparison
|
|
927
|
+
self.mipro_batch_size: int | None = None # Track minibatch size (N for minibatch scores)
|
|
928
|
+
self.mipro_rollouts_completed: int = 0 # Total rollouts completed
|
|
929
|
+
self.mipro_max_rollouts: int | None = max_rollouts # From TOML termination_config
|
|
930
|
+
self.mipro_max_time_seconds: float | None = max_time_seconds # From TOML termination_config
|
|
931
|
+
self._last_progress_emit_time: float | None = None # Throttle progress updates
|
|
932
|
+
self._progress_emit_interval: float = 5.0 # Emit progress at most every 5 seconds
|
|
933
|
+
|
|
934
|
+
# Log file for real-time streaming
|
|
935
|
+
self.log_file: Path | None = log_file
|
|
936
|
+
self._log_file_handle = None
|
|
937
|
+
if self.log_file:
|
|
938
|
+
try:
|
|
939
|
+
# Create parent directory if needed
|
|
940
|
+
self.log_file.parent.mkdir(parents=True, exist_ok=True)
|
|
941
|
+
# Open file in append mode for live streaming
|
|
942
|
+
# Note: File must remain open for streaming, so we can't use context manager
|
|
943
|
+
from datetime import datetime
|
|
944
|
+
self._log_file_handle = open(self.log_file, "a", encoding="utf-8") # noqa: SIM115
|
|
945
|
+
# Write header
|
|
946
|
+
self._log_file_handle.write("=" * 80 + "\n")
|
|
947
|
+
self._log_file_handle.write("PROMPT LEARNING VERBOSE LOG\n")
|
|
948
|
+
self._log_file_handle.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
|
949
|
+
self._log_file_handle.write("=" * 80 + "\n\n")
|
|
950
|
+
self._log_file_handle.flush()
|
|
951
|
+
except Exception as e:
|
|
952
|
+
# If we can't open the log file, continue without it
|
|
953
|
+
click.echo(f"⚠️ Could not open log file {log_file}: {e}", err=True)
|
|
954
|
+
self.log_file = None
|
|
955
|
+
self._log_file_handle = None
|
|
956
|
+
|
|
957
|
+
def _write_log(self, text: str) -> None:
|
|
958
|
+
"""Write text to both console and log file."""
|
|
959
|
+
click.echo(text)
|
|
960
|
+
if self._log_file_handle:
|
|
961
|
+
try:
|
|
962
|
+
self._log_file_handle.write(text + "\n")
|
|
963
|
+
self._log_file_handle.flush()
|
|
964
|
+
except Exception:
|
|
965
|
+
# If write fails, close handle and continue without logging
|
|
966
|
+
from contextlib import suppress
|
|
967
|
+
with suppress(Exception):
|
|
968
|
+
self._log_file_handle.close()
|
|
969
|
+
self._log_file_handle = None
|
|
970
|
+
|
|
971
|
+
def handle(self, message: StreamMessage) -> None:
|
|
972
|
+
"""Handle a stream message from the prompt learning job.
|
|
973
|
+
|
|
974
|
+
Routes messages to appropriate handlers based on stream type:
|
|
975
|
+
- STATUS: Job status updates (queued, running, completed, etc.)
|
|
976
|
+
- EVENTS: Algorithm-specific events (trials, iterations, transformations)
|
|
977
|
+
- METRICS: Performance metrics (scores, accuracies, costs)
|
|
978
|
+
- TIMELINE: Phase transitions
|
|
979
|
+
|
|
980
|
+
Filters verbose events (TPE updates, proposed instructions) to keep output
|
|
981
|
+
readable. MIPRO and GEPA events are handled by specialized methods.
|
|
982
|
+
|
|
983
|
+
Args:
|
|
984
|
+
message: StreamMessage containing event data from the backend
|
|
985
|
+
"""
|
|
986
|
+
if not self.should_handle(message):
|
|
987
|
+
return
|
|
988
|
+
|
|
989
|
+
timestamp = datetime.now().strftime("%H:%M:%S")
|
|
990
|
+
|
|
991
|
+
if message.stream_type is StreamType.STATUS:
|
|
992
|
+
status = str(message.data.get("status") or message.data.get("state") or "unknown")
|
|
993
|
+
self._write_log(f"[{timestamp}] status={status}")
|
|
994
|
+
return
|
|
995
|
+
|
|
996
|
+
if message.stream_type is StreamType.EVENTS:
|
|
997
|
+
event_type = message.data.get("type", "event")
|
|
998
|
+
level = message.data.get("level")
|
|
999
|
+
msg = message.data.get("message") or ""
|
|
1000
|
+
|
|
1001
|
+
# Handle MIPRO-specific events for progress tracking (before skipping hidden events)
|
|
1002
|
+
if event_type == "mipro.job.started":
|
|
1003
|
+
self._handle_mipro_job_started(message.data)
|
|
1004
|
+
# Continue to default display
|
|
1005
|
+
|
|
1006
|
+
if event_type == "mipro.budget.update":
|
|
1007
|
+
self._handle_mipro_budget_update(message.data)
|
|
1008
|
+
# Continue to default display
|
|
1009
|
+
|
|
1010
|
+
if event_type == "mipro.trial.complete":
|
|
1011
|
+
self._handle_mipro_trial_complete(message.data)
|
|
1012
|
+
# Continue to default display
|
|
1013
|
+
|
|
1014
|
+
# Show more MIPRO events - only hide the most verbose ones
|
|
1015
|
+
_hidden_mipro_events = {
|
|
1016
|
+
# Keep only the most verbose TPE updates hidden
|
|
1017
|
+
"mipro.tpe.update", # Very frequent, low value
|
|
1018
|
+
}
|
|
1019
|
+
if event_type in _hidden_mipro_events:
|
|
1020
|
+
return
|
|
1021
|
+
|
|
1022
|
+
# Show GEPA transformation proposals - they're useful for debugging
|
|
1023
|
+
            # if event_type == "gepa.transformation.proposed":
            #     return

            # Handle trial results for optimization curve tracking
            if event_type == "prompt.learning.trial.results":
                self._handle_trial_results(message.data)
                # Continue to default display

            # Handle validation summary
            if event_type == "prompt.learning.validation.summary":
                if self.show_validation:
                    self._handle_validation_summary(message.data)
                # Continue to default display

            # Handle progress events
            if event_type == "prompt.learning.progress":
                self._handle_progress(message.data)
                # Continue to default display

            # Handle MIPRO-specific events for progress tracking
            if event_type == "mipro.iteration.start":
                self._handle_mipro_iteration_start(message.data)
                # Continue to default display

            if event_type == "mipro.iteration.complete":
                self._handle_mipro_iteration_complete(message.data)
                # Continue to default display

            if event_type == "mipro.fulleval.complete":
                self._handle_mipro_fulleval_complete(message.data)
                # Continue to default display

            if event_type == "mipro.optimization.exhausted":
                # Graceful conclusion - show final progress
                self._emit_mipro_progress()
                # Continue to default display

            if event_type == "mipro.new_incumbent":
                self._handle_mipro_new_incumbent(message.data)
                # Continue to default display

            # Handle rollouts start event
            if event_type == "prompt.learning.rollouts.start":
                self._handle_rollouts_start(message.data)
                # Continue to default display

            # Handle GEPA new best event
            if event_type == "prompt.learning.gepa.new_best":
                self._handle_gepa_new_best(message.data)
                # Continue to default display

            # Handle phase changed event
            if event_type == "prompt.learning.phase.changed":
                self._handle_phase_changed(message.data)
                # Continue to default display

            # Handle stream connected event (connection lifecycle)
            if event_type == "prompt.learning.stream.connected":
                self._handle_stream_connected(message.data)
                # Continue to default display

            # Handle proposal scored events (transformations) - show by default
            if event_type == "prompt.learning.proposal.scored":
                self._handle_proposal_scored(message.data)
                # Continue to default display

            # Show verbose transformation events by default - they're useful
            # Only skip if explicitly disabled via show_transformations=False
            # verbose_event_types = [
            #     "prompt.learning.proposal.scored",
            #     "prompt.learning.eval.summary",
            #     "prompt.learning.validation.scored",
            #     "prompt.learning.final.results",
            # ]
            # if event_type in verbose_event_types and not self.show_transformations:
            #     return

            # Default event display - show more details
            prefix = f"[{timestamp}] {event_type}"
            if level:
                prefix += f" ({level})"
            sanitized_msg = _mask_sensitive_urls(msg)

            # Include key data fields if message is empty or short
            if not sanitized_msg or len(sanitized_msg) < 50:
                data = message.data.get("data", {})
                if isinstance(data, dict):
                    # Show useful fields
                    useful_fields = []
                    for key in ["score", "accuracy", "mean", "step", "iteration", "trial", "completed", "total", "version_id"]:
                        if key in data:
                            value = data[key]
                            if isinstance(value, (int, float)):
                                useful_fields.append(f"{key}={value:.4f}" if isinstance(value, float) else f"{key}={value}")
                            else:
                                useful_fields.append(f"{key}={value}")
                    if useful_fields:
                        sanitized_msg = sanitized_msg + (" " if sanitized_msg else "") + " ".join(useful_fields[:5])  # Limit to 5 fields

            self._write_log(f"{prefix}: {sanitized_msg}".rstrip(": "))
            return

        if message.stream_type is StreamType.METRICS:
            name = message.data.get("name")
            value = message.data.get("value")
            step = message.data.get("step")
            data = message.data.get("data", {})

            metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, int | float) else f"[{timestamp}] [metric] {name}={value}"
            if step is not None:
                metric_str += f" (step={step})"

            if isinstance(data, dict):
                n = data.get("n")
                if n is not None:
                    metric_str += f" n={n}"

            self._write_log(metric_str)
            return

        if message.stream_type is StreamType.TIMELINE:
            phase = message.data.get("phase", "phase")
            self._write_log(f"[{timestamp}] timeline={phase}")

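    # Editor's note (illustrative, not part of the package): the dispatch above keys
    # off an event type string and a nested "data" dict pulled from the stream
    # message earlier in this method. Assuming, hypothetically, a payload shaped like
    #
    #     {"type": "prompt.learning.eval.summary", "message": "",
    #      "data": {"score": 0.62, "completed": 8, "total": 10}}
    #
    # the default display would append the short numeric fields and log roughly
    # "[12:00:00] prompt.learning.eval.summary: score=0.6200 completed=8 total=10".
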
    def _handle_trial_results(self, event_data: dict[str, Any]) -> None:
        """Handle GEPA trial results events and track optimization curve.

        Processes trial completion events from GEPA optimization, tracking:
        - Mean score for the trial
        - Best score achieved so far
        - Number of rollouts completed (N)
        - Optimization curve data points

        Updates the optimization curve with (trial_number, best_score) tuples
        for visualization. Displays trial results if show_trial_results is True.

        Args:
            event_data: Event data dictionary containing:
                - data.mean: Mean score for this trial
                - data.completed: Number of rollouts completed
                - data.total: Total rollouts planned
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        mean_score = data.get("mean")
        if mean_score is not None:
            self.trial_counter += 1
            self.best_score_so_far = max(self.best_score_so_far, float(mean_score))
            self.optimization_curve.append((self.trial_counter, self.best_score_so_far))

            if self.show_trial_results:
                timestamp = datetime.now().strftime("%H:%M:%S")

                # Extract N (number of rollouts)
                completed = data.get("completed")
                total = data.get("total")

                n_str = f" N={completed}/{total}" if completed is not None and total is not None else (f" N={completed}" if completed is not None else "")

                self._write_log(f"[{timestamp}] [Trial {self.trial_counter}] Score: {mean_score:.4f} (Best: {self.best_score_so_far:.4f}){n_str}")

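    # Editor's sketch (not part of the package): `optimization_curve` accumulates
    # monotone (trial_number, best_score_so_far) pairs, so a quick post-run
    # inspection or plot could look like the following (matplotlib assumed available):
    #
    #     curve = handler.optimization_curve          # e.g. [(1, 0.55), (2, 0.61), (3, 0.61)]
    #     xs, ys = zip(*curve)
    #     import matplotlib.pyplot as plt
    #     plt.step(xs, ys, where="post")
    #     plt.xlabel("trial"); plt.ylabel("best score"); plt.show()
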
    def _handle_validation_summary(self, event_data: dict[str, Any]) -> None:
        """Handle validation summary events showing candidate performance.

        Displays validation results comparing optimized prompts against a baseline.
        Shows baseline score, number of candidates evaluated (N), and top candidate
        scores. Only displayed if show_validation is True.

        Args:
            event_data: Event data dictionary containing:
                - data.baseline: Baseline score (dict with accuracy/score or number)
                - data.results: List of candidate results with accuracy/score fields
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")

        # Extract baseline
        baseline = data.get("baseline")
        baseline_score = None
        if isinstance(baseline, dict):
            baseline_score = baseline.get("accuracy") or baseline.get("score")
        elif isinstance(baseline, int | float):
            baseline_score = baseline

        # Extract results
        results = data.get("results", [])
        if not isinstance(results, list):
            results = []

        # Display validation summary
        self._write_log(f"[{timestamp}] Validation Summary:")

        # Show baseline if available
        if baseline_score is not None:
            self._write_log(f"  Baseline: {baseline_score:.4f}")

        # Show N (number of candidates)
        n_candidates = len(results)
        if n_candidates > 0:
            self._write_log(f"  N={n_candidates}")

        # Display validation results
        if results:
            for i, result in enumerate(results[:10]):  # Show top 10
                if isinstance(result, dict):
                    accuracy = result.get("accuracy") or result.get("score")
                    if accuracy is not None:
                        self._write_log(f"  Candidate {i+1}: {accuracy:.4f}")

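    # Editor's sketch (illustrative only): a validation.summary payload of the shape
    # this handler expects, and the log lines it would produce with show_validation=True:
    #
    #     {"data": {"baseline": {"accuracy": 0.50},
    #               "results": [{"accuracy": 0.58}, {"score": 0.54}]}}
    #
    #     [12:00:00] Validation Summary:
    #       Baseline: 0.5000
    #       N=2
    #       Candidate 1: 0.5800
    #       Candidate 2: 0.5400
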
    def _handle_progress(self, event_data: dict[str, Any]) -> None:
        """Handle GEPA progress events with detailed rollout and transformation tracking.

        Displays comprehensive progress information including:
        - Overall completion percentage
        - Rollout progress (completed/total with percentage)
        - Transformation progress (tried/planned with percentage)
        - Token usage (used/budget in millions)
        - Elapsed time and ETA

        Formats progress in a human-readable format similar to CLI progress bars.

        Args:
            event_data: Event data dictionary containing:
                - data.rollouts_completed: Number of rollouts completed
                - data.rollouts_total: Total rollouts planned
                - data.transformations_tried: Number of transformations tried
                - data.transformations_planned: Total transformations planned
                - data.rollout_tokens_used: Tokens consumed
                - data.rollout_tokens_budget: Token budget
                - data.elapsed_seconds: Time elapsed
                - data.eta_seconds: Estimated time remaining
                - data.percent_overall: Overall completion percentage
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")

        # Extract rollout progress
        rollouts_completed = data.get("rollouts_completed")
        rollouts_total = data.get("rollouts_total")
        percent_rollouts = data.get("percent_rollouts")

        # Extract transformation progress
        transformations_tried = data.get("transformations_tried")
        transformations_planned = data.get("transformations_planned")
        percent_transformations = data.get("percent_transformations")

        # Extract overall progress
        percent_overall = data.get("percent_overall")

        # Extract timing
        elapsed_seconds = data.get("elapsed_seconds")
        eta_seconds = data.get("eta_seconds")

        # Extract token usage
        rollout_tokens_used = data.get("rollout_tokens_used")
        rollout_tokens_budget = data.get("rollout_tokens_budget")

        # Build progress message
        parts = []

        # Overall percentage
        if percent_overall is not None:
            parts.append(f"{int(percent_overall * 100)}% complete")

        # Rollout progress
        if rollouts_completed is not None and rollouts_total is not None:
            parts.append(f"rollouts={rollouts_completed}/{rollouts_total}")
            if percent_rollouts is not None:
                parts.append(f"({int(percent_rollouts * 100)}%)")
        elif rollouts_completed is not None:
            parts.append(f"rollouts={rollouts_completed}")

        # Transformation progress
        if transformations_tried is not None and transformations_planned is not None:
            parts.append(f"transformations={transformations_tried}/{transformations_planned}")
            if percent_transformations is not None:
                parts.append(f"({int(percent_transformations * 100)}%)")
        elif transformations_tried is not None:
            parts.append(f"transformations={transformations_tried}")

        # Token usage
        if rollout_tokens_used is not None:
            tokens_millions = rollout_tokens_used / 1_000_000.0
            if rollout_tokens_budget is not None:
                budget_millions = rollout_tokens_budget / 1_000_000.0
                parts.append(f"tokens={tokens_millions:.2f}M/{budget_millions:.2f}M")
            else:
                parts.append(f"tokens={tokens_millions:.2f}M")

        # Timing
        if elapsed_seconds is not None:
            if elapsed_seconds >= 60:
                elapsed_str = f"{elapsed_seconds / 60:.1f}min"
            else:
                elapsed_str = f"{int(elapsed_seconds)}s"
            parts.append(f"elapsed={elapsed_str}")

        if eta_seconds is not None:
            eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
            parts.append(f"eta={eta_str}")

        # Fallback to simple step/total_steps if no detailed info
        if not parts:
            step = data.get("step") or data.get("current_step")
            total_steps = data.get("total_steps") or data.get("max_steps")
            if step is not None and total_steps is not None:
                parts.append(f"{step}/{total_steps} ({100 * step / total_steps:.1f}%)")

        if parts:
            progress_msg = " ".join(parts)
            self._write_log(f"[{timestamp}] Progress: {progress_msg}")

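    # Editor's sketch (illustrative only): percent_* fields are treated as 0-1
    # fractions while elapsed/eta are seconds, so a payload such as
    #
    #     {"data": {"percent_overall": 0.42, "rollouts_completed": 84,
    #               "rollouts_total": 200, "rollout_tokens_used": 1_250_000,
    #               "rollout_tokens_budget": 5_000_000, "elapsed_seconds": 300,
    #               "eta_seconds": 415}}
    #
    # would be rendered roughly as:
    #
    #     [12:00:00] Progress: 42% complete rollouts=84/200 tokens=1.25M/5.00M elapsed=5.0min eta=6.9min
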
    def _handle_rollouts_start(self, event_data: dict[str, Any]) -> None:
        """Handle GEPA rollouts start event.

        Displays when rollouts begin, showing the number of training seeds
        that will be evaluated. This marks the start of the main optimization
        phase for GEPA.

        Args:
            event_data: Event data dictionary containing:
                - data.train_seeds: List of training seed values
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        train_seeds = data.get("train_seeds", [])

        if isinstance(train_seeds, list) and train_seeds:
            num_seeds = len(train_seeds)
            self._write_log(f"[{timestamp}] Starting rollouts: {num_seeds} seeds")
        else:
            self._write_log(f"[{timestamp}] Starting rollouts")

    def _handle_gepa_new_best(self, event_data: dict[str, Any]) -> None:
        """Handle GEPA new best candidate event.

        Displays when a new best candidate is found during optimization,
        showing the improvement over the previous best.

        Args:
            event_data: Event data dictionary containing:
                - data.accuracy: New best accuracy score
                - data.previous_best_score: Previous best score
                - data.improvement: Absolute improvement
                - data.version_id: ID of the new best candidate
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        accuracy = data.get("accuracy")
        previous = data.get("previous_best_score")
        improvement = data.get("improvement")

        if accuracy is not None:
            msg = f"[{timestamp}] \u2728 New best: {accuracy:.4f}"
            if previous is not None and improvement is not None:
                msg += f" (+{improvement:.4f} from {previous:.4f})"
            elif previous is not None:
                msg += f" (was {previous:.4f})"
            self._write_log(msg)

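    # Editor's note (illustrative only): a gepa.new_best payload like
    # {"data": {"accuracy": 0.71, "previous_best_score": 0.66, "improvement": 0.05}}
    # would be logged as "[12:00:00] \u2728 New best: 0.7100 (+0.0500 from 0.6600)".
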
    def _handle_phase_changed(self, event_data: dict[str, Any]) -> None:
        """Handle phase transition event.

        Displays when the optimization transitions between phases
        (e.g., bootstrap -> optimization -> validation -> complete).

        Args:
            event_data: Event data dictionary containing:
                - data.from_phase: Previous phase name
                - data.to_phase: New phase name
                - data.phase_summary: Optional summary of completed phase
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        from_phase = data.get("from_phase") or "start"
        to_phase = data.get("to_phase")

        if to_phase:
            self._write_log(f"[{timestamp}] Phase: {from_phase} \u2192 {to_phase}")

    def _handle_stream_connected(self, event_data: dict[str, Any]) -> None:
        """Handle SSE stream connection event.

        Displays connection confirmation with cursor position for debugging.

        Args:
            event_data: Event data dictionary containing:
                - data.cursor: Current sequence cursor position
                - data.heartbeat_interval_seconds: Heartbeat interval
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        cursor = data.get("cursor", 0)
        self._write_log(f"[{timestamp}] Stream connected (cursor={cursor})")

    def _handle_mipro_job_started(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO job start event and extract configuration.

        Captures initial MIPRO configuration from the job start event to enable
        progress tracking. Extracts num_iterations and num_trials_per_iteration
        to estimate total trials and rollouts.

        Args:
            event_data: Event data dictionary containing:
                - data.num_iterations: Total number of optimization iterations
                - data.num_trials_per_iteration: Trials per iteration
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        # Extract config values to estimate max rollouts
        num_iterations = data.get("num_iterations")
        num_trials_per_iteration = data.get("num_trials_per_iteration")

        if num_iterations is not None:
            self.mipro_num_iterations = num_iterations
        if num_trials_per_iteration is not None:
            self.mipro_trials_per_iteration = num_trials_per_iteration

    def _handle_mipro_iteration_start(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO iteration start event and initialize progress tracking.

        Called at the start of each MIPRO iteration. On the first iteration (0),
        initializes all progress tracking variables including:
        - Total iterations and trials per iteration
        - Batch size (for minibatch evaluations)
        - Max rollouts estimate (iterations * trials * batch_size)
        - Time and token budgets

        Sets the start time for elapsed time tracking.

        Args:
            event_data: Event data dictionary containing:
                - data.iteration: Current iteration number (0-indexed)
                - data.num_iterations: Total iterations
                - data.num_trials_per_iteration: Trials per iteration
                - data.batch_size: Minibatch size (N for minibatch scores)
                - data.max_trials: Maximum trials limit (optional)
                - data.max_rollouts: Maximum rollouts limit (optional)
                - data.max_time_seconds: Maximum time limit (optional)
        """
        import time

        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        iteration = data.get("iteration")
        if iteration == 0 and self.mipro_start_time is None:
            self.mipro_start_time = time.time()

        # Extract total iterations and trials per iteration from first iteration
        if iteration == 0:
            self.mipro_num_iterations = data.get("num_iterations") or self.mipro_num_iterations
            self.mipro_trials_per_iteration = data.get("num_trials_per_iteration") or self.mipro_trials_per_iteration
            batch_size = data.get("batch_size")
            if batch_size is not None:
                self.mipro_batch_size = batch_size

            if self.mipro_num_iterations and self.mipro_trials_per_iteration:
                self.mipro_total_trials = self.mipro_num_iterations * self.mipro_trials_per_iteration

            # Extract max limits if available (from events, but TOML value takes precedence)
            # Only override if TOML value wasn't set
            max_trials = data.get("max_trials")
            max_rollouts_from_event = data.get("max_rollouts")
            if self.mipro_max_rollouts is None:
                if max_rollouts_from_event is not None:
                    # Use event value if TOML value wasn't set
                    self.mipro_max_rollouts = max_rollouts_from_event
                elif max_trials is not None:
                    # Fallback: If max_trials is set, use it as max rollouts (approximation)
                    self.mipro_max_rollouts = max_trials
                elif self.mipro_num_iterations and self.mipro_trials_per_iteration and self.mipro_batch_size:
                    # Estimate max rollouts: iterations * trials_per_iteration * batch_size
                    self.mipro_max_rollouts = self.mipro_num_iterations * self.mipro_trials_per_iteration * self.mipro_batch_size

            max_time_seconds = data.get("max_time_seconds") or data.get("max_wall_clock_seconds")
            if max_time_seconds is not None and self.mipro_max_time_seconds is None:
                # Use event value only if TOML value wasn't set
                self.mipro_max_time_seconds = float(max_time_seconds)

        self.mipro_current_iteration = iteration if iteration is not None else self.mipro_current_iteration

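    # Editor's note (illustrative only): with, say, num_iterations=5,
    # num_trials_per_iteration=4 and batch_size=8, and no max_rollouts coming from
    # the TOML config or the event, the fallback estimate above yields
    # mipro_total_trials = 5 * 4 = 20 and mipro_max_rollouts = 5 * 4 * 8 = 160.
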
    def _handle_mipro_iteration_complete(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO iteration completion event.

        Updates progress tracking when an iteration completes, including:
        - Cumulative trial count
        - Current iteration number

        Emits a progress update showing overall progress, trials completed,
        iterations, rollouts, tokens, and time.

        Args:
            event_data: Event data dictionary containing:
                - data.iteration: Completed iteration number
                - data.cumulative: Cumulative trial count across all iterations
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        cumulative = data.get("cumulative")
        if cumulative is not None:
            self.mipro_completed_trials = cumulative

        # Update current iteration
        iteration = data.get("iteration")
        if iteration is not None:
            self.mipro_current_iteration = iteration

        # Emit progress update
        self._emit_mipro_progress()

    def _handle_mipro_trial_complete(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO trial completion event (minibatch evaluation).

        Processes minibatch trial completion events, which occur frequently during
        MIPRO optimization. Tracks:
        - Completed trial count
        - Rollouts completed (from num_seeds)
        - Minibatch scores (displayed if show_trial_results is True)

        Displays trial results in GEPA-like format: [Trial X] Score: Y (Best: Z) N=W
        where N is the minibatch size. Emits throttled progress updates.

        Args:
            event_data: Event data dictionary containing:
                - data.minibatch_score: Score from minibatch evaluation
                - data.iteration: Current iteration number
                - data.trial: Trial number within iteration
                - data.num_seeds: Number of seeds evaluated (minibatch size N)
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        # Increment completed trials counter
        self.mipro_completed_trials += 1

        # Count rollouts from trial events
        num_seeds = data.get("num_seeds") or data.get("num_instances", 0)
        if num_seeds:
            self.mipro_rollouts_completed += num_seeds

        # Show trial score (minibatch) - like GEPA trial format
        if self.show_trial_results:
            timestamp = datetime.now().strftime("%H:%M:%S")
            minibatch_score = data.get("minibatch_score")
            iteration = data.get("iteration")
            trial = data.get("trial")

            if minibatch_score is not None:
                try:
                    score_float = float(minibatch_score)
                    # Calculate trial number for display
                    if iteration is not None and trial is not None and self.mipro_trials_per_iteration:
                        trial_num_display = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
                    else:
                        trial_num_display = self.mipro_completed_trials

                    n_str = f" N={num_seeds}" if num_seeds else ""
                    best_str = f" (Best: {self.mipro_best_score:.4f})" if self.mipro_best_score > 0 else ""

                    self._write_log(
                        f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f}{best_str}{n_str}"
                    )
                except (ValueError, TypeError):
                    pass

        # Emit progress update after each trial (throttled internally)
        self._emit_mipro_progress()

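    # Editor's note (illustrative only): trial numbering for display is
    # (iteration * trials_per_iteration) + (trial + 1), so with 4 trials per
    # iteration, iteration=2 and trial=1 is shown as Trial 10; when either value
    # is missing, the running mipro_completed_trials counter is used instead.
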
    def _handle_mipro_fulleval_complete(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO full evaluation completion event.

        Processes full evaluation events, which occur less frequently than minibatch
        trials. Full evaluations use the full validation set and are more expensive.
        Only displays results if the score is "promising":
        - Better than current best score, OR
        - At least 5% improvement over baseline

        Tracks rollouts from full evaluations and updates best score. Displays
        results with baseline comparison and improvement percentage.

        Args:
            event_data: Event data dictionary containing:
                - data.score: Full evaluation score
                - data.iteration: Current iteration number
                - data.trial: Trial number within iteration
                - data.num_seeds: Number of seeds evaluated (full eval size)
                - data.seeds: List of seed values (alternative to num_seeds)
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        # Count rollouts from full eval
        num_seeds = data.get("num_seeds") or data.get("seeds", 0)
        if isinstance(num_seeds, list):
            num_seeds = len(num_seeds)
        if num_seeds:
            self.mipro_rollouts_completed += num_seeds

        score = data.get("score")
        if score is None:
            return

        try:
            score_float = float(score)
        except (ValueError, TypeError):
            return

        # Initialize baseline if not set (use first score as baseline)
        if self.mipro_baseline_score is None:
            self.mipro_baseline_score = score_float

        # Only show if score is promising:
        # - Better than current best, OR
        # - At least 5% improvement over baseline
        is_promising = False
        if score_float > self.mipro_best_score:
            self.mipro_best_score = score_float
            is_promising = True
        elif self.mipro_baseline_score is not None:
            improvement = score_float - self.mipro_baseline_score
            improvement_pct = (improvement / self.mipro_baseline_score * 100) if self.mipro_baseline_score > 0 else 0
            if improvement_pct >= 5.0:  # At least 5% improvement over baseline
                is_promising = True

        if is_promising:
            timestamp = datetime.now().strftime("%H:%M:%S")
            iteration = data.get("iteration")
            trial = data.get("trial")
            seeds = data.get("seeds") or data.get("num_seeds", 0)
            if isinstance(seeds, list):
                seeds = len(seeds)

            # Format similar to GEPA trial results with N displayed
            iter_str = f" iter={iteration}" if iteration is not None else ""
            trial_str = f" trial={trial}" if trial is not None else ""
            n_str = f" N={seeds}" if seeds else ""

            baseline_str = ""
            if self.mipro_baseline_score is not None:
                improvement = score_float - self.mipro_baseline_score
                improvement_pct = (improvement / self.mipro_baseline_score * 100) if self.mipro_baseline_score > 0 else 0
                baseline_str = f" (Baseline: {self.mipro_baseline_score:.4f}, +{improvement_pct:.1f}%)"

            self._write_log(
                f"[{timestamp}] Full eval: Score={score_float:.4f} (Best: {self.mipro_best_score:.4f}){n_str}{baseline_str}{iter_str}{trial_str}"
            )

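    # Editor's note (illustrative only): with a baseline of 0.60, a later full-eval
    # score of 0.64 that does not beat the current best is still reported, because
    # (0.64 - 0.60) / 0.60 * 100 is roughly 6.7% and clears the 5% "promising"
    # threshold; a 0.61 (roughly 1.7%) would be counted toward rollouts but not logged.
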
    def _handle_mipro_new_incumbent(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO new incumbent event (best candidate found).

        Processes events when MIPRO finds a new best candidate (incumbent).
        Updates the optimization curve and displays the result in GEPA-like format
        for consistency. Tracks cumulative trial count for curve visualization.

        Args:
            event_data: Event data dictionary containing:
                - data.minibatch_score: Minibatch score of the new incumbent
                - data.best_score: Overall best score
                - data.iteration: Current iteration number
                - data.trial: Trial number within iteration
                - data.cumulative_trials: Cumulative trial count across iterations
                - data.num_seeds: Minibatch size (N)
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        minibatch_score = data.get("minibatch_score")
        best_score = data.get("best_score")
        iteration = data.get("iteration")
        trial = data.get("trial")
        num_seeds = data.get("num_seeds")  # N for minibatch

        if minibatch_score is None:
            return

        try:
            score_float = float(minibatch_score)
        except (ValueError, TypeError):
            return

        # Update best score if this is better
        if best_score is not None:
            best_float = float(best_score)
            if best_float > self.best_score_so_far:
                self.best_score_so_far = best_float
        elif score_float > self.best_score_so_far:
            self.best_score_so_far = score_float

        # Track optimization curve
        if trial is not None:
            # Use cumulative trial count for x-axis
            cumulative_trials = data.get("cumulative_trials")
            if cumulative_trials is not None:
                trial_num = cumulative_trials
            else:
                # Estimate: (iteration * trials_per_iteration) + trial
                if iteration is not None and self.mipro_trials_per_iteration:
                    trial_num = (iteration * self.mipro_trials_per_iteration) + (trial + 1)
                else:
                    trial_num = self.trial_counter + 1

            self.optimization_curve.append((trial_num, self.best_score_so_far))
            self.trial_counter = trial_num

        # Format like GEPA: [Trial X] Score: X (Best: Y) N=Z
        trial_num_display = self.trial_counter if self.trial_counter > 0 else (trial + 1 if trial is not None else 1)
        n_str = f" N={num_seeds}" if num_seeds is not None else ""

        click.echo(
            f"[{timestamp}] [Trial {trial_num_display}] Score: {score_float:.4f} (Best: {self.best_score_so_far:.4f}){n_str}"
        )

        # Emit progress update after each trial (throttled internally)
        self._emit_mipro_progress()

    def _handle_mipro_budget_update(self, event_data: dict[str, Any]) -> None:
        """Handle MIPRO budget update events.

        Tracks token usage and cost accumulation during optimization. Updates:
        - Total tokens consumed (all operations)
        - Policy tokens (rollout tokens only)
        - Total cost in USD
        - Max token and cost limits (if provided in event)

        Emits throttled progress updates to show budget consumption.

        Args:
            event_data: Event data dictionary containing:
                - data.total_tokens: Total tokens consumed
                - data.policy_tokens: Tokens used for rollouts (policy only)
                - data.total_cost_usd: Total cost in USD
                - data.max_token_limit: Maximum token budget (optional)
                - data.max_spend_usd: Maximum cost budget (optional)
        """
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        # Update token tracking
        total_tokens = data.get("total_tokens")
        if total_tokens is not None:
            self.mipro_total_tokens = total_tokens

        # Track policy tokens separately (rollout tokens)
        policy_tokens = data.get("policy_tokens")
        if policy_tokens is not None:
            self.mipro_policy_tokens = policy_tokens

        # Update cost tracking
        total_cost = data.get("total_cost_usd")
        if total_cost is not None:
            self.mipro_total_cost = total_cost

        # Extract max limits if available in event data
        max_token_limit = data.get("max_token_limit")
        if max_token_limit is not None:
            self.mipro_max_tokens = max_token_limit

        max_spend_usd = data.get("max_spend_usd")
        if max_spend_usd is not None:
            self.mipro_max_cost = max_spend_usd

        # Emit progress update periodically (throttled)
        self._emit_mipro_progress()

    def _emit_mipro_progress(self) -> None:
        """Emit a comprehensive progress update for MIPRO (throttled).

        Formats and displays MIPRO progress in a format similar to GEPA for consistency.
        Shows:
        - Overall completion percentage
        - Trial progress (completed/total with remaining)
        - Iteration progress (current/total)
        - Rollout progress (completed/max)
        - Token usage (used/budget in millions)
        - Cost (USD)
        - Elapsed time and ETA

        Progress updates are throttled to emit at most every 5 seconds to avoid
        overwhelming the console. This method is called after significant events
        (trial completion, iteration completion, budget updates).

        Note:
            Only emits if start_time is set (job has started) and sufficient time
            has passed since the last update.
        """
        import time

        if self.mipro_start_time is None:
            return

        # Throttle progress updates - only emit every N seconds
        now = time.time()
        if self._last_progress_emit_time is not None:
            time_since_last = now - self._last_progress_emit_time
            if time_since_last < self._progress_emit_interval:
                return  # Skip this update

        self._last_progress_emit_time = now

        timestamp = datetime.now().strftime("%H:%M:%S")
        elapsed = now - self.mipro_start_time

        parts = []

        # Overall progress percentage
        percent_overall = None
        if self.mipro_total_trials and self.mipro_completed_trials is not None:
            percent_overall = (self.mipro_completed_trials / self.mipro_total_trials) * 100
            parts.append(f"{int(percent_overall)}% complete")

        # Trial progress (like rollouts in GEPA)
        if self.mipro_total_trials and self.mipro_completed_trials is not None:
            parts.append(f"trials={self.mipro_completed_trials}/{self.mipro_total_trials}")
            # Calculate remaining trials
            remaining_trials = self.mipro_total_trials - self.mipro_completed_trials
            if remaining_trials > 0:
                parts.append(f"rem={remaining_trials}")
            # Show percentage
            if percent_overall is not None:
                parts.append(f"({int(percent_overall)}%)")
        elif self.mipro_completed_trials is not None:
            parts.append(f"trials={self.mipro_completed_trials}")

        # Iteration progress
        if self.mipro_num_iterations and self.mipro_current_iteration is not None:
            parts.append(f"iter={self.mipro_current_iteration + 1}/{self.mipro_num_iterations}")

        # Rollouts completed vs max (like GEPA) - always show if we have any rollouts
        if self.mipro_rollouts_completed > 0:
            # Always try to show max if available (from TOML, event, or estimate)
            max_rollouts_to_show = self.mipro_max_rollouts
            if max_rollouts_to_show is None and self.mipro_total_trials and self.mipro_batch_size:
                # Estimate max rollouts from total trials if available
                max_rollouts_to_show = self.mipro_total_trials * self.mipro_batch_size

            if max_rollouts_to_show:
                rollouts_pct = (self.mipro_rollouts_completed / max_rollouts_to_show) * 100
                parts.append(f"rollouts={self.mipro_rollouts_completed}/{max_rollouts_to_show} ({int(rollouts_pct)}%)")
            else:
                parts.append(f"rollouts={self.mipro_rollouts_completed}")

        # Tokens (policy tokens only, like GEPA rollout_tokens) - always show max if available
        if self.mipro_policy_tokens > 0:
            rollout_tokens_millions = self.mipro_policy_tokens / 1_000_000.0
            if self.mipro_max_tokens:
                # Use max_tokens as budget for rollout tokens (approximation)
                budget_millions = self.mipro_max_tokens / 1_000_000.0
                tokens_pct = (self.mipro_policy_tokens / self.mipro_max_tokens * 100) if self.mipro_max_tokens > 0 else 0
                parts.append(f"tokens={rollout_tokens_millions:.2f}M/{budget_millions:.2f}M ({int(tokens_pct)}%)")
            else:
                parts.append(f"tokens={rollout_tokens_millions:.2f}M")

        # Timing (elapsed out of max, like GEPA)
        elapsed_seconds = int(elapsed)
        if self.mipro_max_time_seconds:
            elapsed_pct = (elapsed / self.mipro_max_time_seconds * 100) if self.mipro_max_time_seconds > 0 else 0
            max_time_minutes = self.mipro_max_time_seconds / 60.0
            if elapsed_seconds >= 60:
                elapsed_str = f"{elapsed_seconds / 60:.1f}min/{max_time_minutes:.1f}min ({int(elapsed_pct)}%)"
            else:
                elapsed_str = f"{elapsed_seconds}s/{int(self.mipro_max_time_seconds)}s ({int(elapsed_pct)}%)"
        else:
            if elapsed_seconds >= 60:
                elapsed_str = f"{elapsed_seconds / 60:.1f}min"
            else:
                elapsed_str = f"{elapsed_seconds}s"
        parts.append(f"elapsed={elapsed_str}")

        # ETA calculation (similar to GEPA) - always show if we have progress
        eta_seconds = None
        if self.mipro_completed_trials is not None and self.mipro_completed_trials > 0 and elapsed > 0:
            rate = self.mipro_completed_trials / elapsed
            if rate > 0:
                if self.mipro_total_trials:
                    # Calculate ETA based on remaining trials
                    remaining = self.mipro_total_trials - self.mipro_completed_trials
                    if remaining > 0:
                        eta_seconds = remaining / rate
                else:
                    # Estimate based on iterations if we don't have total trials
                    if self.mipro_num_iterations and self.mipro_current_iteration is not None:
                        remaining_iterations = self.mipro_num_iterations - (self.mipro_current_iteration + 1)
                        if remaining_iterations > 0 and self.mipro_trials_per_iteration:
                            # Estimate: assume same rate for remaining iterations
                            remaining_trials_estimate = remaining_iterations * self.mipro_trials_per_iteration
                            eta_seconds = remaining_trials_estimate / rate

        if eta_seconds is not None and eta_seconds > 0:
            eta_str = f"{eta_seconds / 60:.1f}min" if eta_seconds >= 60 else f"{int(eta_seconds)}s"
            parts.append(f"eta={eta_str}")

        if parts:
            progress_msg = " ".join(parts)
            self._write_log(f"[{timestamp}] Progress: {progress_msg}")

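    # Editor's sketch (not part of the package): the throttle above is a simple
    # "drop if too soon" pattern; assuming _progress_emit_interval = 5.0 seconds,
    # it behaves like:
    #
    #     now = time.time()
    #     if last is not None and now - last < 5.0:
    #         return            # suppress this update
    #     last = now            # otherwise emit and remember the time
    #
    # so at most one Progress line is written per interval, regardless of how many
    # trial/budget events arrive in between.
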
    def flush(self) -> None:
        """Flush buffered output and close log file."""
        if self._log_file_handle:
            try:
                from datetime import datetime
                self._log_file_handle.write("\n" + "=" * 80 + "\n")
                self._log_file_handle.write(f"Ended: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                self._log_file_handle.write("=" * 80 + "\n")
                self._log_file_handle.flush()
                self._log_file_handle.close()
            except Exception:
                pass
            finally:
                self._log_file_handle = None

    def _handle_proposal_scored(self, event_data: dict[str, Any]) -> None:
        """Handle GEPA proposal scored events (transformations).

        Displays transformation/proposal scoring events from GEPA optimization.
        Only called if show_transformations is True (default: False) to avoid
        verbose output. Shows the score assigned to each proposed transformation.

        Args:
            event_data: Event data dictionary containing:
                - data.score: Score assigned to the transformation/proposal
        """
        # Only called if show_transformations=True
        data = event_data.get("data", {})
        if not isinstance(data, dict):
            return

        timestamp = datetime.now().strftime("%H:%M:%S")
        score = data.get("score")
        if score is not None:
            click.echo(f"[{timestamp}] Proposal scored: {score:.4f}")


__all__ = [
    "GraphGenHandler",
    "BufferedHandler",
    "CallbackHandler",
    "CLIHandler",
    "PromptLearningHandler",
    "JSONHandler",
    "IntegrationTestHandler",
    "LossCurveHandler",
    "RichHandler",
    "StreamHandler",
]
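
# Editor's sketch (illustrative, not part of the package): driving a handler like
# the one above by hand with fabricated trial-result events; the class name,
# constructor arguments and event shapes are hypothetical stand-ins for whatever
# this module actually defines.
#
#     handler = PromptLearningHandler()
#     handler._handle_trial_results({"data": {"mean": 0.58, "completed": 8, "total": 8}})
#     handler._handle_trial_results({"data": {"mean": 0.64, "completed": 8, "total": 8}})
#     print(handler.optimization_curve)   # [(1, 0.58), (2, 0.64)]
#     handler.flush()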