PyPI - synth-ai - Versions diffs - 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

synth-ai 0.2.6.dev1-py3-none-any.whl → 0.4.3-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

synth_ai/__init__.py +44 -24
synth_ai/__main__.py +30 -3
synth_ai/cli/__init__.py +103 -48
synth_ai/cli/__main__.py +42 -0
synth_ai/cli/_internal/__init__.py +5 -0
synth_ai/cli/_internal/modal_wrapper.py +31 -0
synth_ai/cli/_internal/storage.py +20 -0
synth_ai/cli/_internal/typer_patch.py +47 -0
synth_ai/cli/_internal/validate_task_app.py +29 -0
synth_ai/cli/agents/__init__.py +17 -0
synth_ai/cli/agents/claude.py +77 -0
synth_ai/cli/agents/codex.py +265 -0
synth_ai/cli/agents/opencode.py +253 -0
synth_ai/cli/commands/__init__.py +18 -0
synth_ai/cli/commands/artifacts/__init__.py +13 -0
synth_ai/cli/commands/artifacts/client.py +119 -0
synth_ai/cli/commands/artifacts/config.py +57 -0
synth_ai/cli/commands/artifacts/core.py +24 -0
synth_ai/cli/commands/artifacts/download.py +188 -0
synth_ai/cli/commands/artifacts/export.py +186 -0
synth_ai/cli/commands/artifacts/list.py +156 -0
synth_ai/cli/commands/artifacts/parsing.py +250 -0
synth_ai/cli/commands/artifacts/show.py +336 -0
synth_ai/cli/commands/demo/__init__.py +3 -0
synth_ai/cli/commands/demo/core.py +153 -0
synth_ai/cli/commands/eval/__init__.py +10 -0
synth_ai/cli/commands/eval/config.py +338 -0
synth_ai/cli/commands/eval/core.py +256 -0
synth_ai/cli/commands/eval/runner.py +704 -0
synth_ai/cli/commands/eval/validation.py +60 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +424 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +185 -0
synth_ai/cli/commands/help/core.py +72 -0
synth_ai/cli/commands/scan/__init__.py +19 -0
synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
synth_ai/cli/commands/scan/core.py +344 -0
synth_ai/cli/commands/scan/health_checker.py +242 -0
synth_ai/cli/commands/scan/local_scanner.py +278 -0
synth_ai/cli/commands/scan/models.py +83 -0
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1428 -0
synth_ai/cli/commands/status/__init__.py +3 -0
synth_ai/cli/commands/status/client.py +91 -0
synth_ai/cli/commands/status/config.py +12 -0
synth_ai/cli/commands/status/errors.py +11 -0
synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
synth_ai/cli/commands/status/subcommands/config.py +13 -0
synth_ai/cli/commands/status/subcommands/files.py +34 -0
synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
synth_ai/cli/commands/status/subcommands/models.py +35 -0
synth_ai/cli/commands/status/subcommands/runs.py +34 -0
synth_ai/cli/commands/status/subcommands/session.py +77 -0
synth_ai/cli/commands/status/subcommands/summary.py +39 -0
synth_ai/cli/commands/status/subcommands/utils.py +41 -0
synth_ai/cli/commands/status/utils.py +23 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +22 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +201 -0
synth_ai/cli/commands/train/judge_validation.py +305 -0
synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
synth_ai/cli/commands/train/validation.py +392 -0
synth_ai/cli/demo_apps/__init__.py +10 -0
synth_ai/cli/demo_apps/core/__init__.py +28 -0
synth_ai/cli/demo_apps/core/cli.py +1735 -0
synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
synth_ai/cli/demo_apps/demo_registry.py +176 -0
synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +117 -51
synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
synth_ai/cli/demo_apps/demo_task_apps/math/_common.py +16 -0
synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
synth_ai/cli/demo_apps/math/__init__.py +1 -0
synth_ai/cli/demo_apps/math/_common.py +16 -0
synth_ai/cli/demo_apps/math/app.py +38 -0
synth_ai/cli/demo_apps/math/config.toml +75 -0
synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
synth_ai/cli/demo_apps/mipro/main.py +271 -0
synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
synth_ai/cli/demos/__init__.py +12 -0
synth_ai/cli/demos/demo.py +32 -0
synth_ai/cli/demos/rl_demo.py +254 -0
synth_ai/cli/deploy.py +216 -0
synth_ai/cli/infra/__init__.py +14 -0
synth_ai/cli/{balance.py → infra/balance.py} +21 -3
synth_ai/cli/infra/mcp.py +35 -0
synth_ai/cli/infra/modal_app.py +36 -0
synth_ai/cli/infra/setup.py +69 -0
synth_ai/cli/infra/status.py +16 -0
synth_ai/cli/infra/turso.py +77 -0
synth_ai/cli/lib/__init__.py +10 -0
synth_ai/cli/lib/agents.py +76 -0
synth_ai/cli/lib/apps/modal_app.py +101 -0
synth_ai/cli/lib/apps/task_app.py +642 -0
synth_ai/cli/lib/bin.py +39 -0
synth_ai/cli/lib/env.py +375 -0
synth_ai/cli/lib/errors.py +85 -0
synth_ai/cli/lib/modal.py +315 -0
synth_ai/cli/lib/plotting.py +126 -0
synth_ai/cli/lib/prompt_args.py +39 -0
synth_ai/cli/lib/prompts.py +284 -0
synth_ai/cli/lib/sqld.py +122 -0
synth_ai/cli/lib/task_app_discovery.py +884 -0
synth_ai/cli/lib/task_app_env.py +295 -0
synth_ai/cli/lib/train_cfgs.py +300 -0
synth_ai/cli/lib/tunnel_records.py +207 -0
synth_ai/cli/local/__init__.py +14 -0
synth_ai/cli/local/experiment_queue/__init__.py +72 -0
synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
synth_ai/cli/local/experiment_queue/config.py +128 -0
synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
synth_ai/cli/local/experiment_queue/database.py +175 -0
synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
synth_ai/cli/local/experiment_queue/models.py +231 -0
synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
synth_ai/cli/local/experiment_queue/results.py +373 -0
synth_ai/cli/local/experiment_queue/schemas.py +131 -0
synth_ai/cli/local/experiment_queue/service.py +344 -0
synth_ai/cli/local/experiment_queue/status.py +372 -0
synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
synth_ai/cli/local/experiment_queue/validation.py +157 -0
synth_ai/cli/local/session/__init__.py +92 -0
synth_ai/cli/local/session/client.py +383 -0
synth_ai/cli/local/session/constants.py +63 -0
synth_ai/cli/local/session/exceptions.py +105 -0
synth_ai/cli/local/session/manager.py +139 -0
synth_ai/cli/local/session/models.py +89 -0
synth_ai/cli/local/session/query.py +110 -0
synth_ai/cli/root.py +150 -102
synth_ai/cli/task_apps/__init__.py +37 -0
synth_ai/cli/task_apps/commands.py +3145 -0
synth_ai/cli/task_apps/deploy.py +7 -0
synth_ai/cli/task_apps/list.py +26 -0
synth_ai/cli/task_apps/main.py +36 -0
synth_ai/cli/task_apps/modal_serve.py +11 -0
synth_ai/cli/task_apps/serve.py +11 -0
synth_ai/cli/training/__init__.py +8 -0
synth_ai/cli/training/train.py +5 -0
synth_ai/cli/training/train_cfg.py +34 -0
synth_ai/cli/{watch.py → training/watch.py} +13 -18
synth_ai/cli/turso.py +52 -0
synth_ai/cli/utils/__init__.py +8 -0
synth_ai/cli/utils/experiments.py +235 -0
synth_ai/cli/utils/queue.py +504 -0
synth_ai/cli/{recent.py → utils/recent.py} +13 -7
synth_ai/cli/{traces.py → utils/traces.py} +9 -5
synth_ai/contracts/__init__.py +67 -0
synth_ai/core/__init__.py +100 -0
synth_ai/core/_utils/__init__.py +54 -0
synth_ai/core/_utils/base_url.py +10 -0
synth_ai/core/_utils/http.py +10 -0
synth_ai/core/_utils/prompts.py +14 -0
synth_ai/core/_utils/task_app_state.py +12 -0
synth_ai/core/_utils/user_config.py +10 -0
synth_ai/core/apps/common.py +116 -0
synth_ai/core/auth.py +95 -0
synth_ai/core/cfgs.py +240 -0
synth_ai/core/config/__init__.py +16 -0
synth_ai/core/config/base.py +168 -0
synth_ai/core/config/resolver.py +89 -0
synth_ai/core/env.py +231 -0
synth_ai/core/errors.py +126 -0
synth_ai/core/http.py +230 -0
synth_ai/core/integrations/__init__.py +11 -0
synth_ai/core/integrations/cloudflare.py +1710 -0
synth_ai/core/integrations/mcp/__init__.py +6 -0
synth_ai/core/integrations/mcp/__main__.py +8 -0
synth_ai/core/integrations/mcp/claude.py +36 -0
synth_ai/core/integrations/mcp/main.py +254 -0
synth_ai/core/integrations/mcp/setup.py +100 -0
synth_ai/core/integrations/modal.py +277 -0
synth_ai/core/json.py +72 -0
synth_ai/core/log_filter.py +99 -0
synth_ai/core/logging.py +82 -0
synth_ai/core/paths.py +107 -0
synth_ai/core/pricing.py +109 -0
synth_ai/core/process.py +233 -0
synth_ai/core/ssl.py +25 -0
synth_ai/core/storage/__init__.py +71 -0
synth_ai/core/task_app_state.py +318 -0
synth_ai/core/telemetry.py +282 -0
synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
synth_ai/core/tracing_v3/config.py +229 -0
synth_ai/core/tracing_v3/constants.py +21 -0
synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
synth_ai/core/tracing_v3/serialization.py +130 -0
synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
synth_ai/core/tracing_v3/storage/config.py +109 -0
synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
synth_ai/core/tracing_v3/trace_utils.py +326 -0
synth_ai/core/tracing_v3/turso/__init__.py +12 -0
synth_ai/core/tracing_v3/turso/daemon.py +278 -0
synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
synth_ai/core/urls.py +18 -0
synth_ai/core/user_config.py +137 -0
synth_ai/core/uvicorn.py +222 -0
synth_ai/data/__init__.py +83 -0
synth_ai/data/enums.py +123 -0
synth_ai/data/rewards.py +152 -0
synth_ai/data/traces.py +35 -0
synth_ai/products/__init__.py +6 -0
synth_ai/products/graph_evolve/__init__.py +46 -0
synth_ai/products/graph_evolve/client.py +226 -0
synth_ai/products/graph_evolve/config.py +591 -0
synth_ai/products/graph_evolve/converters/__init__.py +42 -0
synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
synth_ai/products/graph_evolve/run.py +222 -0
synth_ai/products/graph_gepa/__init__.py +23 -0
synth_ai/products/graph_gepa/converters/__init__.py +19 -0
synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
synth_ai/sdk/__init__.py +123 -0
synth_ai/sdk/api/__init__.py +1 -0
synth_ai/sdk/api/models/supported.py +514 -0
synth_ai/sdk/api/research_agent/__init__.py +296 -0
synth_ai/sdk/api/train/__init__.py +85 -0
synth_ai/sdk/api/train/builders.py +895 -0
synth_ai/sdk/api/train/cli.py +2199 -0
synth_ai/sdk/api/train/config_finder.py +267 -0
synth_ai/sdk/api/train/configs/__init__.py +65 -0
synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
synth_ai/sdk/api/train/configs/rl.py +187 -0
synth_ai/sdk/api/train/configs/sft.py +99 -0
synth_ai/sdk/api/train/configs/shared.py +81 -0
synth_ai/sdk/api/train/context_learning.py +312 -0
synth_ai/sdk/api/train/env_resolver.py +418 -0
synth_ai/sdk/api/train/graph_validators.py +216 -0
synth_ai/sdk/api/train/graphgen.py +984 -0
synth_ai/sdk/api/train/graphgen_models.py +823 -0
synth_ai/sdk/api/train/graphgen_validators.py +109 -0
synth_ai/sdk/api/train/local_api.py +10 -0
synth_ai/sdk/api/train/pollers.py +124 -0
synth_ai/sdk/api/train/progress/__init__.py +97 -0
synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
synth_ai/sdk/api/train/progress/events.py +326 -0
synth_ai/sdk/api/train/progress/results.py +428 -0
synth_ai/sdk/api/train/progress/tracker.py +641 -0
synth_ai/sdk/api/train/prompt_learning.py +469 -0
synth_ai/sdk/api/train/rl.py +441 -0
synth_ai/sdk/api/train/sft.py +396 -0
synth_ai/sdk/api/train/summary.py +522 -0
synth_ai/sdk/api/train/supported_algos.py +147 -0
synth_ai/sdk/api/train/task_app.py +351 -0
synth_ai/sdk/api/train/utils.py +279 -0
synth_ai/sdk/api/train/validators.py +2424 -0
synth_ai/sdk/graphs/__init__.py +15 -0
synth_ai/sdk/graphs/completions.py +570 -0
synth_ai/{inference → sdk/inference}/__init__.py +0 -1
synth_ai/sdk/inference/client.py +128 -0
synth_ai/sdk/jobs/__init__.py +16 -0
synth_ai/sdk/jobs/client.py +371 -0
synth_ai/sdk/judging/__init__.py +14 -0
synth_ai/sdk/judging/base.py +24 -0
synth_ai/sdk/judging/client.py +40 -0
synth_ai/sdk/judging/schemas.py +222 -0
synth_ai/sdk/judging/types.py +42 -0
synth_ai/sdk/learning/__init__.py +99 -0
synth_ai/sdk/learning/algorithms.py +14 -0
synth_ai/{learning → sdk/learning}/client.py +121 -30
synth_ai/sdk/learning/config.py +5 -0
synth_ai/{learning → sdk/learning}/constants.py +0 -2
synth_ai/sdk/learning/context_learning_client.py +531 -0
synth_ai/sdk/learning/context_learning_types.py +292 -0
synth_ai/sdk/learning/ft_client.py +7 -0
synth_ai/{learning → sdk/learning}/health.py +15 -9
synth_ai/{learning → sdk/learning}/jobs.py +44 -47
synth_ai/sdk/learning/prompt_extraction.py +334 -0
synth_ai/sdk/learning/prompt_learning_client.py +455 -0
synth_ai/sdk/learning/prompt_learning_types.py +186 -0
synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
synth_ai/sdk/learning/rl/config.py +31 -0
synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
synth_ai/sdk/learning/rl/secrets.py +13 -0
synth_ai/sdk/learning/rl_client.py +5 -0
synth_ai/sdk/learning/sft/__init__.py +29 -0
synth_ai/sdk/learning/sft/client.py +95 -0
synth_ai/sdk/learning/sft/config.py +270 -0
synth_ai/sdk/learning/sft/data.py +698 -0
synth_ai/sdk/learning/sse.py +57 -0
synth_ai/sdk/learning/validators.py +52 -0
synth_ai/sdk/localapi/__init__.py +40 -0
synth_ai/sdk/localapi/apps/__init__.py +28 -0
synth_ai/sdk/localapi/client.py +10 -0
synth_ai/sdk/localapi/contracts.py +10 -0
synth_ai/sdk/localapi/helpers.py +519 -0
synth_ai/sdk/localapi/rollouts.py +87 -0
synth_ai/sdk/localapi/server.py +29 -0
synth_ai/sdk/localapi/template.py +70 -0
synth_ai/sdk/streaming/__init__.py +35 -0
synth_ai/sdk/streaming/config.py +94 -0
synth_ai/sdk/streaming/handlers.py +1997 -0
synth_ai/sdk/streaming/streamer.py +713 -0
synth_ai/sdk/streaming/types.py +112 -0
synth_ai/sdk/task/__init__.py +164 -0
synth_ai/sdk/task/apps/__init__.py +169 -0
synth_ai/sdk/task/auth.py +165 -0
synth_ai/sdk/task/client.py +175 -0
synth_ai/sdk/task/config.py +257 -0
synth_ai/sdk/task/contracts.py +219 -0
synth_ai/sdk/task/datasets.py +108 -0
synth_ai/sdk/task/errors.py +50 -0
synth_ai/sdk/task/health.py +34 -0
synth_ai/sdk/task/in_process.py +1190 -0
synth_ai/sdk/task/in_process_runner.py +314 -0
synth_ai/sdk/task/inference_api.py +299 -0
synth_ai/sdk/task/json.py +111 -0
synth_ai/sdk/task/proxy.py +287 -0
synth_ai/sdk/task/rubrics/__init__.py +55 -0
synth_ai/sdk/task/rubrics/loaders.py +156 -0
synth_ai/sdk/task/rubrics/models.py +57 -0
synth_ai/sdk/task/rubrics/scoring.py +116 -0
synth_ai/sdk/task/rubrics/strict.py +149 -0
synth_ai/sdk/task/rubrics.py +219 -0
synth_ai/sdk/task/server.py +631 -0
synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
synth_ai/sdk/task/tracing_utils.py +95 -0
synth_ai/sdk/task/validators.py +441 -0
synth_ai/sdk/task/vendors.py +59 -0
synth_ai/sdk/training/__init__.py +102 -0
synth_ai/sdk/tunnels/__init__.py +83 -0
synth_ai/sdk/tunnels/cleanup.py +83 -0
synth_ai/sdk/tunnels/ports.py +120 -0
synth_ai/utils/__init__.py +213 -0
synth_ai-0.4.3.dist-info/METADATA +262 -0
synth_ai-0.4.3.dist-info/RECORD +370 -0
{synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
synth_ai/cli/calc.py +0 -69
synth_ai/cli/demo.py +0 -131
synth_ai/cli/legacy_root_backup.py +0 -470
synth_ai/cli/man.py +0 -106
synth_ai/cli/rl_demo.py +0 -137
synth_ai/cli/status.py +0 -133
synth_ai/config/base_url.py +0 -98
synth_ai/core/experiment.py +0 -15
synth_ai/core/system.py +0 -15
synth_ai/demos/core/__init__.py +0 -1
synth_ai/demos/core/cli.py +0 -685
synth_ai/demos/demo_task_apps/__init__.py +0 -1
synth_ai/demos/demo_task_apps/math/config.toml +0 -44
synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
synth_ai/environments/__init__.py +0 -31
synth_ai/environments/environment/__init__.py +0 -1
synth_ai/environments/environment/artifacts/__init__.py +0 -1
synth_ai/environments/environment/artifacts/base.py +0 -52
synth_ai/environments/environment/core.py +0 -67
synth_ai/environments/environment/db/__init__.py +0 -1
synth_ai/environments/environment/db/sqlite.py +0 -45
synth_ai/environments/environment/registry.py +0 -233
synth_ai/environments/environment/resources/sqlite.py +0 -45
synth_ai/environments/environment/results.py +0 -1
synth_ai/environments/environment/rewards/__init__.py +0 -1
synth_ai/environments/environment/rewards/core.py +0 -29
synth_ai/environments/environment/shared_engine.py +0 -26
synth_ai/environments/environment/tools/__init__.py +0 -200
synth_ai/environments/examples/__init__.py +0 -1
synth_ai/environments/examples/bandit/__init__.py +0 -33
synth_ai/environments/examples/bandit/engine.py +0 -294
synth_ai/environments/examples/bandit/environment.py +0 -194
synth_ai/environments/examples/bandit/taskset.py +0 -200
synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -724
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
synth_ai/environments/examples/crafter_classic/engine.py +0 -579
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
synth_ai/environments/examples/crafter_classic/environment.py +0 -404
synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
synth_ai/environments/examples/crafter_custom/environment.py +0 -312
synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
synth_ai/environments/examples/enron/engine.py +0 -295
synth_ai/environments/examples/enron/environment.py +0 -166
synth_ai/environments/examples/enron/taskset.py +0 -112
synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
synth_ai/environments/examples/minigrid/__init__.py +0 -48
synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
synth_ai/environments/examples/minigrid/engine.py +0 -589
synth_ai/environments/examples/minigrid/environment.py +0 -274
synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
synth_ai/environments/examples/minigrid/taskset.py +0 -583
synth_ai/environments/examples/nethack/__init__.py +0 -7
synth_ai/environments/examples/nethack/achievements.py +0 -337
synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
synth_ai/environments/examples/nethack/engine.py +0 -739
synth_ai/environments/examples/nethack/environment.py +0 -256
synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
synth_ai/environments/examples/nethack/taskset.py +0 -323
synth_ai/environments/examples/red/__init__.py +0 -7
synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
synth_ai/environments/examples/red/config_logging.py +0 -110
synth_ai/environments/examples/red/engine.py +0 -694
synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
synth_ai/environments/examples/red/environment.py +0 -238
synth_ai/environments/examples/red/taskset.py +0 -79
synth_ai/environments/examples/red/units/__init__.py +0 -1
synth_ai/environments/examples/sokoban/__init__.py +0 -1
synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
synth_ai/environments/examples/sokoban/engine.py +0 -678
synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
synth_ai/environments/examples/sokoban/environment.py +0 -229
synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
synth_ai/environments/examples/sokoban/taskset.py +0 -428
synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
synth_ai/environments/examples/tictactoe/__init__.py +0 -1
synth_ai/environments/examples/tictactoe/engine.py +0 -368
synth_ai/environments/examples/tictactoe/environment.py +0 -240
synth_ai/environments/examples/tictactoe/taskset.py +0 -215
synth_ai/environments/examples/verilog/__init__.py +0 -10
synth_ai/environments/examples/verilog/engine.py +0 -329
synth_ai/environments/examples/verilog/environment.py +0 -350
synth_ai/environments/examples/verilog/taskset.py +0 -420
synth_ai/environments/examples/wordle/__init__.py +0 -29
synth_ai/environments/examples/wordle/engine.py +0 -398
synth_ai/environments/examples/wordle/environment.py +0 -159
synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
synth_ai/environments/examples/wordle/taskset.py +0 -230
synth_ai/environments/reproducibility/core.py +0 -42
synth_ai/environments/reproducibility/helpers.py +0 -0
synth_ai/environments/reproducibility/tree.py +0 -364
synth_ai/environments/service/app.py +0 -91
synth_ai/environments/service/core_routes.py +0 -1020
synth_ai/environments/service/external_registry.py +0 -56
synth_ai/environments/service/registry.py +0 -9
synth_ai/environments/stateful/__init__.py +0 -1
synth_ai/environments/stateful/core.py +0 -163
synth_ai/environments/stateful/engine.py +0 -21
synth_ai/environments/stateful/state.py +0 -7
synth_ai/environments/tasks/api.py +0 -19
synth_ai/environments/tasks/core.py +0 -80
synth_ai/environments/tasks/filters.py +0 -41
synth_ai/environments/tasks/utils.py +0 -91
synth_ai/environments/v0_observability/history.py +0 -3
synth_ai/environments/v0_observability/log.py +0 -2
synth_ai/evals/base.py +0 -15
synth_ai/experimental/synth_oss.py +0 -446
synth_ai/http.py +0 -102
synth_ai/inference/client.py +0 -20
synth_ai/install_sqld.sh +0 -40
synth_ai/jobs/client.py +0 -246
synth_ai/learning/__init__.py +0 -24
synth_ai/learning/config.py +0 -43
synth_ai/learning/filtering.py +0 -0
synth_ai/learning/ft_client.py +0 -59
synth_ai/learning/offline/dpo.py +0 -0
synth_ai/learning/offline/providers.py +0 -7
synth_ai/learning/offline/sft.py +0 -0
synth_ai/learning/offline/shared.py +0 -0
synth_ai/learning/online/grpo.py +0 -0
synth_ai/learning/online/irft.py +0 -0
synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
synth_ai/learning/prompts/gepa.py +0 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
synth_ai/learning/prompts/mipro.py +0 -289
synth_ai/learning/prompts/random_search.py +0 -246
synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
synth_ai/learning/sse.py +0 -58
synth_ai/learning/validators.py +0 -48
synth_ai/lm/__init__.py +0 -51
synth_ai/lm/caching/constants.py +0 -6
synth_ai/lm/caching/dbs.py +0 -0
synth_ai/lm/caching/ephemeral.py +0 -102
synth_ai/lm/caching/handler.py +0 -137
synth_ai/lm/caching/initialize.py +0 -11
synth_ai/lm/caching/persistent.py +0 -114
synth_ai/lm/config.py +0 -110
synth_ai/lm/constants.py +0 -32
synth_ai/lm/core/__init__.py +0 -8
synth_ai/lm/core/all.py +0 -73
synth_ai/lm/core/exceptions.py +0 -7
synth_ai/lm/core/main.py +0 -319
synth_ai/lm/core/main_v3.py +0 -594
synth_ai/lm/core/synth_models.py +0 -48
synth_ai/lm/core/vendor_clients.py +0 -188
synth_ai/lm/cost/__init__.py +0 -0
synth_ai/lm/cost/monitor.py +0 -1
synth_ai/lm/cost/statefulness.py +0 -1
synth_ai/lm/injection.py +0 -80
synth_ai/lm/overrides.py +0 -206
synth_ai/lm/provider_support/__init__.py +0 -8
synth_ai/lm/provider_support/anthropic.py +0 -972
synth_ai/lm/provider_support/openai.py +0 -1139
synth_ai/lm/provider_support/suppress_logging.py +0 -31
synth_ai/lm/structured_outputs/__init__.py +0 -0
synth_ai/lm/structured_outputs/handler.py +0 -440
synth_ai/lm/structured_outputs/inject.py +0 -297
synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
synth_ai/lm/tools/__init__.py +0 -3
synth_ai/lm/tools/base.py +0 -172
synth_ai/lm/unified_interface.py +0 -202
synth_ai/lm/vendors/__init__.py +0 -0
synth_ai/lm/vendors/base.py +0 -81
synth_ai/lm/vendors/core/__init__.py +0 -0
synth_ai/lm/vendors/core/anthropic_api.py +0 -387
synth_ai/lm/vendors/core/gemini_api.py +0 -292
synth_ai/lm/vendors/core/mistral_api.py +0 -322
synth_ai/lm/vendors/core/openai_api.py +0 -220
synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
synth_ai/lm/vendors/local/__init__.py +0 -0
synth_ai/lm/vendors/local/ollama.py +0 -0
synth_ai/lm/vendors/openai_standard.py +0 -780
synth_ai/lm/vendors/openai_standard_responses.py +0 -256
synth_ai/lm/vendors/retries.py +0 -22
synth_ai/lm/vendors/supported/__init__.py +0 -0
synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
synth_ai/lm/vendors/supported/deepseek.py +0 -69
synth_ai/lm/vendors/supported/grok.py +0 -75
synth_ai/lm/vendors/supported/groq.py +0 -16
synth_ai/lm/vendors/supported/ollama.py +0 -15
synth_ai/lm/vendors/supported/openrouter.py +0 -74
synth_ai/lm/vendors/supported/together.py +0 -11
synth_ai/lm/vendors/synth_client.py +0 -808
synth_ai/lm/warmup.py +0 -186
synth_ai/rl/secrets.py +0 -19
synth_ai/scripts/verify_rewards.py +0 -100
synth_ai/task/__init__.py +0 -10
synth_ai/task/contracts.py +0 -120
synth_ai/task/health.py +0 -28
synth_ai/task/validators.py +0 -12
synth_ai/tracing/__init__.py +0 -30
synth_ai/tracing_v1/__init__.py +0 -33
synth_ai/tracing_v3/config.py +0 -84
synth_ai/tracing_v3/storage/config.py +0 -62
synth_ai/tracing_v3/turso/__init__.py +0 -25
synth_ai/tracing_v3/turso/daemon.py +0 -144
synth_ai/tracing_v3/turso/manager.py +0 -760
synth_ai/v0/tracing/__init__.py +0 -0
synth_ai/v0/tracing/abstractions.py +0 -224
synth_ai/v0/tracing/base_client.py +0 -91
synth_ai/v0/tracing/client_manager.py +0 -131
synth_ai/v0/tracing/config.py +0 -140
synth_ai/v0/tracing/context.py +0 -146
synth_ai/v0/tracing/decorators.py +0 -680
synth_ai/v0/tracing/events/__init__.py +0 -0
synth_ai/v0/tracing/events/manage.py +0 -147
synth_ai/v0/tracing/events/scope.py +0 -86
synth_ai/v0/tracing/events/store.py +0 -228
synth_ai/v0/tracing/immediate_client.py +0 -151
synth_ai/v0/tracing/local.py +0 -18
synth_ai/v0/tracing/log_client_base.py +0 -73
synth_ai/v0/tracing/retry_queue.py +0 -186
synth_ai/v0/tracing/trackers.py +0 -515
synth_ai/v0/tracing/upload.py +0 -510
synth_ai/v0/tracing/utils.py +0 -9
synth_ai/v0/tracing_v1/__init__.py +0 -16
synth_ai/v0/tracing_v1/abstractions.py +0 -224
synth_ai/v0/tracing_v1/base_client.py +0 -91
synth_ai/v0/tracing_v1/client_manager.py +0 -131
synth_ai/v0/tracing_v1/config.py +0 -140
synth_ai/v0/tracing_v1/context.py +0 -146
synth_ai/v0/tracing_v1/decorators.py +0 -701
synth_ai/v0/tracing_v1/events/__init__.py +0 -0
synth_ai/v0/tracing_v1/events/manage.py +0 -147
synth_ai/v0/tracing_v1/events/scope.py +0 -86
synth_ai/v0/tracing_v1/events/store.py +0 -228
synth_ai/v0/tracing_v1/immediate_client.py +0 -151
synth_ai/v0/tracing_v1/local.py +0 -18
synth_ai/v0/tracing_v1/log_client_base.py +0 -73
synth_ai/v0/tracing_v1/retry_queue.py +0 -186
synth_ai/v0/tracing_v1/trackers.py +0 -515
synth_ai/v0/tracing_v1/upload.py +0 -525
synth_ai/v0/tracing_v1/utils.py +0 -9
synth_ai/zyk/__init__.py +0 -30
synth_ai-0.2.6.dev1.dist-info/METADATA +0 -106
synth_ai-0.2.6.dev1.dist-info/RECORD +0 -416
/synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
/synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
/synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
/synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
/synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
/synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
/synth_ai/{compound/cais.py → py.typed} +0 -0
/synth_ai/{learning → sdk/learning}/core.py +0 -0
/synth_ai/{learning → sdk/learning}/gateway.py +0 -0
{synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
{synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0

synth_ai/cli/demo_apps/mipro/task_app.py ADDED Viewed

@@ -0,0 +1,922 @@
+"""Banking77 intent classification task app for Synth prompt optimization benchmarks."""
+import contextlib
+import inspect
+import json
+import os
+import socket
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+from typing import Any, Mapping, cast
+from urllib.parse import urlparse
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from starlette.requests import Request as StarletteRequest
+from synth_ai.sdk.task.auth import is_api_key_header_authorized, normalize_environment_api_key
+from synth_ai.sdk.task.contracts import (
+    RolloutMetrics,
+    RolloutRequest,
+    RolloutResponse,
+    TaskInfo,
+)
+from synth_ai.sdk.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
+from synth_ai.sdk.task.rubrics import Rubric, load_rubric
+from synth_ai.sdk.task.server import (
+    ProxyConfig,
+    RubricBundle,
+    TaskAppConfig,
+    create_task_app,
+    run_task_app,
+)
+from synth_ai.sdk.task.trace_correlation_helpers import (
+    build_trace_payload,
+    extract_trace_correlation_id,
+)
+from synth_ai.sdk.task.vendors import normalize_vendor_keys
+# Dataset configuration
+# Dataset identifier handed to the Hugging Face loader; overridable via the BANKING77_DATASET_NAME env var.
+DATASET_NAME = os.getenv("BANKING77_DATASET_NAME", "banking77")
+# Split used when a caller does not name one (also the split loaded to discover label names).
+DEFAULT_SPLIT = "train"
+# The only split names Banking77Dataset._load_split accepts; anything else raises ValueError.
+AVAILABLE_SPLITS: tuple[str, ...] = ("train", "test")
+# Function name of the tool the model must call to report its predicted intent.
+TOOL_NAME = "banking77_classify"
+def get_current_module_code():
+    """Return the source text of the module that called this function.
+    Returns None when the calling frame, its module, or the module's source
+    cannot be determined.
+    """
+    this_frame = inspect.currentframe()
+    try:
+        # Walk one step up the stack: the caller is whoever invoked this helper.
+        caller = this_frame.f_back if this_frame is not None else None
+        owner = inspect.getmodule(caller) if caller is not None else None
+        if owner is None:
+            return None
+        try:
+            return inspect.getsource(owner)
+        except (OSError, TypeError):
+            # Source unavailable (for example no backing file) or not a source-bearing object.
+            return None
+    finally:
+        # Drop the local frame reference, as the inspect docs advise, so it cannot linger in a cycle.
+        del this_frame
+class Banking77Dataset:
+    """Banking77 accessor that loads Hugging Face splits on first use and keeps them cached."""
+    def __init__(self) -> None:
+        # Maps a split name to its loaded dataset object.
+        self._cache: dict[str, Any] = {}
+        # Intent label strings; filled from the first loaded split whose "label" feature exposes names.
+        self._label_names: list[str] | None = None
+    def _load_split(self, split: str):
+        """Return the dataset for ``split``, loading and caching it on first access.
+        Raises ValueError for a split outside AVAILABLE_SPLITS and RuntimeError
+        (chained to the original error) when loading fails.
+        """
+        if split not in AVAILABLE_SPLITS:
+            raise ValueError(f"Unknown split: {split}. Available: {AVAILABLE_SPLITS}")
+        if split in self._cache:
+            return self._cache[split]
+        try:
+            from datasets import load_dataset as _load_dataset  # lazy import
+            loaded = _load_dataset(DATASET_NAME, split=split, trust_remote_code=False)
+            self._cache[split] = loaded
+            label_feature = loaded.features.get("label")  # type: ignore[attr-defined]
+            if self._label_names is None:
+                if label_feature is not None and hasattr(label_feature, "names"):
+                    self._label_names = label_feature.names
+        except Exception as exc:
+            raise RuntimeError(
+                f"Dataset preparation failed: {split}: Failed to download Banking77 dataset from Hugging Face. "
+                f"Dataset: {DATASET_NAME} | Split: {split}"
+            ) from exc
+        return self._cache[split]
+    def ensure_ready(self, splits: Sequence[str]) -> None:
+        """Load every split in ``splits`` so later lookups hit the cache."""
+        for name in splits:
+            self._load_split(name)
+    def size(self, split: str) -> int:
+        """Return the number of rows in ``split``."""
+        return len(self._load_split(split))
+    def sample(self, *, split: str, index: int) -> dict[str, Any]:
+        """Return the row at ``index`` (wrapped modulo the split size) as a plain dict.
+        Raises RuntimeError when the split has no rows.
+        """
+        rows = self._load_split(split)
+        total = len(rows)
+        if not total:
+            raise RuntimeError(f"Banking77 split '{split}' is empty")
+        position = int(index) % total
+        record = rows[position]
+        label_idx = int(record.get("label", 0))
+        label_text = self.get_label_name(label_idx)
+        result: dict[str, Any] = {"index": position, "split": split}
+        result["text"] = str(record.get("text", ""))
+        result["label"] = label_text
+        result["label_idx"] = label_idx
+        return result
+    def get_label_name(self, label_idx: int) -> str:
+        """Return the label string for ``label_idx``, or ``label_<idx>`` when it is unknown."""
+        # The property loads DEFAULT_SPLIT on demand and yields [] when no names are available.
+        names = self.label_names
+        if 0 <= label_idx < len(names):
+            return names[label_idx]
+        return f"label_{label_idx}"
+    @property
+    def label_names(self) -> list[str]:
+        """All known label strings, loading DEFAULT_SPLIT first if none are cached yet."""
+        if self._label_names is None:
+            self._load_split(DEFAULT_SPLIT)
+        return self._label_names if self._label_names else []
+# Router that carries the Banking77-specific HTTP routes declared in this module (e.g. POST /classify).
+banking77_router = APIRouter()
+# Static descriptor for the dataset; splits and default split mirror AVAILABLE_SPLITS / DEFAULT_SPLIT.
+BANKING77_DATASET_SPEC = TaskDatasetSpec(
+    id="banking77",
+    name="Banking77 Intent Classification",
+    version="1.0.0",
+    splits=list(AVAILABLE_SPLITS),
+    default_split=DEFAULT_SPLIT,
+    description="Banking customer query intent classification with 77 intent categories.",
+)
+# Request body accepted by the POST /classify route.
+class ClassifyReq(BaseModel):
+    # Query text submitted for classification (the current stub endpoint does not read it).
+    query: str
+# Response body returned by the POST /classify route.
+class ClassifyRes(BaseModel):
+    # Intent label chosen for the query.
+    intent: str
+    # Optional confidence for the intent; None when no score is reported.
+    confidence: float | None = None
+# Stub classification route: always answers intent "activate_my_card" with no confidence,
+# regardless of the submitted query.
+@banking77_router.post("/classify", response_model=ClassifyRes)
+async def classify_endpoint(req: ClassifyReq, request: Request):
+    # Only touches the dataset stored on app state; presumably attached at app startup - confirm.
+    _ = request.app.state.banking77_dataset  # Dataset loaded but not used in this stub endpoint
+    return ClassifyRes(intent="activate_my_card", confidence=None)
+async def call_chat_completion(
+    policy_config: dict[str, Any],
+    placeholders: dict[str, Any],
+    default_messages: list[dict[str, str]],
+    api_key: str | None = None,
+) -> tuple[str, dict[str, Any] | None, list[dict[str, Any]]]:
+    """Render the prompt templates, POST them to the policy's chat-completions route, and validate the reply.
+    Args:
+        policy_config: Policy settings. ``model`` is required, as is a route taken from
+            ``inference_url`` (preferred) or ``api_base`` / ``base_url``. ``temperature``
+            (default 0.7) and ``max_completion_tokens`` (default 100) are optional.
+        placeholders: Values substituted into each template's ``pattern`` via ``str.format``.
+        default_messages: Message templates, each holding a ``role`` and a ``pattern``.
+        api_key: Optional key forwarded to the route in the ``X-API-Key`` header.
+    Returns:
+        ``(response_text, response_json, tool_calls)``: the first choice's text content
+        (``""`` when absent or null), the decoded response body, and the first choice's
+        tool calls normalised to ``id`` / ``type`` / ``function`` dicts.
+    Raises:
+        HTTPException: 400 for missing policy fields; 502 for a direct provider URL, a
+            failed POST, empty model output, or an invalid tool call; the upstream status
+            code for a non-200 reply; 500 when httpx is unavailable or the response
+            cannot be validated.
+    """
+    # STRICT: require all policy fields to come from TOML (no defaults)
+    missing_fields: list[str] = []
+    # Always require model; provider optional when routing via proxy
+    model_val = policy_config.get("model")
+    if not isinstance(model_val, str) or not model_val.strip():
+        missing_fields.append("model")
+    # inference_url (the trainer-provided interceptor URL) is used exclusively when present;
+    # api_base/base_url are only a fallback.
+    inference_url_raw = policy_config.get("inference_url")
+    api_base_raw = policy_config.get("api_base")
+    base_url_raw = policy_config.get("base_url")
+    if inference_url_raw:
+        route_base = str(inference_url_raw).strip()
+        if api_base_raw or base_url_raw:
+            # Warn that the other routing fields are being ignored
+            with contextlib.suppress(Exception):
+                print(
+                    f"[TASK_APP] ⚠️  inference_url is set ({route_base}), ignoring api_base/base_url",
+                    flush=True,
+                )
+    else:
+        # str() keeps a non-string config value from raising AttributeError on .strip()
+        route_base = str(api_base_raw or "").strip() or str(base_url_raw or "").strip()
+    if not route_base:
+        missing_fields.append("inference_url")
+    if missing_fields:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "Missing policy fields in TOML [prompt_learning.policy]: " + ", ".join(missing_fields)
+            ),
+        )
+    model = policy_config["model"].strip()
+    lowered = route_base.lower()
+    is_provider_host = ("api.openai.com" in lowered) or ("api.groq.com" in lowered)
+    def _mask_secret(secret: str) -> str:
+        """Return a log-safe rendering of ``secret``; short values are hidden entirely."""
+        # Showing 12 leading + 4 trailing characters would reveal all of a short key,
+        # so only do it when at least half of the key stays hidden.
+        if len(secret) > 32:
+            return f"{secret[:12]}...{secret[-4:]}"
+        return "***"
+    # Normalize inference URL: allow bases like .../v1 and auto-append /chat/completions
+    # Properly handles query strings and interceptor URLs with trial IDs
+    # Matches the pattern used in gepa_benchmarks/common.py for consistency
+    def _normalize_chat_url(url: str) -> str:
+        """Return ``url`` with its path extended to a chat-completions endpoint, query preserved."""
+        from urllib.parse import urlparse, urlunparse
+        u = (url or "").rstrip("/")
+        if not u:
+            return "/chat/completions"
+        # Parse URL to separate path from query parameters
+        parsed = urlparse(u)
+        path = parsed.path.rstrip("/")
+        query = parsed.query
+        fragment = parsed.fragment
+        # Already complete
+        if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
+            return u
+        # Interceptor URLs look like /v1/{trial_id} (e.g., /v1/cli-mipro-..., /v1/gepa-...);
+        # they already carry the /v1 segment, so only /chat/completions is appended.
+        if "/v1/" in path and not path.endswith("/v1"):
+            new_path = f"{path}/chat/completions"
+            return urlunparse((parsed.scheme, parsed.netloc, new_path, parsed.params, query, fragment))
+        # Standard case: append /v1/chat/completions
+        if path.endswith("/v1"):
+            new_path = f"{path}/chat/completions"
+        elif path.endswith("/completions"):
+            new_path = path.rsplit("/", 1)[0] + "/chat/completions"
+        else:
+            new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
+        # Reconstruct URL with query parameters preserved
+        return urlunparse((parsed.scheme, parsed.netloc, new_path, parsed.params, query, fragment))
+    inference_url = _normalize_chat_url(str(route_base))
+    temperature = policy_config.get("temperature", 0.7)
+    max_tokens = policy_config.get("max_completion_tokens", 100)
+    # Loud route log
+    with contextlib.suppress(Exception):
+        print(f"[TASK_APP] POLICY ROUTE → {inference_url}", flush=True)
+    messages = [
+        {
+            "role": template.get("role", "user"),
+            "content": template.get("pattern", "").format(**placeholders),
+        }
+        for template in default_messages
+    ]
+    # Loud logging of rendered messages (trim for safety)
+    preview = [
+        {"role": m.get("role"), "len": len(m.get("content", "")), "head": (m.get("content", "")[:160])}
+        for m in messages
+    ]
+    print(f"[TASK_APP] MESSAGES: {preview}", flush=True)
+    # Policy traffic must go through the proxy/interceptor, never straight to a provider host
+    if is_provider_host:
+        # Print full policy config for forensics
+        with contextlib.suppress(Exception):
+            print(
+                f"[TASK_APP] POLICY_CONFIG: {json.dumps(policy_config, ensure_ascii=False)}",
+                flush=True,
+            )
+        raise HTTPException(status_code=502, detail=f"Direct provider URL not allowed for policy: {route_base}")
+    # If routing to proxy/interceptor, include task app API key if provided
+    headers: dict[str, str] = {"Content-Type": "application/json"}
+    if api_key:
+        headers["X-API-Key"] = api_key
+        with contextlib.suppress(Exception):
+            print(f"[TASK_APP] 🔐 PROXY ROUTING with API key: {_mask_secret(api_key)} (len={len(api_key)})", flush=True)
+            print(f"[TASK_APP] 🔐 Headers being sent to proxy: {list(headers.keys())}", flush=True)
+            print("[TASK_APP] ✅ Header validation passed: X-API-Key present", flush=True)
+    else:
+        with contextlib.suppress(Exception):
+            print("[TASK_APP] ⚠️  PROXY ROUTING (NO API KEY PROVIDED!)", flush=True)
+            print("[TASK_APP] ⚠️  This will likely fail auth at the proxy endpoint", flush=True)
+    # Define tool schema for banking77 classification (no enum to keep payload small)
+    classify_tool = {
+        "type": "function",
+        "function": {
+            "name": TOOL_NAME,
+            "description": "Return the predicted banking77 intent label in the `intent` field.",
+            "parameters": {
+                "type": "object",
+                "properties": {"intent": {"type": "string"}},
+                "required": ["intent"],
+            },
+        },
+    }
+    payload = {
+        "model": model,
+        "messages": messages,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        "tools": [classify_tool],
+        "tool_choice": "required",
+    }
+    print(
+        f"[TASK_APP] OUTBOUND: model={model} temp={temperature} max={max_tokens} tools=1 choice={TOOL_NAME}",
+        flush=True,
+    )
+    # Lazy import httpx to avoid top-level import during modal code gen
+    try:
+        import httpx  # type: ignore
+    except Exception as _exc:  # pragma: no cover
+        raise HTTPException(status_code=500, detail=f"httpx unavailable: {_exc}") from _exc
+    # Proxy target diagnostics (no preflight health; we go straight to POST)
+    try:
+        import asyncio
+        parsed = urlparse(inference_url)
+        host = parsed.hostname or ""
+        port = parsed.port or (443 if parsed.scheme == "https" else 80)
+        print(f"[TASK_APP] PROXY_TARGET: scheme={parsed.scheme} host={host} port={port} path={parsed.path}", flush=True)
+        # Resolve via the event loop so a slow DNS lookup does not stall other coroutines
+        addrinfo = await asyncio.get_running_loop().getaddrinfo(host, None)
+        ips = sorted({ai[4][0] for ai in addrinfo})
+        print(f"[TASK_APP] PROXY_DNS: ips={ips}", flush=True)
+    except Exception as e:
+        # Diagnostics only: a failed lookup is logged and the POST is still attempted
+        print(f"[TASK_APP] PROXY_DNS_ERROR: {e}", flush=True)
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        # Log the actual request about to be sent
+        with contextlib.suppress(Exception):
+            headers_log = {k: (_mask_secret(v) if k == "X-API-Key" else v) for k, v in headers.items()}
+            print(f"[TASK_APP] 📤 Sending POST to: {inference_url}", flush=True)
+            print(f"[TASK_APP] 📤 With headers: {headers_log}", flush=True)
+            print(f"[TASK_APP] 📤 Payload keys: {list(payload.keys())}", flush=True)
+            if "X-API-Key" in headers:
+                print(f"[TASK_APP] ✅ X-API-Key IS in headers (len={len(headers['X-API-Key'])})", flush=True)
+            else:
+                print("[TASK_APP] ❌ X-API-Key NOT in headers!", flush=True)
+        try:
+            response = await client.post(inference_url, json=payload, headers=headers)
+        except Exception as e:
+            print(f"[TASK_APP] POST_EXCEPTION: {type(e).__name__}: {e}", flush=True)
+            raise HTTPException(status_code=502, detail=f"Proxy POST failed: {e}") from e
+        # Always print status/headers/body BEFORE any error is raised
+        print(f"[TASK_APP] RESPONSE_STATUS: {response.status_code}", flush=True)
+        print(f"[TASK_APP] RESPONSE_HEADERS: {dict(response.headers)}", flush=True)
+        # Handle error responses from interceptor/provider
+        if response.status_code != 200:
+            try:
+                error_json = response.json()
+                error_msg = str(error_json.get("error", {}).get("message", error_json.get("error", "Unknown error")))  # type: ignore[misc]
+                print(f"[TASK_APP] ❌ Error response from interceptor: {error_msg}", flush=True)
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Interceptor/provider error: {error_msg}"
+                )
+            except HTTPException:
+                raise
+            except Exception as e:
+                error_text = response.text[:500]
+                print(f"[TASK_APP] ❌ Non-JSON error response: {error_text}", flush=True)
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Interceptor/provider returned error: {error_text}"
+                ) from e
+        # Status is 200 from here on. Try JSON, fallback to text.
+        try:
+            response_json = response.json()
+            raw = json.dumps(response_json, ensure_ascii=False)
+            print(f"[TASK_APP] RESPONSE_JSON ({len(raw)} bytes): {raw}", flush=True)
+        except Exception:
+            body_text = response.text
+            print(f"[TASK_APP] RESPONSE_TEXT ({len(body_text)} bytes): {body_text}", flush=True)
+            # Non-JSON 200 body: continue with an empty payload so validation below rejects it
+            response_json = {}
+    with contextlib.suppress(Exception):
+        usage = response_json.get("usage", {}) if isinstance(response_json, dict) else {}  # type: ignore[misc]
+        ch = (response_json.get("choices") or [{}])[0]  # type: ignore[misc]
+        txt = (ch.get("message", {}) or {}).get("content", "")  # type: ignore[misc]
+        tc = (ch.get("message", {}) or {}).get("tool_calls", [])  # type: ignore[misc]
+        print(
+            f"[TASK_APP] RESPONSE: usage={usage} choices={len(response_json.get('choices', []))} first_len={len(txt)} tool_calls={len(tc)}",
+            flush=True,
+        )
+    # Hard assertions: require either tool_calls or non-empty content
+    tool_calls: list[dict[str, Any]] = []
+    try:
+        choices = response_json.get("choices") or []  # type: ignore[misc]
+        first_msg = ((choices[0] or {}).get("message") or {}) if choices else {}  # type: ignore[misc]
+        raw_tool_calls = first_msg.get("tool_calls", []) or []  # type: ignore[misc]
+        # A null content must count as empty: str(None) would yield the non-empty text "None"
+        raw_content = first_msg.get("content")  # type: ignore[misc]
+        response_text = "" if raw_content is None else str(raw_content)
+        if not raw_tool_calls and not response_text.strip():
+            raise HTTPException(status_code=502, detail="Empty model output: no tool_calls and no content")
+        # Validate every tool call against the schema, then keep a normalised copy
+        for call in raw_tool_calls:
+            fn = (call or {}).get("function", {}) or {}  # type: ignore[misc]
+            if fn.get("name") != TOOL_NAME:  # type: ignore[misc]
+                raise HTTPException(status_code=502, detail=f"Unexpected tool name: {fn.get('name')}")  # type: ignore[misc]
+            args_raw = fn.get("arguments", "{}")  # type: ignore[misc]
+            try:
+                args = json.loads(args_raw)
+            except Exception as e:
+                raise HTTPException(status_code=502, detail="Tool call arguments not valid JSON") from e
+            if not isinstance(args, dict) or not str(args.get("intent", "")).strip():
+                raise HTTPException(status_code=502, detail="Tool call missing 'intent'")
+            tool_calls.append({
+                "id": call.get("id", ""),
+                "type": call.get("type", "function"),
+                "function": {
+                    "name": fn.get("name", ""),
+                    "arguments": args_raw,
+                },
+            })
+    except HTTPException:
+        raise
+    except Exception as exc:
+        # Convert unexpected errors to HTTP for visibility
+        raise HTTPException(status_code=500, detail=f"Response validation failed: {exc}") from exc
+    return response_text, response_json, tool_calls
+async def rollout_executor(request: RolloutRequest, fastapi_request: Request) -> RolloutResponse:
+    dataset: Banking77Dataset = fastapi_request.app.state.banking77_dataset
+    # Inbound snapshot from GEPA
+    with contextlib.suppress(Exception):
+        cfg = (request.policy.config or {})
+        print(
+            f"[TASK_APP] INBOUND_ROLLOUT: run_id={request.run_id} seed={request.env.seed} env={request.env.env_name} "
+            f"policy.model={cfg.get('model')} provider={cfg.get('provider')} api_base={cfg.get('inference_url') or cfg.get('api_base') or cfg.get('base_url')}",
+            flush=True,
+        )
+    split = str(((request.env.config or {}).get("split")) or DEFAULT_SPLIT)
+    seed = request.env.seed or 0
+    sample = dataset.sample(split=split, index=seed)
+    observation = {
+        "query": sample["text"],
+        "index": sample["index"],
+        "split": sample["split"],
+        "available_intents": dataset.label_names,
+    }
+    # Format available intents as a numbered list for the prompt
+    intents_list = "\n".join(f"{i+1}. {label}" for i, label in enumerate(dataset.label_names))
+    placeholders = {
+        "query": sample["text"],
+        "available_intents": intents_list,
+    }
+    default_messages = [
+        {
+            "role": "system",
+            "pattern": (
+                "You are an expert banking assistant that classifies customer queries into banking intents. "
+                "Given a customer message, respond with exactly one intent label from the provided list using the `banking77_classify` tool."
+            ),
+        },
+        {
+            "role": "user",
+            "pattern": "Customer Query: {query}\n\nAvailable Intents:\n{available_intents}\n\nClassify this query into one of the above banking intents using the tool call.",
+        },
+    ]
+    response_json: dict[str, Any] | None = None
+    response_text = ""
+    tool_calls = []
+    # Render baseline messages for validation/introspection
+    rendered_messages: list[dict[str, str]] = []
+    for msg_template in default_messages:
+        role = msg_template.get("role", "user")
+        pattern = msg_template.get("pattern", "")
+        content = pattern.format(**placeholders)
+        rendered_messages.append({"role": role, "content": content})
+    error_info: dict[str, Any] = {}
+    # Extract API key from request headers for forwarding to proxy
+    api_key = (
+        fastapi_request.headers.get("X-API-Key")
+        or fastapi_request.headers.get("x-api-key")
+        or (fastapi_request.headers.get("Authorization", "").replace("Bearer ", "").strip() if fastapi_request.headers.get("Authorization") else None)
+        or None
+    )
+    # Call proxy - HARD FAILS on any invalid/empty responses. No soft handling.
+    response_text, response_json, tool_calls = await call_chat_completion(
+        request.policy.config or {},
+        placeholders,
+        default_messages,
+        api_key=api_key,
+    )
+    # Full upstream JSON must be present and non-empty
+    try:
+        raw_upstream = json.dumps(response_json, ensure_ascii=False)
+    except Exception:
+        raw_upstream = str(response_json)
+    print(f"[TASK_APP] UPSTREAM_RESPONSE_JSON ({len(raw_upstream)} bytes): {raw_upstream}", flush=True)
+    if not isinstance(response_json, dict) or not response_json:
+        raise RuntimeError("Proxy returned missing/empty JSON")
+    # Must have choices
+    choices = response_json.get("choices") or []
+    if not isinstance(choices, list) or len(choices) == 0:
+        raise RuntimeError("Proxy JSON missing choices")
+    first_msg = (choices[0] or {}).get("message", {}) if choices else {}
+    if not isinstance(first_msg, dict):
+        raise RuntimeError("Proxy JSON message malformed")
+    tc_list = first_msg.get("tool_calls") or []
+    content_text = str(first_msg.get("content", ""))
+    if not tc_list and not content_text.strip():
+        raise RuntimeError("Proxy JSON has neither tool_calls nor content")
+    print(f"[TASK_APP] RAW_TOOL_CALLS: {tool_calls}", flush=True)
+    predicted_intent = ""
+    if tool_calls:
+        for tc in tool_calls:
+            if tc.get("function", {}).get("name") == TOOL_NAME:
+                args_str = tc.get("function", {}).get("arguments", "{}")
+                try:
+                    args = json.loads(args_str)
+                    predicted_intent = args.get("intent", "")
+                    print(f"[TASK_APP] PARSED_TOOL_INTENT: {predicted_intent}", flush=True)
+                except Exception:
+                    print(f"[TASK_APP] TOOL_PARSE_ERROR: {args_str}", flush=True)
+    elif response_text:
+        predicted_intent = response_text.strip().split()[0] if response_text.strip() else ""
+        print(f"[TASK_APP] CONTENT_FALLBACK_INTENT: {predicted_intent} text_len={len(response_text or '')}", flush=True)
+    # Hard-crash if no prediction produced at this point
+    if not str(predicted_intent or "").strip():
+        raise RuntimeError("No prediction produced from proxy response")
+    expected_intent = sample["label"]
+    is_correct = (predicted_intent.lower().replace("_", " ") == expected_intent.lower().replace("_", " "))
+    reward = 1.0 if is_correct else 0.0
+    print(
+        f"[TASK_APP] PREDICTION: expected={expected_intent} predicted={predicted_intent} correct={is_correct}",
+        flush=True,
+    )
+    info_payload = {
+        "expected_intent": expected_intent,
+        "predicted_intent": predicted_intent,
+        "response_json": response_json,
+        "tool_calls": tool_calls,
+        "correct": is_correct,
+        # Provide messages so pattern validation can extract them reliably
+        "messages": rendered_messages,
+        **error_info,
+    }
+    with contextlib.suppress(Exception):
+        print(
+            f"[BANKING77_ROLLOUT] run_id={request.run_id} split={sample['split']} "
+            f"index={sample['index']} expected={expected_intent} predicted={predicted_intent} "
+            f"reward={reward}",
+            flush=True,
+        )
+    inference_url = (request.policy.config or {}).get("inference_url")
+    metrics = RolloutMetrics(
+        episode_returns=[reward],
+        mean_return=reward,
+        num_steps=1,
+        num_episodes=1,
+        outcome_score=reward,
+        events_score=reward,
+        details={"correct": is_correct},
+    )
+    policy_config = request.policy.config or {}
+    trace_correlation_id = extract_trace_correlation_id(
+        policy_config=policy_config,
+        inference_url=str(inference_url or ""),
+        mode=request.mode,
+    )
+    trace_metadata = {
+        "env": "banking77",
+        "split": sample["split"],
+        "index": sample["index"],
+        "correct": is_correct,
+    }
+    trace_payload = build_trace_payload(
+        messages=rendered_messages,
+        response=response_json if isinstance(response_json, dict) else None,
+        correlation_id=trace_correlation_id,
+        metadata=trace_metadata,
+    )
+    pipeline_metadata = {"inference_url": str(inference_url or "")}
+    if trace_correlation_id:
+        pipeline_metadata["trace_correlation_id"] = trace_correlation_id
+    return RolloutResponse(
+        run_id=request.run_id,
+        branches={},
+        metrics=metrics,
+        aborted=False,
+        trace_correlation_id=trace_correlation_id,
+        trace=trace_payload,
+        pipeline_metadata=pipeline_metadata,
+    )
+def build_dataset() -> tuple[TaskDatasetRegistry, Banking77Dataset]:
+    registry = TaskDatasetRegistry()
+    dataset = Banking77Dataset()
+    # Lazy load dataset on first use to avoid cold-start latency/timeouts
+    registry.register(BANKING77_DATASET_SPEC, lambda _spec: dataset, cache=True)
+    return registry, dataset
+def _base_task_info() -> TaskInfo:
+    return TaskInfo(  # type: ignore[call-overload]
+        task={  # type: ignore[arg-type]
+            "id": "banking77",
+            "name": "Banking77 Intent Classification",
+            "version": "1.0.0",
+            "action_space": {
+                "type": "tool_call",
+                "tool_name": TOOL_NAME,
+                "description": "Classify banking queries into one of 77 intent categories.",
+            },
+        },
+        environment="banking77",
+        dataset={  # type: ignore[arg-type]
+            **BANKING77_DATASET_SPEC.model_dump(),
+            "hf_dataset": DATASET_NAME,
+        },
+        rubric={  # type: ignore[arg-type]
+            "version": "1",
+            "criteria_count": 1,
+            "source": "inline",
+        },
+        inference={  # type: ignore[arg-type]
+            "supports_proxy": True,
+            "tool": TOOL_NAME,
+        },
+        limits={"max_turns": 1},  # type: ignore[arg-type]
+        task_metadata={"format": "tool_call"},  # type: ignore[arg-type]
+    )
+def describe_taskset(dataset: Banking77Dataset) -> Mapping[str, Any]:
+    return {
+        **BANKING77_DATASET_SPEC.model_dump(),
+        "hf_dataset": DATASET_NAME,
+        "num_labels": len(dataset.label_names),
+        "sizes": {split: dataset.size(split) for split in AVAILABLE_SPLITS},
+    }
+def provide_task_instances(dataset: Banking77Dataset, seeds: Sequence[int]) -> Iterable[TaskInfo]:
+    base_info = _base_task_info()
+    # Convert pydantic models to dicts for spreading
+    base_dataset = base_info.dataset.model_dump() if hasattr(base_info.dataset, 'model_dump') else dict(base_info.dataset)
+    base_metadata = base_info.task_metadata.model_dump() if hasattr(base_info.task_metadata, 'model_dump') else dict(base_info.task_metadata)
+    for seed in seeds:
+        sample = dataset.sample(split=DEFAULT_SPLIT, index=seed)
+        yield TaskInfo(  # type: ignore[call-overload]
+            task=base_info.task,
+            environment=base_info.environment,
+            dataset={  # type: ignore[arg-type]
+                **base_dataset,
+                "split": sample["split"],
+                "index": sample["index"],
+            },
+            rubric=base_info.rubric,
+            inference=base_info.inference,
+            limits=base_info.limits,
+            task_metadata={
+                **base_metadata,
+                "query": sample["text"],
+            },
+        )
+# Outcome rubric: a single criterion ("intent_accuracy", weight 1.0) combined with
+# "weighted_sum" aggregation. The load_rubric() result is cast to Rubric for typing.
+OUTCOME_RUBRIC: Rubric = cast(
+    Rubric,
+    load_rubric(
+        {
+            "version": "1",
+            "goal_text": "Classify banking customer queries into the correct intent category.",
+            "aggregation": "weighted_sum",
+            "criteria": [
+                {
+                    "id": "intent_accuracy",
+                    "description": "Correctly classify the customer query into the appropriate banking intent.",
+                    "weight": 1.0,
+                }
+            ],
+        }
+    ),
+)
+# Events rubric: a single criterion ("tool_usage", weight 1.0) about invoking the
+# banking77_classify tool, combined with "weighted_sum" aggregation.
+EVENTS_RUBRIC: Rubric = cast(
+    Rubric,
+    load_rubric(
+        {
+            "version": "1",
+            "goal_text": "Use the banking77_classify tool correctly.",
+            "aggregation": "weighted_sum",
+            "criteria": [
+                {
+                    "id": "tool_usage",
+                    "description": "Properly invoke the banking77_classify tool with the correct format.",
+                    "weight": 1.0,
+                }
+            ],
+        }
+    ),
+)
+def build_config() -> TaskAppConfig:
+    registry, dataset = build_dataset()
+    base_info = _base_task_info()
+    proxy_keys = normalize_vendor_keys()
+    proxy_config = ProxyConfig(
+        enable_openai=proxy_keys.get("OPENAI_API_KEY") is not None,
+        enable_groq=proxy_keys.get("GROQ_API_KEY") is not None,
+        system_hint="Use the banking77_classify tool to classify the customer query.",
+    )
+    config = TaskAppConfig(
+        app_id="banking77",
+        name="Banking77 Intent Classification Task",
+        description="Banking77 dataset task app for classifying customer queries into banking intents.",
+        base_task_info=base_info,
+        describe_taskset=lambda: describe_taskset(dataset),
+        provide_task_instances=lambda seeds: provide_task_instances(dataset, seeds),
+        rollout=rollout_executor,
+        dataset_registry=registry,
+        rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
+        proxy=proxy_config,
+        routers=(banking77_router,),
+        app_state={"banking77_dataset": dataset},
+        cors_origins=["*"],
+    )
+    return config
+def fastapi_app():
+    """Return the FastAPI application for Modal or other ASGI hosts."""
+    app = create_task_app(build_config())
+    # Replace default health endpoints with auth-tolerant handlers
+    # FastAPI matches routes in order, so we need to remove old routes and add new ones
+    # Access the router's route registry directly
+    routes_to_remove = []
+    for route in list(app.router.routes):
+        # Check if this is a route (not middleware or other components)
+        if hasattr(route, "path") and hasattr(route, "methods"):
+            path = getattr(route, "path", None)
+            methods = getattr(route, "methods", set()) or set()
+            if path in {"/health", "/health/rollout"} and "GET" in methods:
+                routes_to_remove.append(route)
+    # Remove routes from router
+    for route in routes_to_remove:
+        app.router.routes.remove(route)
+        print(f"[banking77] Removed default route: {getattr(route, 'path', 'unknown')}", flush=True)
+    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
+        if not env_key:
+            return None
+        prefix = env_key[: max(1, len(env_key) // 2)]
+        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
+        return prefix
+    @app.get("/health")
+    async def health(request: StarletteRequest):
+        env_key = normalize_environment_api_key()
+        if not env_key:
+            return JSONResponse(
+                status_code=503,
+                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+            )
+        if not is_api_key_header_authorized(request):
+            prefix = _log_env_key_prefix("health", env_key)
+            content = {"status": "healthy", "authorized": False}
+            if prefix:
+                content["expected_api_key_prefix"] = prefix
+            return JSONResponse(status_code=200, content=content)
+        return {"status": "healthy", "authorized": True}
+    @app.get("/health/rollout")
+    async def health_rollout(request: StarletteRequest):
+        env_key = normalize_environment_api_key()
+        if not env_key:
+            return JSONResponse(
+                status_code=503,
+                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+            )
+        if not is_api_key_header_authorized(request):
+            prefix = _log_env_key_prefix("health/rollout", env_key)
+            content = {"status": "healthy", "authorized": False}
+            if prefix:
+                content["expected_api_key_prefix"] = prefix
+            return JSONResponse(status_code=200, content=content)
+        return {"ok": True, "authorized": True}
+    @app.get("/metadata")
+    async def get_metadata(request: StarletteRequest):
+        """Return program code and metadata for proposer use.
+        This endpoint allows task apps to self-extract their own code using inspect,
+        keeping the architecture self-contained.
+        """
+        # Extract code using inspect
+        program_code = get_current_module_code()
+        # Get module path
+        import inspect
+        frame = inspect.currentframe()
+        try:
+            if frame is None:
+                module_path = None
+            else:
+                caller_frame = frame.f_back
+                if caller_frame is None:
+                    module_path = None
+                else:
+                    module = inspect.getmodule(caller_frame)
+                    module_path = module.__name__ if module else None
+        finally:
+            del frame
+        return {
+            "program_code": program_code,  # Full source code of task app
+            "module_path": module_path,    # Module path (e.g., "examples.task_apps.banking77.banking77_task_app")
+            "extraction_method": "inspect", # How code was extracted
+        }
+    @app.exception_handler(RequestValidationError)
+    async def _on_validation_error(request: StarletteRequest, exc: RequestValidationError):
+        try:
+            hdr = request.headers
+            snapshot = {
+                "path": str(request.url.path),
+                "have_x_api_key": bool(hdr.get("x-api-key")),  # type: ignore[misc]
+                "have_x_api_keys": bool(hdr.get("x-api-keys")),  # type: ignore[misc]
+                "have_authorization": bool(hdr.get("authorization")),  # type: ignore[misc]
+                "errors": exc.errors()[:5],
+            }
+            print("[422] validation", snapshot, flush=True)
+        except Exception:
+            pass
+        return JSONResponse(
+            status_code=422,
+            content={"status": "invalid", "detail": exc.errors()[:5]},
+        )
+    return app
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Run the Banking77 task app locally")
+    parser.add_argument("--host", default="0.0.0.0")
+    parser.add_argument("--port", type=int, default=8102)
+    parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
+    parser.add_argument(
+        "--env-file",
+        action="append",
+        default=[],
+        help="Additional .env files to load before startup",
+    )
+    args = parser.parse_args()
+    # Look for .env at repo root (3 levels up: banking77/ -> task_apps/ -> examples/ -> repo_root/)
+    default_env = Path(__file__).resolve().parents[3] / ".env"
+    env_files = [str(default_env)] if default_env.exists() else []
+    env_files.extend(args.env_file or [])
+    run_task_app(
+        build_config,
+        host=args.host,
+        port=args.port,
+        reload=args.reload,
+        env_files=env_files,
+    )

synth-ai 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl

synth-ai 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl