synth-ai 0.2.8.dev2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +44 -24
- synth_ai/__main__.py +30 -3
- synth_ai/cli/__init__.py +103 -48
- synth_ai/cli/__main__.py +42 -0
- synth_ai/cli/_internal/__init__.py +5 -0
- synth_ai/cli/_internal/modal_wrapper.py +31 -0
- synth_ai/cli/_internal/storage.py +20 -0
- synth_ai/cli/_internal/typer_patch.py +47 -0
- synth_ai/cli/_internal/validate_task_app.py +29 -0
- synth_ai/cli/agents/__init__.py +17 -0
- synth_ai/cli/agents/claude.py +77 -0
- synth_ai/cli/agents/codex.py +265 -0
- synth_ai/cli/agents/opencode.py +253 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/artifacts/__init__.py +13 -0
- synth_ai/cli/commands/artifacts/client.py +119 -0
- synth_ai/cli/commands/artifacts/config.py +57 -0
- synth_ai/cli/commands/artifacts/core.py +24 -0
- synth_ai/cli/commands/artifacts/download.py +188 -0
- synth_ai/cli/commands/artifacts/export.py +186 -0
- synth_ai/cli/commands/artifacts/list.py +156 -0
- synth_ai/cli/commands/artifacts/parsing.py +250 -0
- synth_ai/cli/commands/artifacts/show.py +336 -0
- synth_ai/cli/commands/demo/__init__.py +3 -0
- synth_ai/cli/commands/demo/core.py +153 -0
- synth_ai/cli/commands/eval/__init__.py +10 -0
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +256 -0
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +60 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/scan/__init__.py +19 -0
- synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
- synth_ai/cli/commands/scan/core.py +344 -0
- synth_ai/cli/commands/scan/health_checker.py +242 -0
- synth_ai/cli/commands/scan/local_scanner.py +278 -0
- synth_ai/cli/commands/scan/models.py +83 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1428 -0
- synth_ai/cli/commands/status/__init__.py +3 -0
- synth_ai/cli/commands/status/client.py +91 -0
- synth_ai/cli/commands/status/config.py +12 -0
- synth_ai/cli/commands/status/errors.py +11 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +34 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
- synth_ai/cli/commands/status/subcommands/models.py +35 -0
- synth_ai/cli/commands/status/subcommands/runs.py +34 -0
- synth_ai/cli/commands/status/subcommands/session.py +77 -0
- synth_ai/cli/commands/status/subcommands/summary.py +39 -0
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +23 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +22 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +201 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
- synth_ai/cli/commands/train/validation.py +392 -0
- synth_ai/cli/demo_apps/__init__.py +10 -0
- synth_ai/cli/demo_apps/core/__init__.py +28 -0
- synth_ai/{demos → cli/demo_apps}/core/cli.py +783 -441
- synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
- synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/cli/demo_apps/demo_registry.py +176 -0
- synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +75 -37
- synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +1 -2
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
- synth_ai/cli/demo_apps/math/__init__.py +1 -0
- synth_ai/cli/demo_apps/math/_common.py +16 -0
- synth_ai/cli/demo_apps/math/app.py +38 -0
- synth_ai/cli/demo_apps/math/config.toml +75 -0
- synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
- synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
- synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
- synth_ai/cli/demo_apps/mipro/main.py +271 -0
- synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
- synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
- synth_ai/cli/demos/__init__.py +12 -0
- synth_ai/cli/demos/demo.py +32 -0
- synth_ai/cli/demos/rl_demo.py +254 -0
- synth_ai/cli/deploy.py +216 -0
- synth_ai/cli/infra/__init__.py +14 -0
- synth_ai/cli/{balance.py → infra/balance.py} +16 -4
- synth_ai/cli/infra/mcp.py +35 -0
- synth_ai/cli/infra/modal_app.py +36 -0
- synth_ai/cli/infra/setup.py +69 -0
- synth_ai/cli/infra/status.py +16 -0
- synth_ai/cli/infra/turso.py +77 -0
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/agents.py +76 -0
- synth_ai/cli/lib/apps/modal_app.py +101 -0
- synth_ai/cli/lib/apps/task_app.py +642 -0
- synth_ai/cli/lib/bin.py +39 -0
- synth_ai/cli/lib/env.py +375 -0
- synth_ai/cli/lib/errors.py +85 -0
- synth_ai/cli/lib/modal.py +315 -0
- synth_ai/cli/lib/plotting.py +126 -0
- synth_ai/cli/lib/prompt_args.py +39 -0
- synth_ai/cli/lib/prompts.py +284 -0
- synth_ai/cli/lib/sqld.py +122 -0
- synth_ai/cli/lib/task_app_discovery.py +884 -0
- synth_ai/cli/lib/task_app_env.py +295 -0
- synth_ai/cli/lib/train_cfgs.py +300 -0
- synth_ai/cli/lib/tunnel_records.py +207 -0
- synth_ai/cli/local/__init__.py +14 -0
- synth_ai/cli/local/experiment_queue/__init__.py +72 -0
- synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
- synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
- synth_ai/cli/local/experiment_queue/config.py +128 -0
- synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
- synth_ai/cli/local/experiment_queue/database.py +175 -0
- synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
- synth_ai/cli/local/experiment_queue/models.py +231 -0
- synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
- synth_ai/cli/local/experiment_queue/results.py +373 -0
- synth_ai/cli/local/experiment_queue/schemas.py +131 -0
- synth_ai/cli/local/experiment_queue/service.py +344 -0
- synth_ai/cli/local/experiment_queue/status.py +372 -0
- synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
- synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
- synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
- synth_ai/cli/local/experiment_queue/validation.py +157 -0
- synth_ai/cli/local/session/__init__.py +92 -0
- synth_ai/cli/local/session/client.py +383 -0
- synth_ai/cli/local/session/constants.py +63 -0
- synth_ai/cli/local/session/exceptions.py +105 -0
- synth_ai/cli/local/session/manager.py +139 -0
- synth_ai/cli/local/session/models.py +89 -0
- synth_ai/cli/local/session/query.py +110 -0
- synth_ai/cli/root.py +150 -108
- synth_ai/cli/task_apps/__init__.py +37 -0
- synth_ai/cli/task_apps/commands.py +3145 -0
- synth_ai/cli/task_apps/deploy.py +7 -0
- synth_ai/cli/task_apps/list.py +26 -0
- synth_ai/cli/task_apps/main.py +36 -0
- synth_ai/cli/task_apps/modal_serve.py +11 -0
- synth_ai/cli/task_apps/serve.py +11 -0
- synth_ai/cli/training/__init__.py +8 -0
- synth_ai/cli/training/train.py +5 -0
- synth_ai/cli/training/train_cfg.py +34 -0
- synth_ai/cli/{watch.py → training/watch.py} +13 -18
- synth_ai/cli/turso.py +52 -0
- synth_ai/cli/utils/__init__.py +8 -0
- synth_ai/cli/utils/experiments.py +235 -0
- synth_ai/cli/utils/queue.py +504 -0
- synth_ai/cli/{recent.py → utils/recent.py} +13 -7
- synth_ai/cli/{traces.py → utils/traces.py} +9 -5
- synth_ai/contracts/__init__.py +67 -0
- synth_ai/core/__init__.py +100 -0
- synth_ai/core/_utils/__init__.py +54 -0
- synth_ai/core/_utils/base_url.py +10 -0
- synth_ai/core/_utils/http.py +10 -0
- synth_ai/core/_utils/prompts.py +14 -0
- synth_ai/core/_utils/task_app_state.py +12 -0
- synth_ai/core/_utils/user_config.py +10 -0
- synth_ai/core/apps/common.py +116 -0
- synth_ai/core/auth.py +95 -0
- synth_ai/core/cfgs.py +240 -0
- synth_ai/core/config/__init__.py +16 -0
- synth_ai/core/config/base.py +168 -0
- synth_ai/core/config/resolver.py +89 -0
- synth_ai/core/env.py +231 -0
- synth_ai/core/errors.py +126 -0
- synth_ai/core/http.py +230 -0
- synth_ai/core/integrations/__init__.py +11 -0
- synth_ai/core/integrations/cloudflare.py +1710 -0
- synth_ai/core/integrations/mcp/__init__.py +6 -0
- synth_ai/core/integrations/mcp/__main__.py +8 -0
- synth_ai/core/integrations/mcp/claude.py +36 -0
- synth_ai/core/integrations/mcp/main.py +254 -0
- synth_ai/core/integrations/mcp/setup.py +100 -0
- synth_ai/core/integrations/modal.py +277 -0
- synth_ai/core/json.py +72 -0
- synth_ai/core/log_filter.py +99 -0
- synth_ai/core/logging.py +82 -0
- synth_ai/core/paths.py +107 -0
- synth_ai/core/pricing.py +109 -0
- synth_ai/core/process.py +233 -0
- synth_ai/core/ssl.py +25 -0
- synth_ai/core/storage/__init__.py +71 -0
- synth_ai/core/task_app_state.py +318 -0
- synth_ai/core/telemetry.py +282 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
- synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
- synth_ai/core/tracing_v3/config.py +229 -0
- synth_ai/core/tracing_v3/constants.py +21 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
- synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
- synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
- synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
- synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
- synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
- synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
- synth_ai/core/tracing_v3/serialization.py +130 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
- synth_ai/core/tracing_v3/storage/config.py +109 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
- synth_ai/core/tracing_v3/trace_utils.py +326 -0
- synth_ai/core/tracing_v3/turso/__init__.py +12 -0
- synth_ai/core/tracing_v3/turso/daemon.py +278 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
- synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
- synth_ai/core/urls.py +18 -0
- synth_ai/core/user_config.py +137 -0
- synth_ai/core/uvicorn.py +222 -0
- synth_ai/data/__init__.py +83 -0
- synth_ai/data/enums.py +123 -0
- synth_ai/data/rewards.py +152 -0
- synth_ai/data/traces.py +35 -0
- synth_ai/products/__init__.py +6 -0
- synth_ai/products/graph_evolve/__init__.py +46 -0
- synth_ai/products/graph_evolve/client.py +226 -0
- synth_ai/products/graph_evolve/config.py +591 -0
- synth_ai/products/graph_evolve/converters/__init__.py +42 -0
- synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
- synth_ai/products/graph_evolve/run.py +222 -0
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +123 -0
- synth_ai/sdk/api/__init__.py +1 -0
- synth_ai/sdk/api/models/supported.py +514 -0
- synth_ai/sdk/api/research_agent/__init__.py +296 -0
- synth_ai/sdk/api/train/__init__.py +85 -0
- synth_ai/sdk/api/train/builders.py +895 -0
- synth_ai/sdk/api/train/cli.py +2199 -0
- synth_ai/sdk/api/train/config_finder.py +267 -0
- synth_ai/sdk/api/train/configs/__init__.py +65 -0
- synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
- synth_ai/sdk/api/train/configs/rl.py +187 -0
- synth_ai/sdk/api/train/configs/sft.py +99 -0
- synth_ai/sdk/api/train/configs/shared.py +81 -0
- synth_ai/sdk/api/train/context_learning.py +312 -0
- synth_ai/sdk/api/train/env_resolver.py +418 -0
- synth_ai/sdk/api/train/graph_validators.py +216 -0
- synth_ai/sdk/api/train/graphgen.py +984 -0
- synth_ai/sdk/api/train/graphgen_models.py +823 -0
- synth_ai/sdk/api/train/graphgen_validators.py +109 -0
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +124 -0
- synth_ai/sdk/api/train/progress/__init__.py +97 -0
- synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
- synth_ai/sdk/api/train/progress/events.py +326 -0
- synth_ai/sdk/api/train/progress/results.py +428 -0
- synth_ai/sdk/api/train/progress/tracker.py +641 -0
- synth_ai/sdk/api/train/prompt_learning.py +469 -0
- synth_ai/sdk/api/train/rl.py +441 -0
- synth_ai/sdk/api/train/sft.py +396 -0
- synth_ai/sdk/api/train/summary.py +522 -0
- synth_ai/sdk/api/train/supported_algos.py +147 -0
- synth_ai/sdk/api/train/task_app.py +351 -0
- synth_ai/sdk/api/train/utils.py +279 -0
- synth_ai/sdk/api/train/validators.py +2424 -0
- synth_ai/sdk/graphs/__init__.py +15 -0
- synth_ai/sdk/graphs/completions.py +570 -0
- synth_ai/{inference → sdk/inference}/__init__.py +0 -1
- synth_ai/sdk/inference/client.py +128 -0
- synth_ai/sdk/jobs/__init__.py +16 -0
- synth_ai/sdk/jobs/client.py +371 -0
- synth_ai/sdk/judging/__init__.py +14 -0
- synth_ai/sdk/judging/base.py +24 -0
- synth_ai/sdk/judging/client.py +40 -0
- synth_ai/sdk/judging/schemas.py +222 -0
- synth_ai/sdk/judging/types.py +42 -0
- synth_ai/sdk/learning/__init__.py +99 -0
- synth_ai/sdk/learning/algorithms.py +14 -0
- synth_ai/{learning → sdk/learning}/client.py +121 -30
- synth_ai/sdk/learning/config.py +5 -0
- synth_ai/{learning → sdk/learning}/constants.py +0 -2
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +292 -0
- synth_ai/sdk/learning/ft_client.py +7 -0
- synth_ai/{learning → sdk/learning}/health.py +15 -9
- synth_ai/{learning → sdk/learning}/jobs.py +44 -47
- synth_ai/sdk/learning/prompt_extraction.py +334 -0
- synth_ai/sdk/learning/prompt_learning_client.py +455 -0
- synth_ai/sdk/learning/prompt_learning_types.py +186 -0
- synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
- synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
- synth_ai/sdk/learning/rl/config.py +31 -0
- synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
- synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
- synth_ai/sdk/learning/rl/secrets.py +13 -0
- synth_ai/sdk/learning/rl_client.py +5 -0
- synth_ai/sdk/learning/sft/__init__.py +29 -0
- synth_ai/sdk/learning/sft/client.py +95 -0
- synth_ai/sdk/learning/sft/config.py +270 -0
- synth_ai/sdk/learning/sft/data.py +698 -0
- synth_ai/sdk/learning/sse.py +57 -0
- synth_ai/sdk/learning/validators.py +52 -0
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +87 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +70 -0
- synth_ai/sdk/streaming/__init__.py +35 -0
- synth_ai/sdk/streaming/config.py +94 -0
- synth_ai/sdk/streaming/handlers.py +1997 -0
- synth_ai/sdk/streaming/streamer.py +713 -0
- synth_ai/sdk/streaming/types.py +112 -0
- synth_ai/sdk/task/__init__.py +164 -0
- synth_ai/sdk/task/apps/__init__.py +169 -0
- synth_ai/sdk/task/auth.py +165 -0
- synth_ai/sdk/task/client.py +175 -0
- synth_ai/sdk/task/config.py +257 -0
- synth_ai/sdk/task/contracts.py +219 -0
- synth_ai/sdk/task/datasets.py +108 -0
- synth_ai/sdk/task/errors.py +50 -0
- synth_ai/sdk/task/health.py +34 -0
- synth_ai/sdk/task/in_process.py +1190 -0
- synth_ai/sdk/task/in_process_runner.py +314 -0
- synth_ai/sdk/task/inference_api.py +299 -0
- synth_ai/sdk/task/json.py +111 -0
- synth_ai/sdk/task/proxy.py +287 -0
- synth_ai/sdk/task/rubrics/__init__.py +55 -0
- synth_ai/sdk/task/rubrics/loaders.py +156 -0
- synth_ai/sdk/task/rubrics/models.py +57 -0
- synth_ai/sdk/task/rubrics/scoring.py +116 -0
- synth_ai/sdk/task/rubrics/strict.py +149 -0
- synth_ai/sdk/task/rubrics.py +219 -0
- synth_ai/sdk/task/server.py +631 -0
- synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
- synth_ai/sdk/task/tracing_utils.py +95 -0
- synth_ai/sdk/task/validators.py +441 -0
- synth_ai/sdk/task/vendors.py +59 -0
- synth_ai/sdk/training/__init__.py +102 -0
- synth_ai/sdk/tunnels/__init__.py +83 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/utils/__init__.py +213 -0
- synth_ai-0.4.3.dist-info/METADATA +262 -0
- synth_ai-0.4.3.dist-info/RECORD +370 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
- synth_ai/cli/calc.py +0 -69
- synth_ai/cli/demo.py +0 -144
- synth_ai/cli/legacy_root_backup.py +0 -470
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/rl_demo.py +0 -202
- synth_ai/cli/status.py +0 -133
- synth_ai/config/base_url.py +0 -107
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demos/core/__init__.py +0 -1
- synth_ai/demos/demo_task_apps/__init__.py +0 -1
- synth_ai/demos/demo_task_apps/math/config.toml +0 -129
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -415
- synth_ai/environments/__init__.py +0 -31
- synth_ai/environments/environment/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/base.py +0 -52
- synth_ai/environments/environment/core.py +0 -67
- synth_ai/environments/environment/db/__init__.py +0 -1
- synth_ai/environments/environment/db/sqlite.py +0 -45
- synth_ai/environments/environment/registry.py +0 -233
- synth_ai/environments/environment/resources/sqlite.py +0 -45
- synth_ai/environments/environment/results.py +0 -1
- synth_ai/environments/environment/rewards/__init__.py +0 -1
- synth_ai/environments/environment/rewards/core.py +0 -29
- synth_ai/environments/environment/shared_engine.py +0 -26
- synth_ai/environments/environment/tools/__init__.py +0 -200
- synth_ai/environments/examples/__init__.py +0 -1
- synth_ai/environments/examples/bandit/__init__.py +0 -33
- synth_ai/environments/examples/bandit/engine.py +0 -294
- synth_ai/environments/examples/bandit/environment.py +0 -194
- synth_ai/environments/examples/bandit/taskset.py +0 -200
- synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
- synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +0 -579
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
- synth_ai/environments/examples/crafter_classic/environment.py +0 -404
- synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
- synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
- synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
- synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
- synth_ai/environments/examples/crafter_custom/environment.py +0 -312
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
- synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
- synth_ai/environments/examples/enron/engine.py +0 -295
- synth_ai/environments/examples/enron/environment.py +0 -166
- synth_ai/environments/examples/enron/taskset.py +0 -112
- synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
- synth_ai/environments/examples/minigrid/__init__.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
- synth_ai/environments/examples/minigrid/engine.py +0 -589
- synth_ai/environments/examples/minigrid/environment.py +0 -274
- synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
- synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
- synth_ai/environments/examples/minigrid/taskset.py +0 -583
- synth_ai/environments/examples/nethack/__init__.py +0 -7
- synth_ai/environments/examples/nethack/achievements.py +0 -337
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
- synth_ai/environments/examples/nethack/engine.py +0 -739
- synth_ai/environments/examples/nethack/environment.py +0 -256
- synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
- synth_ai/environments/examples/nethack/taskset.py +0 -323
- synth_ai/environments/examples/red/__init__.py +0 -7
- synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/red/config_logging.py +0 -110
- synth_ai/environments/examples/red/engine.py +0 -694
- synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
- synth_ai/environments/examples/red/environment.py +0 -238
- synth_ai/environments/examples/red/taskset.py +0 -79
- synth_ai/environments/examples/red/units/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
- synth_ai/environments/examples/sokoban/engine.py +0 -678
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
- synth_ai/environments/examples/sokoban/environment.py +0 -229
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
- synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
- synth_ai/environments/examples/sokoban/taskset.py +0 -428
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/environments/examples/tictactoe/__init__.py +0 -1
- synth_ai/environments/examples/tictactoe/engine.py +0 -368
- synth_ai/environments/examples/tictactoe/environment.py +0 -240
- synth_ai/environments/examples/tictactoe/taskset.py +0 -215
- synth_ai/environments/examples/verilog/__init__.py +0 -10
- synth_ai/environments/examples/verilog/engine.py +0 -329
- synth_ai/environments/examples/verilog/environment.py +0 -350
- synth_ai/environments/examples/verilog/taskset.py +0 -420
- synth_ai/environments/examples/wordle/__init__.py +0 -29
- synth_ai/environments/examples/wordle/engine.py +0 -398
- synth_ai/environments/examples/wordle/environment.py +0 -159
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
- synth_ai/environments/examples/wordle/taskset.py +0 -230
- synth_ai/environments/reproducibility/core.py +0 -42
- synth_ai/environments/reproducibility/helpers.py +0 -0
- synth_ai/environments/reproducibility/tree.py +0 -364
- synth_ai/environments/service/app.py +0 -98
- synth_ai/environments/service/core_routes.py +0 -1020
- synth_ai/environments/service/external_registry.py +0 -56
- synth_ai/environments/service/registry.py +0 -9
- synth_ai/environments/stateful/__init__.py +0 -1
- synth_ai/environments/stateful/core.py +0 -163
- synth_ai/environments/stateful/engine.py +0 -21
- synth_ai/environments/stateful/state.py +0 -7
- synth_ai/environments/tasks/api.py +0 -19
- synth_ai/environments/tasks/core.py +0 -80
- synth_ai/environments/tasks/filters.py +0 -41
- synth_ai/environments/tasks/utils.py +0 -91
- synth_ai/environments/v0_observability/history.py +0 -3
- synth_ai/environments/v0_observability/log.py +0 -2
- synth_ai/evals/base.py +0 -15
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/http.py +0 -26
- synth_ai/http_client.py +0 -104
- synth_ai/inference/client.py +0 -20
- synth_ai/install_sqld.sh +0 -40
- synth_ai/jobs/client.py +0 -246
- synth_ai/learning/__init__.py +0 -24
- synth_ai/learning/config.py +0 -43
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/ft_client.py +0 -59
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/learning/sse.py +0 -58
- synth_ai/learning/validators.py +0 -48
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/__init__.py +0 -0
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/__init__.py +0 -0
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/__init__.py +0 -0
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/__init__.py +0 -0
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/__init__.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/__init__.py +0 -10
- synth_ai/task/contracts.py +0 -120
- synth_ai/task/health.py +0 -28
- synth_ai/task/validators.py +0 -12
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/config.py +0 -84
- synth_ai/tracing_v3/storage/config.py +0 -62
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/daemon.py +0 -144
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev2.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev2.dist-info/RECORD +0 -420
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
- /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
- /synth_ai/{compound/cais.py → py.typed} +0 -0
- /synth_ai/{learning → sdk/learning}/core.py +0 -0
- /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Shared JSON sanitisation helpers for Task Apps."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping, Sequence
|
|
6
|
+
from dataclasses import asdict, is_dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
try: # numpy is optional at runtime; degrade gracefully if absent
|
|
11
|
+
import numpy as _np # type: ignore
|
|
12
|
+
except Exception: # pragma: no cover - handled at runtime
|
|
13
|
+
_np = None # type: ignore
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _mask_numpy_array(arr: Any) -> str:
    """Return a short descriptor for an array-like instead of its contents.

    Only the ``shape`` and ``dtype`` attributes are read; either one is
    rendered as ``None`` when the object does not expose it.
    """
    described = {name: getattr(arr, name, None) for name in ("shape", "dtype")}
    return "<ndarray shape={shape} dtype={dtype}>".format(**described)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def to_jsonable(
|
|
23
|
+
value: Any,
|
|
24
|
+
*,
|
|
25
|
+
_visited: set[int] | None = None,
|
|
26
|
+
_depth: int = 0,
|
|
27
|
+
_max_depth: int = 32,
|
|
28
|
+
) -> Any:
|
|
29
|
+
"""Convert `value` into structures compatible with JSON serialisation.
|
|
30
|
+
|
|
31
|
+
- numpy scalars are converted to their Python counterparts
|
|
32
|
+
- numpy arrays are represented by a compact descriptor string
|
|
33
|
+
- dataclasses, Enums, and pydantic models are unwrapped recursively
|
|
34
|
+
- sets and tuples are converted to lists
|
|
35
|
+
- non-serialisable objects fall back to `repr`
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
if _visited is None:
|
|
39
|
+
_visited = set()
|
|
40
|
+
|
|
41
|
+
if _depth > _max_depth:
|
|
42
|
+
return f"<max_depth type={type(value).__name__}>"
|
|
43
|
+
|
|
44
|
+
if value is None or isinstance(value, str | bool | int | float):
|
|
45
|
+
return value
|
|
46
|
+
|
|
47
|
+
# numpy scalars / arrays
|
|
48
|
+
if _np is not None:
|
|
49
|
+
if isinstance(value, _np.integer):
|
|
50
|
+
return int(value)
|
|
51
|
+
if isinstance(value, _np.floating):
|
|
52
|
+
return float(value)
|
|
53
|
+
if isinstance(value, _np.bool_):
|
|
54
|
+
return bool(value)
|
|
55
|
+
if isinstance(value, _np.ndarray):
|
|
56
|
+
return _mask_numpy_array(value)
|
|
57
|
+
|
|
58
|
+
if isinstance(value, Enum):
|
|
59
|
+
return to_jsonable(value.value, _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth)
|
|
60
|
+
|
|
61
|
+
if is_dataclass(value):
|
|
62
|
+
return to_jsonable(
|
|
63
|
+
asdict(value), _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# pydantic BaseModel / attrs objects
|
|
67
|
+
for attr in ("model_dump", "dict", "to_dict", "to_json"):
|
|
68
|
+
if hasattr(value, attr) and callable(getattr(value, attr, None)):
|
|
69
|
+
try:
|
|
70
|
+
dumped = getattr(value, attr)() # type: ignore[misc]
|
|
71
|
+
except TypeError:
|
|
72
|
+
dumped = getattr(value, attr)(exclude_none=False) # pragma: no cover
|
|
73
|
+
return to_jsonable(
|
|
74
|
+
dumped, _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
obj_id = id(value)
|
|
78
|
+
if obj_id in _visited:
|
|
79
|
+
return f"<circular type={type(value).__name__}>"
|
|
80
|
+
|
|
81
|
+
if isinstance(value, Mapping):
|
|
82
|
+
_visited.add(obj_id)
|
|
83
|
+
return {
|
|
84
|
+
str(k): to_jsonable(v, _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth)
|
|
85
|
+
for k, v in value.items()
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
if isinstance(value, set | tuple):
|
|
89
|
+
_visited.add(obj_id)
|
|
90
|
+
return [
|
|
91
|
+
to_jsonable(v, _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth)
|
|
92
|
+
for v in value
|
|
93
|
+
]
|
|
94
|
+
|
|
95
|
+
if isinstance(value, Sequence) and not isinstance(value, str | bytes | bytearray):
|
|
96
|
+
_visited.add(obj_id)
|
|
97
|
+
return [
|
|
98
|
+
to_jsonable(v, _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth)
|
|
99
|
+
for v in value
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
if isinstance(value, bytes | bytearray):
|
|
103
|
+
return f"<bytes len={len(value)}>"
|
|
104
|
+
|
|
105
|
+
if hasattr(value, "__dict__"):
|
|
106
|
+
_visited.add(obj_id)
|
|
107
|
+
return to_jsonable(
|
|
108
|
+
vars(value), _visited=_visited, _depth=_depth + 1, _max_depth=_max_depth
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
return repr(value)
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"""Shared helpers for Task App proxy endpoints (OpenAI, Groq, etc.).
|
|
2
|
+
|
|
3
|
+
The proxy is tool-agnostic - each task app provides its own tools schema.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import copy
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
_REMOVE_FIELDS = {
|
|
14
|
+
"stop_after_tool_calls",
|
|
15
|
+
"thinking_mode",
|
|
16
|
+
"thinking_budget",
|
|
17
|
+
"reasoning",
|
|
18
|
+
}
|
|
19
|
+
_REMOVE_SAMPLING_FIELDS = {"temperature", "top_p"}
|
|
20
|
+
_GPT5_MIN_COMPLETION_TOKENS = 16000
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def prepare_for_openai(model: str | None, payload: dict[str, Any]) -> dict[str, Any]:
    """Sanitise an OpenAI chat completions payload for Task App usage.

    The task app is responsible for providing tools in the payload.
    This function only handles model-specific parameter normalization.

    Args:
        model: Target model name. The GPT-5 specific rewrites below apply
            only when the name contains ``"gpt-5"``.
        payload: Chat completions request body. It is deep-copied and never
            mutated.

    Returns:
        A sanitised copy of ``payload``.
    """

    sanitized = copy.deepcopy(payload)
    for field in _REMOVE_FIELDS:
        sanitized.pop(field, None)

    if not model or "gpt-5" not in model:
        return sanitized

    # `max_tokens` is always dropped for gpt-5; its value is carried over to
    # `max_completion_tokens` unless the caller already set that key.
    max_tokens = sanitized.pop("max_tokens", None)
    if max_tokens is not None:
        sanitized.setdefault("max_completion_tokens", max_tokens)

    for field in _REMOVE_SAMPLING_FIELDS:
        sanitized.pop(field, None)

    # Enforce the minimum completion budget (also replaces non-int values).
    mct = sanitized.get("max_completion_tokens")
    if not isinstance(mct, int) or mct < _GPT5_MIN_COMPLETION_TOKENS:
        sanitized["max_completion_tokens"] = _GPT5_MIN_COMPLETION_TOKENS

    # Set tool_choice to first provided tool (task app must provide tools)
    # If tool_choice not already set and tools are provided, use the first one
    if "tool_choice" not in sanitized:
        tools = sanitized.get("tools", [])
        first_tool = tools[0] if isinstance(tools, list) and tools else None
        # A malformed (non-dict) first tool is skipped rather than raising
        # AttributeError on `.get`.
        first_func = first_tool.get("function", {}) if isinstance(first_tool, dict) else None
        if isinstance(first_func, dict) and "name" in first_func:
            sanitized["tool_choice"] = {"type": "function", "function": {"name": first_func["name"]}}

    sanitized["parallel_tool_calls"] = False

    return sanitized
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def prepare_for_groq(model: str | None, payload: dict[str, Any]) -> dict[str, Any]:
    """Build a Groq-ready copy of ``payload`` on top of the OpenAI rules.

    The payload is first passed through ``prepare_for_openai``. For a named
    non-"gpt-5" model, ``max_completion_tokens`` is then renamed to
    ``max_tokens`` unless the caller's payload already carried ``max_tokens``.
    Finally ``response_format`` is normalised in place on the copy.
    """

    result = prepare_for_openai(model, payload)

    # Groq supports `max_tokens`; prefer their native parameter when present
    use_native_limit = bool(model) and "gpt-5" not in model
    if (
        use_native_limit
        and "max_completion_tokens" in result
        and "max_tokens" not in payload
    ):
        result["max_tokens"] = result.pop("max_completion_tokens")

    # json_schema is kept only for models known to support it; other models
    # are downgraded to json_object.
    normalize_response_format_for_groq(model, result)

    return result
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Models that support json_schema response_format
|
|
82
|
+
_GROQ_JSON_SCHEMA_MODELS = {
|
|
83
|
+
"llama-3.3-70b",
|
|
84
|
+
"llama-3.1-70b-versatile",
|
|
85
|
+
"llama-70b",
|
|
86
|
+
"mixtral-8x7b",
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def normalize_response_format_for_groq(model: str | None, payload: dict[str, Any]) -> None:
    """Downgrade an unsupported ``json_schema`` response format, in place.

    ``payload`` is mutated: when the (lower-cased) model name contains none of
    the entries in ``_GROQ_JSON_SCHEMA_MODELS`` and ``response_format`` has
    type ``"json_schema"``, it is replaced by ``{"type": "json_object"}``.
    A missing, falsy, or non-dict ``response_format`` is left untouched.
    """
    fmt = payload.get("response_format")
    if not (fmt and isinstance(fmt, dict)):
        return

    name = (model or "").lower()
    for known in _GROQ_JSON_SCHEMA_MODELS:
        if known in name:
            # Model handles json_schema natively; nothing to rewrite.
            return

    # If model doesn't support json_schema and we're using it, convert to json_object
    if fmt.get("type") == "json_schema":
        payload["response_format"] = {"type": "json_object"}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def inject_system_hint(payload: dict[str, Any], hint: str) -> dict[str, Any]:
    """Return a copy of ``payload`` whose system message carries ``hint``.

    An empty ``hint`` returns ``payload`` itself, unchanged and uncopied.
    Otherwise a deep copy is returned. When the first message is a system
    message with string content, the hint is appended to it (separated by a
    blank line) unless the content already contains the hint. When there is
    no leading system message, one is inserted at position 0. A payload
    whose ``messages`` is not a list is copied but otherwise left alone.
    """

    if not hint:
        return payload

    result = copy.deepcopy(payload)
    msgs = result.get("messages")
    if not isinstance(msgs, list):
        return result

    head = msgs[0] if msgs else None
    leads_with_system = isinstance(head, dict) and head.get("role") == "system"

    if not leads_with_system:
        msgs.insert(0, {"role": "system", "content": hint})
    else:
        existing = head.get("content")
        # Non-string content and already-hinted content are left as they are.
        if isinstance(existing, str) and hint not in existing:
            separator = "\n\n" if existing else ""
            msgs[0] = {**head, "content": existing.rstrip() + separator + hint}

    result["messages"] = msgs
    return result
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def extract_message_text(message: Any) -> str:
    """Pull plain text out of an OpenAI-style message, best effort.

    ``None`` yields ``""`` and strings are returned unchanged. Lists are
    extracted item by item and the non-empty pieces joined with newlines.
    For dicts, ``content`` is preferred (string, or list of parts), then a
    string ``text`` field. Anything else is rendered with ``str()``.
    """

    def _join_parts(items: list) -> str:
        # Recursively extract each item and drop empty results.
        return "\n".join(filter(None, map(extract_message_text, items)))

    if message is None:
        return ""
    if isinstance(message, str):
        return message
    if isinstance(message, list):
        return _join_parts(message)
    if isinstance(message, dict):
        body = message.get("content")
        if isinstance(body, str):
            return body
        if isinstance(body, list):
            return _join_parts(body)
        fallback = message.get("text")
        if isinstance(fallback, str):
            return fallback
    return str(message)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _parse_actions_from_json_candidate(candidate: Any) -> tuple[list[str], str]:
    """Read ``actions`` and ``reasoning`` out of a decoded JSON value.

    Only dicts are inspected. ``actions`` may be a list (each item is
    stringified) or a ``;``-separated string; entries are stripped and empty
    ones dropped. ``reasoning`` is used only when it is a string. Any other
    input gives ``([], "")``.
    """
    if not isinstance(candidate, dict):
        return [], ""

    raw = candidate.get("actions")
    if isinstance(raw, list):
        pieces = [str(item).strip() for item in raw]
    elif isinstance(raw, str):
        pieces = [chunk.strip() for chunk in raw.split(";")]
    else:
        pieces = []
    found = [piece for piece in pieces if piece]

    note = candidate.get("reasoning")
    why = note.strip() if isinstance(note, str) else ""
    return found, why
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def parse_tool_call_from_text(text: str) -> tuple[list[str], str]:
    """Recover a list of actions plus reasoning from free-form assistant text.

    Strategies are tried in order and the first one yielding actions wins:

    1. the whole text parsed as JSON;
    2. embedded brace-delimited fragments (no nested braces) mentioning
       "actions", each parsed as JSON;
    3. an inline ``Actions: a, b`` list, with the preceding text as reasoning;
    4. per-line ``Action 1: a`` / ``Action - a`` entries, with the remaining
       lines as reasoning.

    For the JSON strategies the reasoning falls back to the full text when the
    JSON carries none. Returns ``([], text)`` when nothing matches and
    ``([], "")`` for empty input.
    """

    body = (text or "").strip()
    if not body:
        return [], ""

    # Strategies 1 and 2: the full text first, then each embedded fragment.
    json_sources = [body]
    json_sources.extend(re.findall(r"\{[^{}]*actions[^{}]*\}", body, flags=re.IGNORECASE))
    for raw in json_sources:
        try:
            decoded = json.loads(raw)
        except Exception:
            continue
        found, why = _parse_actions_from_json_candidate(decoded)
        if found:
            return found, why or body

    # Patterns like "Actions: move_right, jump"
    inline = re.search(r"actions?\s*:\s*([^\n]+)", body, flags=re.IGNORECASE)
    if inline:
        listed = [piece.strip() for piece in inline.group(1).split(",") if piece.strip()]
        if listed:
            return listed, body[: inline.start()].strip()

    # Patterns like "Action 1: move_right"
    steps: list[str] = []
    prose: list[str] = []
    for line in body.splitlines():
        line = line.strip()
        if not line:
            continue
        hit = re.match(r"action\s*\d*\s*[:\-]\s*(.+)", line, flags=re.IGNORECASE)
        if hit is None:
            prose.append(line)
            continue
        step = hit.group(1).strip()
        if step:
            steps.append(step)

    if steps:
        return steps, "\n".join(prose).strip()
    return [], body
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def synthesize_tool_call_if_missing(
    openai_response: dict[str, Any],
    fallback_tool_name: str = "interact"
) -> dict[str, Any]:
    """Ensure the first choice carries a tool_call derived from text if absent.

    This is a fallback for models that don't properly support tool calling.
    Task apps can specify their preferred fallback tool name (e.g., "interact", "execute_sequence").

    DEPRECATED: Task apps should prefer models with native tool calling support.
    This function will be removed in a future version.

    Args:
        openai_response: Chat completions response body.
        fallback_tool_name: Function name used for the synthesised tool call.

    Returns:
        ``openai_response`` itself (same object) when it is malformed, when
        the first message already has tool calls, or when no actions can be
        parsed from its text. Otherwise a fully independent deep copy whose
        first message carries one synthesised tool call.
    """

    if not isinstance(openai_response, dict):
        return openai_response
    choices = openai_response.get("choices")
    if not isinstance(choices, list) or not choices:
        return openai_response
    first = choices[0]
    if not isinstance(first, dict):
        return openai_response
    message = first.get("message")
    if not isinstance(message, dict):
        return openai_response
    tool_calls = message.get("tool_calls")
    if isinstance(tool_calls, list) and tool_calls:
        return openai_response

    text = extract_message_text(message)
    actions, reasoning = parse_tool_call_from_text(text)
    if not actions:
        return openai_response

    # Build a fallback tool call using the provided tool name
    payload = {
        "actions": [str(a).strip() for a in actions if str(a).strip()],
    }
    if reasoning.strip():
        payload["reasoning"] = reasoning.strip()

    tool_call = {
        "id": f"tool_{fallback_tool_name}_fallback",
        "type": "function",
        "function": {
            "name": fallback_tool_name,
            "arguments": json.dumps(payload, ensure_ascii=False),
        },
    }

    # Copy the response exactly once and patch the copy. Rebuilding the
    # choices list from the input would leave choices[1:] aliased to the
    # caller's objects.
    result = copy.deepcopy(openai_response)
    patched_message = result["choices"][0]["message"]
    patched_message["tool_calls"] = [tool_call]
    patched_message.setdefault("content", None)
    return result
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Rubric schema, loading, and scoring helpers for Task Apps.
|
|
2
|
+
|
|
3
|
+
This module provides:
|
|
4
|
+
- Flexible rubric models (Criterion, Rubric) for general task app use
|
|
5
|
+
- Strict validators (StrictCriterion, StrictRubric) for step-wise judges
|
|
6
|
+
- Loading utilities supporting JSON, YAML, and HTTP sources
|
|
7
|
+
- Blending utilities for composing rubrics
|
|
8
|
+
- Scoring utilities for events and outcomes
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
# Core models (flexible validation)
|
|
12
|
+
# Loading and blending
|
|
13
|
+
from .loaders import blend_rubrics, load_rubric
|
|
14
|
+
from .models import Criterion, Rubric
|
|
15
|
+
|
|
16
|
+
# Scoring
|
|
17
|
+
from .scoring import score_events_against_rubric, score_outcome_against_rubric
|
|
18
|
+
|
|
19
|
+
# Strict validators (for judge configs)
|
|
20
|
+
from .strict import (
|
|
21
|
+
StrictCriterion,
|
|
22
|
+
StrictRubric,
|
|
23
|
+
ValidationError,
|
|
24
|
+
validate_rubric_dict,
|
|
25
|
+
validate_rubric_file,
|
|
26
|
+
validate_rubric_files,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
# Flexible models
|
|
31
|
+
"Criterion",
|
|
32
|
+
"Rubric",
|
|
33
|
+
# Loaders
|
|
34
|
+
"load_rubric",
|
|
35
|
+
"blend_rubrics",
|
|
36
|
+
# Scoring
|
|
37
|
+
"score_events_against_rubric",
|
|
38
|
+
"score_outcome_against_rubric",
|
|
39
|
+
# Strict validators
|
|
40
|
+
"StrictCriterion",
|
|
41
|
+
"StrictRubric",
|
|
42
|
+
"ValidationError",
|
|
43
|
+
"validate_rubric_dict",
|
|
44
|
+
"validate_rubric_file",
|
|
45
|
+
"validate_rubric_files",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
# Maintain backwards compatibility
|
|
49
|
+
# Old code may import these names expecting the flexible variants
|
|
50
|
+
RubricCriterion = StrictCriterion
|
|
51
|
+
RubricSpec = StrictRubric
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Rubric loading and blending utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .models import Criterion, Rubric
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _load_text(source: str) -> tuple[str, str | None]:
    """Load text from file path or return as-is.

    Args:
        source: Either a path to an existing file or the rubric text itself
            (inline JSON/YAML, or a URL).

    Returns:
        ``(file contents, lower-cased file suffix)`` when ``source`` names a
        regular file, otherwise ``(source, None)``.
    """
    path = Path(source)
    try:
        # is_file() rather than exists(): a directory cannot be read as text.
        is_file = path.is_file()
    except (OSError, ValueError):
        # Inline rubric text can be an invalid path (e.g. longer than the
        # filesystem's name limit); treat it as literal text, not an error.
        is_file = False
    if is_file:
        return path.read_text(encoding="utf-8"), path.suffix.lower()
    return source, None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _parse_structured(text: str, suffix: str | None) -> dict[str, Any]:
    """Parse JSON or YAML text into a dictionary.

    Args:
        text: Rubric text, or an ``http(s)://`` URL to fetch it from.
        suffix: Lower-cased file suffix of the source, if it came from a
            file. ``.yaml`` / ``.yml`` forces YAML parsing.

    Returns:
        The decoded mapping.

    Raises:
        ValueError: If the text is empty or does not decode to a mapping.
        RuntimeError: If YAML parsing is needed but PyYAML is not installed.
        json.JSONDecodeError: If text starting with ``{`` is not valid JSON.
    """
    text = text.strip()
    if not text:
        raise ValueError("Rubric source is empty")

    def _load_yaml(missing_dependency_msg: str, not_a_mapping_msg: str) -> dict[str, Any]:
        # Import lazily: PyYAML is an optional dependency.
        try:
            import yaml  # type: ignore
        except Exception as exc:  # pragma: no cover - optional dependency
            raise RuntimeError(missing_dependency_msg) from exc
        data = yaml.safe_load(text)
        if not isinstance(data, dict):
            raise ValueError(not_a_mapping_msg) from None
        return data

    if suffix in (".yaml", ".yml"):
        return _load_yaml(
            "PyYAML is required to load YAML rubrics",
            "Rubric YAML must produce a mapping",
        )

    if text.startswith("{"):
        # A JSON document opening with "{" can only decode to an object.
        return json.loads(text)

    if text.startswith(("http://", "https://")):
        import requests  # type: ignore

        response = requests.get(text, timeout=15)
        response.raise_for_status()
        return _parse_structured(response.text, suffix)

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return _load_yaml(
            "PyYAML is required to load rubric text",
            "Rubric text must decode to a mapping",
        )
    # Valid JSON that is not an object (list, number, string, ...) is
    # rejected here, matching the YAML paths, instead of being returned in
    # violation of the declared return type.
    if not isinstance(data, dict):
        raise ValueError("Rubric text must decode to a mapping")
    return data
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def load_rubric(source: str | dict[str, Any] | Rubric | None) -> Rubric | None:
    """Turn a rubric source into a validated ``Rubric``.

    Args:
        source: ``None``, an existing ``Rubric``, a dict of rubric data, or
            anything else, which is stringified and treated as a file path
            or inline rubric text.

    Returns:
        ``None`` for a ``None`` source, the same object for a ``Rubric``
        source, otherwise the result of ``Rubric.model_validate``.

    Raises:
        ValueError: If the data has ``event`` and ``outcome`` keys but none
            of ``version``, ``goal_text`` or ``criteria`` (backend judge
            format).
    """
    # None and ready-made Rubric instances pass straight through.
    if source is None or isinstance(source, Rubric):
        return source

    if isinstance(source, dict):
        data = source
    else:
        raw_text, file_suffix = _load_text(str(source))
        data = _parse_structured(raw_text, file_suffix)

    # Check if this looks like a backend judge rubric (wrong format)
    if isinstance(data, dict):
        has_judge_keys = all(key in data for key in ("event", "outcome"))
        has_task_keys = any(key in data for key in ("version", "goal_text", "criteria"))
        if has_judge_keys and not has_task_keys:
            source_hint = f" ({source})" if isinstance(source, str) else ""
            raise ValueError(
                f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
                f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
                f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
            )

    return Rubric.model_validate(data)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _merge_weights(base: Criterion, override: Criterion) -> float:
    """Combine the weights of a criterion present in both rubrics.

    A weight of exactly ``1.0`` is treated as "left at its default":

    * override weight is ``1.0``      -> the base weight is kept;
    * only the base weight is ``1.0`` -> the override weight is used;
    * neither is ``1.0``              -> the two weights are multiplied.
    """
    base_weight = base.weight
    override_weight = override.weight

    if override_weight == 1.0:
        return base_weight
    if base_weight == 1.0:
        return override_weight
    return base_weight * override_weight
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _inherit_required(base: Criterion, override: Criterion) -> bool:
    """Choose the ``required`` flag for a criterion defined in both rubrics."""
    # ``required`` is a plain bool with a default, so a ``None`` check alone can
    # never tell "not specified" from "explicitly False". Pydantic models record
    # the fields that were actually supplied in ``model_fields_set``; use it
    # when available so an unspecified override inherits the base value.
    supplied = getattr(override, "model_fields_set", None)
    if supplied is not None and "required" not in supplied:
        return base.required
    # Fallback for objects without ``model_fields_set`` or an explicit None.
    if override.required is None:
        return base.required
    return override.required


def blend_rubrics(base: Rubric | None, override: Rubric | None) -> Rubric | None:
    """Blend two rubrics by merging criteria and inheriting properties.

    Criteria are matched by ``id``. For a criterion present in both rubrics the
    override's description wins when non-empty, weights are combined by
    ``_merge_weights``, and ``required`` is taken from the override only when
    the override actually specified it (otherwise it is inherited from the
    base). Criteria found in only one rubric are kept unchanged. The result
    lists override criteria first, in override order, followed by the
    remaining base-only criteria in base order.

    Args:
        base: Base rubric providing defaults.
        override: Override rubric with specific customizations.

    Returns:
        ``None`` if both inputs are ``None``; the other rubric unchanged if
        exactly one input is ``None``; otherwise a new blended ``Rubric``.
        An override aggregation of ``"inherit"`` resolves to the base's
        aggregation; ``version`` and ``goal_text`` fall back to the base when
        the override's value is empty.
    """
    if base is None or override is None:
        # Yields None when both are missing, else whichever one is present.
        return override if base is None else base

    # Base criteria not yet claimed by an override entry, in base order.
    unclaimed = {criterion.id: criterion for criterion in base.criteria}
    merged: list[Criterion] = []

    for ov in override.criteria:
        existing = unclaimed.pop(ov.id, None)
        if existing is None:
            merged.append(ov)
            continue
        merged.append(
            Criterion(
                id=ov.id,
                description=ov.description or existing.description,
                weight=_merge_weights(existing, ov),
                required=_inherit_required(existing, ov),
            )
        )

    merged.extend(unclaimed.values())

    aggregation = override.aggregation
    if aggregation == "inherit":
        aggregation = base.aggregation

    return Rubric(
        version=override.version or base.version,
        goal_text=override.goal_text or base.goal_text,
        criteria=merged,
        aggregation=aggregation,
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Rubric and Criterion data models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field, field_validator
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Criterion(BaseModel):
    """Single scoring criterion within a rubric.

    Flexible variant allowing weights > 1.0 and no normalization requirement.
    Used by task apps for general rubric scoring.
    """

    # Identifier of the criterion.
    id: str
    # Human-readable statement of what is being scored.
    description: str
    # Relative weight; must be strictly positive, defaults to 1.0.
    weight: float = 1.0
    # Required flag; defaults to False.
    required: bool = False

    @field_validator("weight")
    @classmethod
    def _validate_weight(cls, value: float) -> float:
        """Reject weights that are not strictly positive.

        Raises:
            ValueError: If ``value`` is zero, negative, or NaN.
        """
        # Written as ``not value > 0`` rather than ``value <= 0``: every
        # comparison with NaN is False, so the latter would accept a NaN weight.
        if not value > 0:
            raise ValueError("criterion weight must be positive")
        return value
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Rubric(BaseModel):
    """Scoring rubric for task app outcomes.

    Holds a version tag, optional goal text, a list of criteria with unique
    ids, and the name of an aggregation strategy. Criterion weights are not
    required to sum to 1.0.
    """

    version: str
    goal_text: str | None = None
    criteria: list[Criterion] = Field(default_factory=list)
    aggregation: str = "weighted_sum"

    @field_validator("aggregation")
    @classmethod
    def _validate_aggregation(cls, value: str) -> str:
        """Accept only the known aggregation strategy names."""
        allowed = {"sum", "weighted_sum", "custom", "inherit"}
        if value in allowed:
            return value
        raise ValueError(f"aggregation must be one of {sorted(allowed)}")

    @field_validator("criteria")
    @classmethod
    def _validate_criteria(cls, criteria: list[Criterion]) -> list[Criterion]:
        """Ensure no two criteria share an id; the first repeat is reported."""
        known_ids = set()
        for entry in criteria:
            entry_id = entry.id
            if entry_id in known_ids:
                raise ValueError(f"duplicate criterion id: {entry_id}")
            known_ids.add(entry_id)
        return criteria
|
|
57
|
+
|