synth-ai 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +44 -24
- synth_ai/__main__.py +30 -3
- synth_ai/cli/__init__.py +103 -48
- synth_ai/cli/__main__.py +42 -0
- synth_ai/cli/_internal/__init__.py +5 -0
- synth_ai/cli/_internal/modal_wrapper.py +31 -0
- synth_ai/cli/_internal/storage.py +20 -0
- synth_ai/cli/_internal/typer_patch.py +47 -0
- synth_ai/cli/_internal/validate_task_app.py +29 -0
- synth_ai/cli/agents/__init__.py +17 -0
- synth_ai/cli/agents/claude.py +77 -0
- synth_ai/cli/agents/codex.py +265 -0
- synth_ai/cli/agents/opencode.py +253 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/artifacts/__init__.py +13 -0
- synth_ai/cli/commands/artifacts/client.py +119 -0
- synth_ai/cli/commands/artifacts/config.py +57 -0
- synth_ai/cli/commands/artifacts/core.py +24 -0
- synth_ai/cli/commands/artifacts/download.py +188 -0
- synth_ai/cli/commands/artifacts/export.py +186 -0
- synth_ai/cli/commands/artifacts/list.py +156 -0
- synth_ai/cli/commands/artifacts/parsing.py +250 -0
- synth_ai/cli/commands/artifacts/show.py +336 -0
- synth_ai/cli/commands/demo/__init__.py +3 -0
- synth_ai/cli/commands/demo/core.py +153 -0
- synth_ai/cli/commands/eval/__init__.py +10 -0
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +256 -0
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +60 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/scan/__init__.py +19 -0
- synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
- synth_ai/cli/commands/scan/core.py +344 -0
- synth_ai/cli/commands/scan/health_checker.py +242 -0
- synth_ai/cli/commands/scan/local_scanner.py +278 -0
- synth_ai/cli/commands/scan/models.py +83 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1428 -0
- synth_ai/cli/commands/status/__init__.py +3 -0
- synth_ai/cli/commands/status/client.py +91 -0
- synth_ai/cli/commands/status/config.py +12 -0
- synth_ai/cli/commands/status/errors.py +11 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +34 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
- synth_ai/cli/commands/status/subcommands/models.py +35 -0
- synth_ai/cli/commands/status/subcommands/runs.py +34 -0
- synth_ai/cli/commands/status/subcommands/session.py +77 -0
- synth_ai/cli/commands/status/subcommands/summary.py +39 -0
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +23 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +22 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +201 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
- synth_ai/cli/commands/train/validation.py +392 -0
- synth_ai/cli/demo_apps/__init__.py +10 -0
- synth_ai/cli/demo_apps/core/__init__.py +28 -0
- synth_ai/cli/demo_apps/core/cli.py +1735 -0
- synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
- synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/cli/demo_apps/demo_registry.py +176 -0
- synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +117 -51
- synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/_common.py +16 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
- synth_ai/cli/demo_apps/math/__init__.py +1 -0
- synth_ai/cli/demo_apps/math/_common.py +16 -0
- synth_ai/cli/demo_apps/math/app.py +38 -0
- synth_ai/cli/demo_apps/math/config.toml +75 -0
- synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
- synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
- synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
- synth_ai/cli/demo_apps/mipro/main.py +271 -0
- synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
- synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
- synth_ai/cli/demos/__init__.py +12 -0
- synth_ai/cli/demos/demo.py +32 -0
- synth_ai/cli/demos/rl_demo.py +254 -0
- synth_ai/cli/deploy.py +216 -0
- synth_ai/cli/infra/__init__.py +14 -0
- synth_ai/cli/{balance.py → infra/balance.py} +21 -3
- synth_ai/cli/infra/mcp.py +35 -0
- synth_ai/cli/infra/modal_app.py +36 -0
- synth_ai/cli/infra/setup.py +69 -0
- synth_ai/cli/infra/status.py +16 -0
- synth_ai/cli/infra/turso.py +77 -0
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/agents.py +76 -0
- synth_ai/cli/lib/apps/modal_app.py +101 -0
- synth_ai/cli/lib/apps/task_app.py +642 -0
- synth_ai/cli/lib/bin.py +39 -0
- synth_ai/cli/lib/env.py +375 -0
- synth_ai/cli/lib/errors.py +85 -0
- synth_ai/cli/lib/modal.py +315 -0
- synth_ai/cli/lib/plotting.py +126 -0
- synth_ai/cli/lib/prompt_args.py +39 -0
- synth_ai/cli/lib/prompts.py +284 -0
- synth_ai/cli/lib/sqld.py +122 -0
- synth_ai/cli/lib/task_app_discovery.py +884 -0
- synth_ai/cli/lib/task_app_env.py +295 -0
- synth_ai/cli/lib/train_cfgs.py +300 -0
- synth_ai/cli/lib/tunnel_records.py +207 -0
- synth_ai/cli/local/__init__.py +14 -0
- synth_ai/cli/local/experiment_queue/__init__.py +72 -0
- synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
- synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
- synth_ai/cli/local/experiment_queue/config.py +128 -0
- synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
- synth_ai/cli/local/experiment_queue/database.py +175 -0
- synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
- synth_ai/cli/local/experiment_queue/models.py +231 -0
- synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
- synth_ai/cli/local/experiment_queue/results.py +373 -0
- synth_ai/cli/local/experiment_queue/schemas.py +131 -0
- synth_ai/cli/local/experiment_queue/service.py +344 -0
- synth_ai/cli/local/experiment_queue/status.py +372 -0
- synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
- synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
- synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
- synth_ai/cli/local/experiment_queue/validation.py +157 -0
- synth_ai/cli/local/session/__init__.py +92 -0
- synth_ai/cli/local/session/client.py +383 -0
- synth_ai/cli/local/session/constants.py +63 -0
- synth_ai/cli/local/session/exceptions.py +105 -0
- synth_ai/cli/local/session/manager.py +139 -0
- synth_ai/cli/local/session/models.py +89 -0
- synth_ai/cli/local/session/query.py +110 -0
- synth_ai/cli/root.py +150 -102
- synth_ai/cli/task_apps/__init__.py +37 -0
- synth_ai/cli/task_apps/commands.py +3145 -0
- synth_ai/cli/task_apps/deploy.py +7 -0
- synth_ai/cli/task_apps/list.py +26 -0
- synth_ai/cli/task_apps/main.py +36 -0
- synth_ai/cli/task_apps/modal_serve.py +11 -0
- synth_ai/cli/task_apps/serve.py +11 -0
- synth_ai/cli/training/__init__.py +8 -0
- synth_ai/cli/training/train.py +5 -0
- synth_ai/cli/training/train_cfg.py +34 -0
- synth_ai/cli/{watch.py → training/watch.py} +13 -18
- synth_ai/cli/turso.py +52 -0
- synth_ai/cli/utils/__init__.py +8 -0
- synth_ai/cli/utils/experiments.py +235 -0
- synth_ai/cli/utils/queue.py +504 -0
- synth_ai/cli/{recent.py → utils/recent.py} +13 -7
- synth_ai/cli/{traces.py → utils/traces.py} +9 -5
- synth_ai/contracts/__init__.py +67 -0
- synth_ai/core/__init__.py +100 -0
- synth_ai/core/_utils/__init__.py +54 -0
- synth_ai/core/_utils/base_url.py +10 -0
- synth_ai/core/_utils/http.py +10 -0
- synth_ai/core/_utils/prompts.py +14 -0
- synth_ai/core/_utils/task_app_state.py +12 -0
- synth_ai/core/_utils/user_config.py +10 -0
- synth_ai/core/apps/common.py +116 -0
- synth_ai/core/auth.py +95 -0
- synth_ai/core/cfgs.py +240 -0
- synth_ai/core/config/__init__.py +16 -0
- synth_ai/core/config/base.py +168 -0
- synth_ai/core/config/resolver.py +89 -0
- synth_ai/core/env.py +231 -0
- synth_ai/core/errors.py +126 -0
- synth_ai/core/http.py +230 -0
- synth_ai/core/integrations/__init__.py +11 -0
- synth_ai/core/integrations/cloudflare.py +1710 -0
- synth_ai/core/integrations/mcp/__init__.py +6 -0
- synth_ai/core/integrations/mcp/__main__.py +8 -0
- synth_ai/core/integrations/mcp/claude.py +36 -0
- synth_ai/core/integrations/mcp/main.py +254 -0
- synth_ai/core/integrations/mcp/setup.py +100 -0
- synth_ai/core/integrations/modal.py +277 -0
- synth_ai/core/json.py +72 -0
- synth_ai/core/log_filter.py +99 -0
- synth_ai/core/logging.py +82 -0
- synth_ai/core/paths.py +107 -0
- synth_ai/core/pricing.py +109 -0
- synth_ai/core/process.py +233 -0
- synth_ai/core/ssl.py +25 -0
- synth_ai/core/storage/__init__.py +71 -0
- synth_ai/core/task_app_state.py +318 -0
- synth_ai/core/telemetry.py +282 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
- synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
- synth_ai/core/tracing_v3/config.py +229 -0
- synth_ai/core/tracing_v3/constants.py +21 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
- synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
- synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
- synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
- synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
- synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
- synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
- synth_ai/core/tracing_v3/serialization.py +130 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
- synth_ai/core/tracing_v3/storage/config.py +109 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
- synth_ai/core/tracing_v3/trace_utils.py +326 -0
- synth_ai/core/tracing_v3/turso/__init__.py +12 -0
- synth_ai/core/tracing_v3/turso/daemon.py +278 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
- synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
- synth_ai/core/urls.py +18 -0
- synth_ai/core/user_config.py +137 -0
- synth_ai/core/uvicorn.py +222 -0
- synth_ai/data/__init__.py +83 -0
- synth_ai/data/enums.py +123 -0
- synth_ai/data/rewards.py +152 -0
- synth_ai/data/traces.py +35 -0
- synth_ai/products/__init__.py +6 -0
- synth_ai/products/graph_evolve/__init__.py +46 -0
- synth_ai/products/graph_evolve/client.py +226 -0
- synth_ai/products/graph_evolve/config.py +591 -0
- synth_ai/products/graph_evolve/converters/__init__.py +42 -0
- synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
- synth_ai/products/graph_evolve/run.py +222 -0
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +123 -0
- synth_ai/sdk/api/__init__.py +1 -0
- synth_ai/sdk/api/models/supported.py +514 -0
- synth_ai/sdk/api/research_agent/__init__.py +296 -0
- synth_ai/sdk/api/train/__init__.py +85 -0
- synth_ai/sdk/api/train/builders.py +895 -0
- synth_ai/sdk/api/train/cli.py +2199 -0
- synth_ai/sdk/api/train/config_finder.py +267 -0
- synth_ai/sdk/api/train/configs/__init__.py +65 -0
- synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
- synth_ai/sdk/api/train/configs/rl.py +187 -0
- synth_ai/sdk/api/train/configs/sft.py +99 -0
- synth_ai/sdk/api/train/configs/shared.py +81 -0
- synth_ai/sdk/api/train/context_learning.py +312 -0
- synth_ai/sdk/api/train/env_resolver.py +418 -0
- synth_ai/sdk/api/train/graph_validators.py +216 -0
- synth_ai/sdk/api/train/graphgen.py +984 -0
- synth_ai/sdk/api/train/graphgen_models.py +823 -0
- synth_ai/sdk/api/train/graphgen_validators.py +109 -0
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +124 -0
- synth_ai/sdk/api/train/progress/__init__.py +97 -0
- synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
- synth_ai/sdk/api/train/progress/events.py +326 -0
- synth_ai/sdk/api/train/progress/results.py +428 -0
- synth_ai/sdk/api/train/progress/tracker.py +641 -0
- synth_ai/sdk/api/train/prompt_learning.py +469 -0
- synth_ai/sdk/api/train/rl.py +441 -0
- synth_ai/sdk/api/train/sft.py +396 -0
- synth_ai/sdk/api/train/summary.py +522 -0
- synth_ai/sdk/api/train/supported_algos.py +147 -0
- synth_ai/sdk/api/train/task_app.py +351 -0
- synth_ai/sdk/api/train/utils.py +279 -0
- synth_ai/sdk/api/train/validators.py +2424 -0
- synth_ai/sdk/graphs/__init__.py +15 -0
- synth_ai/sdk/graphs/completions.py +570 -0
- synth_ai/{inference → sdk/inference}/__init__.py +0 -1
- synth_ai/sdk/inference/client.py +128 -0
- synth_ai/sdk/jobs/__init__.py +16 -0
- synth_ai/sdk/jobs/client.py +371 -0
- synth_ai/sdk/judging/__init__.py +14 -0
- synth_ai/sdk/judging/base.py +24 -0
- synth_ai/sdk/judging/client.py +40 -0
- synth_ai/sdk/judging/schemas.py +222 -0
- synth_ai/sdk/judging/types.py +42 -0
- synth_ai/sdk/learning/__init__.py +99 -0
- synth_ai/sdk/learning/algorithms.py +14 -0
- synth_ai/{learning → sdk/learning}/client.py +121 -30
- synth_ai/sdk/learning/config.py +5 -0
- synth_ai/{learning → sdk/learning}/constants.py +0 -2
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +292 -0
- synth_ai/sdk/learning/ft_client.py +7 -0
- synth_ai/{learning → sdk/learning}/health.py +15 -9
- synth_ai/{learning → sdk/learning}/jobs.py +44 -47
- synth_ai/sdk/learning/prompt_extraction.py +334 -0
- synth_ai/sdk/learning/prompt_learning_client.py +455 -0
- synth_ai/sdk/learning/prompt_learning_types.py +186 -0
- synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
- synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
- synth_ai/sdk/learning/rl/config.py +31 -0
- synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
- synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
- synth_ai/sdk/learning/rl/secrets.py +13 -0
- synth_ai/sdk/learning/rl_client.py +5 -0
- synth_ai/sdk/learning/sft/__init__.py +29 -0
- synth_ai/sdk/learning/sft/client.py +95 -0
- synth_ai/sdk/learning/sft/config.py +270 -0
- synth_ai/sdk/learning/sft/data.py +698 -0
- synth_ai/sdk/learning/sse.py +57 -0
- synth_ai/sdk/learning/validators.py +52 -0
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +87 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +70 -0
- synth_ai/sdk/streaming/__init__.py +35 -0
- synth_ai/sdk/streaming/config.py +94 -0
- synth_ai/sdk/streaming/handlers.py +1997 -0
- synth_ai/sdk/streaming/streamer.py +713 -0
- synth_ai/sdk/streaming/types.py +112 -0
- synth_ai/sdk/task/__init__.py +164 -0
- synth_ai/sdk/task/apps/__init__.py +169 -0
- synth_ai/sdk/task/auth.py +165 -0
- synth_ai/sdk/task/client.py +175 -0
- synth_ai/sdk/task/config.py +257 -0
- synth_ai/sdk/task/contracts.py +219 -0
- synth_ai/sdk/task/datasets.py +108 -0
- synth_ai/sdk/task/errors.py +50 -0
- synth_ai/sdk/task/health.py +34 -0
- synth_ai/sdk/task/in_process.py +1190 -0
- synth_ai/sdk/task/in_process_runner.py +314 -0
- synth_ai/sdk/task/inference_api.py +299 -0
- synth_ai/sdk/task/json.py +111 -0
- synth_ai/sdk/task/proxy.py +287 -0
- synth_ai/sdk/task/rubrics/__init__.py +55 -0
- synth_ai/sdk/task/rubrics/loaders.py +156 -0
- synth_ai/sdk/task/rubrics/models.py +57 -0
- synth_ai/sdk/task/rubrics/scoring.py +116 -0
- synth_ai/sdk/task/rubrics/strict.py +149 -0
- synth_ai/sdk/task/rubrics.py +219 -0
- synth_ai/sdk/task/server.py +631 -0
- synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
- synth_ai/sdk/task/tracing_utils.py +95 -0
- synth_ai/sdk/task/validators.py +441 -0
- synth_ai/sdk/task/vendors.py +59 -0
- synth_ai/sdk/training/__init__.py +102 -0
- synth_ai/sdk/tunnels/__init__.py +83 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/utils/__init__.py +213 -0
- synth_ai-0.4.3.dist-info/METADATA +262 -0
- synth_ai-0.4.3.dist-info/RECORD +370 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
- synth_ai/cli/calc.py +0 -69
- synth_ai/cli/demo.py +0 -131
- synth_ai/cli/legacy_root_backup.py +0 -470
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/rl_demo.py +0 -137
- synth_ai/cli/status.py +0 -133
- synth_ai/config/base_url.py +0 -98
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demos/core/__init__.py +0 -1
- synth_ai/demos/core/cli.py +0 -685
- synth_ai/demos/demo_task_apps/__init__.py +0 -1
- synth_ai/demos/demo_task_apps/math/config.toml +0 -44
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
- synth_ai/environments/__init__.py +0 -31
- synth_ai/environments/environment/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/base.py +0 -52
- synth_ai/environments/environment/core.py +0 -67
- synth_ai/environments/environment/db/__init__.py +0 -1
- synth_ai/environments/environment/db/sqlite.py +0 -45
- synth_ai/environments/environment/registry.py +0 -233
- synth_ai/environments/environment/resources/sqlite.py +0 -45
- synth_ai/environments/environment/results.py +0 -1
- synth_ai/environments/environment/rewards/__init__.py +0 -1
- synth_ai/environments/environment/rewards/core.py +0 -29
- synth_ai/environments/environment/shared_engine.py +0 -26
- synth_ai/environments/environment/tools/__init__.py +0 -200
- synth_ai/environments/examples/__init__.py +0 -1
- synth_ai/environments/examples/bandit/__init__.py +0 -33
- synth_ai/environments/examples/bandit/engine.py +0 -294
- synth_ai/environments/examples/bandit/environment.py +0 -194
- synth_ai/environments/examples/bandit/taskset.py +0 -200
- synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -724
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
- synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +0 -579
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
- synth_ai/environments/examples/crafter_classic/environment.py +0 -404
- synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
- synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
- synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
- synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
- synth_ai/environments/examples/crafter_custom/environment.py +0 -312
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
- synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
- synth_ai/environments/examples/enron/engine.py +0 -295
- synth_ai/environments/examples/enron/environment.py +0 -166
- synth_ai/environments/examples/enron/taskset.py +0 -112
- synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
- synth_ai/environments/examples/minigrid/__init__.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
- synth_ai/environments/examples/minigrid/engine.py +0 -589
- synth_ai/environments/examples/minigrid/environment.py +0 -274
- synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
- synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
- synth_ai/environments/examples/minigrid/taskset.py +0 -583
- synth_ai/environments/examples/nethack/__init__.py +0 -7
- synth_ai/environments/examples/nethack/achievements.py +0 -337
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
- synth_ai/environments/examples/nethack/engine.py +0 -739
- synth_ai/environments/examples/nethack/environment.py +0 -256
- synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
- synth_ai/environments/examples/nethack/taskset.py +0 -323
- synth_ai/environments/examples/red/__init__.py +0 -7
- synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/red/config_logging.py +0 -110
- synth_ai/environments/examples/red/engine.py +0 -694
- synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
- synth_ai/environments/examples/red/environment.py +0 -238
- synth_ai/environments/examples/red/taskset.py +0 -79
- synth_ai/environments/examples/red/units/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
- synth_ai/environments/examples/sokoban/engine.py +0 -678
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
- synth_ai/environments/examples/sokoban/environment.py +0 -229
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
- synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
- synth_ai/environments/examples/sokoban/taskset.py +0 -428
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/environments/examples/tictactoe/__init__.py +0 -1
- synth_ai/environments/examples/tictactoe/engine.py +0 -368
- synth_ai/environments/examples/tictactoe/environment.py +0 -240
- synth_ai/environments/examples/tictactoe/taskset.py +0 -215
- synth_ai/environments/examples/verilog/__init__.py +0 -10
- synth_ai/environments/examples/verilog/engine.py +0 -329
- synth_ai/environments/examples/verilog/environment.py +0 -350
- synth_ai/environments/examples/verilog/taskset.py +0 -420
- synth_ai/environments/examples/wordle/__init__.py +0 -29
- synth_ai/environments/examples/wordle/engine.py +0 -398
- synth_ai/environments/examples/wordle/environment.py +0 -159
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
- synth_ai/environments/examples/wordle/taskset.py +0 -230
- synth_ai/environments/reproducibility/core.py +0 -42
- synth_ai/environments/reproducibility/helpers.py +0 -0
- synth_ai/environments/reproducibility/tree.py +0 -364
- synth_ai/environments/service/app.py +0 -91
- synth_ai/environments/service/core_routes.py +0 -1020
- synth_ai/environments/service/external_registry.py +0 -56
- synth_ai/environments/service/registry.py +0 -9
- synth_ai/environments/stateful/__init__.py +0 -1
- synth_ai/environments/stateful/core.py +0 -163
- synth_ai/environments/stateful/engine.py +0 -21
- synth_ai/environments/stateful/state.py +0 -7
- synth_ai/environments/tasks/api.py +0 -19
- synth_ai/environments/tasks/core.py +0 -80
- synth_ai/environments/tasks/filters.py +0 -41
- synth_ai/environments/tasks/utils.py +0 -91
- synth_ai/environments/v0_observability/history.py +0 -3
- synth_ai/environments/v0_observability/log.py +0 -2
- synth_ai/evals/base.py +0 -15
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/http.py +0 -102
- synth_ai/inference/client.py +0 -20
- synth_ai/install_sqld.sh +0 -40
- synth_ai/jobs/client.py +0 -246
- synth_ai/learning/__init__.py +0 -24
- synth_ai/learning/config.py +0 -43
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/ft_client.py +0 -59
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/learning/sse.py +0 -58
- synth_ai/learning/validators.py +0 -48
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/__init__.py +0 -0
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/__init__.py +0 -0
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/__init__.py +0 -0
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/__init__.py +0 -0
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -220
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/__init__.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/__init__.py +0 -10
- synth_ai/task/contracts.py +0 -120
- synth_ai/task/health.py +0 -28
- synth_ai/task/validators.py +0 -12
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/config.py +0 -84
- synth_ai/tracing_v3/storage/config.py +0 -62
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/daemon.py +0 -144
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -140
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -680
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -510
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -140
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -701
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -525
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.6.dev1.dist-info/METADATA +0 -106
- synth_ai-0.2.6.dev1.dist-info/RECORD +0 -416
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
- /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
- /synth_ai/{compound/cais.py → py.typed} +0 -0
- /synth_ai/{learning → sdk/learning}/core.py +0 -0
- /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""Client utilities for querying prompt learning job results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from synth_ai.core._utils.http import AsyncHttpClient
|
|
8
|
+
|
|
9
|
+
from .prompt_learning_types import PromptResults
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _validate_job_id(job_id: str) -> None:
|
|
13
|
+
"""Validate that job_id has the expected prompt learning format.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
job_id: Job ID to validate
|
|
17
|
+
|
|
18
|
+
Raises:
|
|
19
|
+
ValueError: If job_id doesn't start with 'pl_'
|
|
20
|
+
"""
|
|
21
|
+
if not job_id.startswith("pl_"):
|
|
22
|
+
raise ValueError(
|
|
23
|
+
f"Invalid prompt learning job ID format: {job_id!r}. "
|
|
24
|
+
f"Expected format: 'pl_<identifier>' (e.g., 'pl_9c58b711c2644083')"
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PromptLearningClient:
    """Client for interacting with prompt learning jobs and retrieving results.

    Each request opens a short-lived ``AsyncHttpClient`` built from the base
    URL, API key and timeout supplied at construction time.
    """

    # Bin edges of the train-accuracy histogram built by get_scoring_summary().
    _SCORE_BINS = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0)

    def __init__(self, base_url: str, api_key: str, *, timeout: float = 30.0) -> None:
        """Initialize the prompt learning client.

        Args:
            base_url: Base URL of the backend API (e.g., "http://localhost:8000" or "http://localhost:8000/api")
            api_key: API key for authentication
            timeout: Request timeout in seconds
        """
        # A base URL that already ends in "/api" is stored unchanged even though
        # request paths below start with "/api/".
        # NOTE(review): the original code relied on AsyncHttpClient._abs() to
        # handle the resulting "/api/api" paths - confirm against that helper.
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._timeout = timeout

    async def get_job(self, job_id: str) -> Dict[str, Any]:
        """Get job metadata and status.

        Args:
            job_id: Job ID (e.g., "pl_9c58b711c2644083")

        Returns:
            Job metadata including status, best_score, created_at, etc.

        Raises:
            ValueError: If job_id format is invalid
        """
        _validate_job_id(job_id)
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
            return await http.get(f"/api/prompt-learning/online/jobs/{job_id}")

    async def get_events(
        self, job_id: str, *, since_seq: int = 0, limit: int = 5000
    ) -> List[Dict[str, Any]]:
        """Get events for a prompt learning job.

        Args:
            job_id: Job ID
            since_seq: Return events after this sequence number
            limit: Maximum number of events to return

        Returns:
            List of event dictionaries with type, message, data, etc.

        Raises:
            ValueError: If job_id format is invalid or response structure is unexpected
        """
        _validate_job_id(job_id)
        params = {"since_seq": since_seq, "limit": limit}
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
            js = await http.get(
                f"/api/prompt-learning/online/jobs/{job_id}/events",
                params=params,
            )
        if isinstance(js, dict) and isinstance(js.get("events"), list):
            return js["events"]
        # Handle case where response is directly a list
        if isinstance(js, list):
            return js
        # Unexpected response structure - raise instead of silently returning empty list
        raise ValueError(
            "Unexpected response structure from events endpoint. "
            f"Expected dict with 'events' list or list directly, got: {type(js).__name__}"
        )

    @staticmethod
    def _first_not_none(*values: Any) -> Any:
        """Return the first argument that is not None, or None if there is none."""
        return next((value for value in values if value is not None), None)

    @staticmethod
    def _rank_sort_key(prompt: Dict[str, Any]) -> Any:
        """Sort key for prompt entries: numeric rank, or 999 when rank is missing/non-numeric."""
        rank = prompt.get("rank")
        return rank if isinstance(rank, (int, float)) else 999

    @staticmethod
    def _record_validation(validation_by_rank: Dict[int, float], item: Dict[str, Any]) -> None:
        """Store item's accuracy under its rank when both values are present."""
        rank = item.get("rank")
        accuracy = item.get("accuracy")
        if rank is not None and accuracy is not None:
            validation_by_rank[rank] = accuracy

    @staticmethod
    def _format_sections(sections: Any) -> str:
        """Render section dicts as "[role | name]\\ncontent" blocks joined by blank lines.

        Entries that are not dicts are skipped; missing fields render as empty strings.
        """
        parts = []
        for sec in sections:
            if not isinstance(sec, dict):
                continue
            sec_name = sec.get("name", "")
            sec_role = sec.get("role", "")
            sec_content = str(sec.get("content", ""))
            parts.append(f"[{sec_role} | {sec_name}]\n{sec_content}")
        return "\n\n".join(parts)

    def _extract_full_text_from_template(self, template: Dict[str, Any]) -> str:
        """Extract full text from a serialized template dict.

        Args:
            template: Serialized template dict with a 'sections' field, or the
                alternative 'prompt_sections' field (from to_dict format)

        Returns:
            Formatted full text string; empty when no usable sections exist
        """
        # "or []" also covers an explicit null under either key.
        sections = template.get("sections") or template.get("prompt_sections") or []
        return self._format_sections(sections)

    def _extract_full_text_from_object(self, obj: Dict[str, Any]) -> Optional[str]:
        """Extract full text from a candidate's object field.

        Args:
            obj: Candidate object dict (may have 'data' with 'sections' or 'text_replacements')

        Returns:
            Formatted full text string or None if extraction fails
        """
        data = obj.get("data", {})
        if isinstance(data, dict):
            # Template format: object.data.sections
            sections = data.get("sections", [])
            if sections:
                return self._format_sections(sections)

            # Transformation format: object.data.text_replacements
            text_replacements = data.get("text_replacements", [])
            if text_replacements and isinstance(text_replacements, list):
                parts = []
                for replacement in text_replacements:
                    if not isinstance(replacement, dict):
                        continue
                    new_text = replacement.get("new_text", "")
                    role = replacement.get("apply_to_role", "system")
                    if new_text:
                        parts.append(f"[{role}]\n{new_text}")
                if parts:
                    return "\n\n".join(parts)

        # Last resort: sections stored directly on the object
        sections = obj.get("sections", [])
        if sections:
            return self._extract_full_text_from_template({"sections": sections})

        return None

    def _prompt_entry_from_candidate(
        self,
        cand: Dict[str, Any],
        fallback_rank: int,
        validation_by_rank: Dict[int, float],
    ) -> Dict[str, Any]:
        """Build a top_prompts entry from one optimized candidate dict.

        Args:
            cand: Candidate dict (may carry 'rank', 'score', 'template', 'object')
            fallback_rank: Rank to use when the candidate has none
            validation_by_rank: Validation accuracy keyed by rank

        Returns:
            Dict with 'rank', 'train_accuracy', 'val_accuracy' and, when they
            could be extracted, 'template' and 'full_text'
        """
        rank = cand.get("rank")
        if rank is None:
            rank = fallback_rank

        score = cand.get("score", {})
        if not isinstance(score, dict):
            score = {}

        template = None
        full_text = None

        # First try: template field (only usable when it is a serialized dict)
        cand_template = cand.get("template")
        if cand_template and isinstance(cand_template, dict):
            template = cand_template
            full_text = self._extract_full_text_from_template(cand_template)

        # Second try: object field
        if not full_text:
            obj = cand.get("object", {})
            if isinstance(obj, dict):
                full_text = self._extract_full_text_from_object(obj)
                if full_text and not template:
                    # Rebuild a template structure from object.data when possible
                    obj_data = obj.get("data", {})
                    if isinstance(obj_data, dict) and obj_data.get("sections"):
                        template = {"sections": obj_data["sections"]}

        entry: Dict[str, Any] = {
            "rank": rank,
            "train_accuracy": score.get("accuracy"),
            "val_accuracy": validation_by_rank.get(rank),
        }
        if template:
            entry["template"] = template
        if full_text:
            entry["full_text"] = full_text
        return entry

    @classmethod
    def _score_distribution(cls, accuracies: List[float]) -> Dict[str, int]:
        """Count accuracies per bin of _SCORE_BINS.

        Bins are half-open [lo, hi) except the last, which also includes its
        upper edge. Values outside the overall range are not counted.
        """
        bins = cls._SCORE_BINS
        labels = [f"{lo:.1f}-{hi:.1f}" for lo, hi in zip(bins, bins[1:])]
        distribution = {label: 0 for label in labels}
        last = len(labels) - 1
        for acc in accuracies:
            for i, label in enumerate(labels):
                lo, hi = bins[i], bins[i + 1]
                if lo <= acc < hi or (i == last and acc == hi):
                    distribution[label] += 1
                    break
        return distribution

    async def get_prompts(self, job_id: str) -> PromptResults:
        """Get the best prompts and scoring metadata from a completed job.

        Results are reconstructed from the job's event stream, fetched in a
        single request of at most 10000 events.

        Args:
            job_id: Job ID

        Returns:
            PromptResults dataclass containing:
            - best_prompt: The top-performing prompt with sections and metadata
            - best_score: The best accuracy score achieved
            - top_prompts: List of top-K prompts with train/val scores
            - optimized_candidates: All frontier/Pareto-optimal candidates
            - attempted_candidates: All candidates tried during optimization

        Raises:
            ValueError: If job_id format is invalid
        """
        _validate_job_id(job_id)
        events = await self.get_events(job_id, limit=10000)

        result = PromptResults()

        # Validation accuracy keyed by rank, filled from the events below
        validation_by_rank: Dict[int, float] = {}

        for event in events:
            if not isinstance(event, dict):
                continue
            event_type = event.get("type", "")
            event_data = event.get("data")
            if not isinstance(event_data, dict):
                # A missing or null payload carries nothing to extract; skipping
                # it avoids clobbering values set by earlier events.
                continue

            # Best prompt event
            if event_type == "prompt.learning.best.prompt":
                result.best_prompt = event_data.get("best_prompt")
                result.best_score = event_data.get("best_score")

            # Top-K prompt content events
            elif event_type == "prompt.learning.top.prompt.content":
                result.top_prompts.append({
                    "rank": event_data.get("rank"),
                    "train_accuracy": event_data.get("train_accuracy"),
                    "val_accuracy": event_data.get("val_accuracy"),
                    "template": event_data.get("template"),
                    "full_text": event_data.get("full_text"),
                })

            # Final results event (contains all candidates)
            elif event_type == "prompt.learning.final.results":
                result.optimized_candidates = event_data.get("optimized_candidates") or []
                result.attempted_candidates = event_data.get("attempted_candidates") or []
                result.version_tree = event_data.get("version_tree")
                # Only fill best_prompt/best_score if no earlier event set them
                if result.best_prompt is None:
                    result.best_prompt = event_data.get("best_prompt")
                if result.best_score is None:
                    result.best_score = event_data.get("best_score")

                validation_data = event_data.get("validation")
                if isinstance(validation_data, list):
                    for val_item in validation_data:
                        if isinstance(val_item, dict):
                            self._record_validation(validation_by_rank, val_item)

            # Validation results
            elif event_type == "prompt.learning.validation.scored":
                result.validation_results.append(event_data)
                self._record_validation(validation_by_rank, event_data)

            # Completion event (fallback for best_score)
            elif event_type == "prompt.learning.gepa.complete":
                if result.best_score is None:
                    result.best_score = event_data.get("best_score")

            # MIPRO completion event
            elif event_type == "mipro.job.completed":
                if result.best_score is None:
                    # Prefer the unified best_score field. Select on "is not
                    # None" rather than truthiness so a real score of 0.0 is
                    # not skipped in favour of a later fallback field.
                    result.best_score = self._first_not_none(
                        event_data.get("best_score"),
                        event_data.get("best_full_score"),
                        event_data.get("best_minibatch_score"),
                    )

        # If top_prompts is empty but we have optimized_candidates, extract from them
        if not result.top_prompts and result.optimized_candidates:
            for idx, cand in enumerate(result.optimized_candidates):
                if isinstance(cand, dict):
                    result.top_prompts.append(
                        self._prompt_entry_from_candidate(cand, idx + 1, validation_by_rank)
                    )

        # Keep prompts in rank order; the key tolerates missing or null ranks.
        result.top_prompts.sort(key=self._rank_sort_key)

        # Prefer the validation score of rank 0 for best_score when available.
        # NOTE(review): rank 0 is treated as "best" here, while fallback ranks
        # start at 1 and get_prompt_text documents rank 1 as best - confirm the
        # backend's rank convention.
        if 0 in validation_by_rank:
            result.best_score = validation_by_rank[0]

        return result

    async def get_prompt_text(self, job_id: str, rank: int = 1) -> Optional[str]:
        """Get the full text of a specific prompt by rank.

        Args:
            job_id: Job ID
            rank: Prompt rank (1 = best, 2 = second best, etc.)

        Returns:
            Full prompt text or None if not found

        Raises:
            ValueError: If job_id format is invalid or rank < 1
        """
        _validate_job_id(job_id)
        if rank < 1:
            raise ValueError(f"Rank must be >= 1, got: {rank}")
        prompts_data = await self.get_prompts(job_id)

        for prompt_info in prompts_data.top_prompts:
            if prompt_info.get("rank") == rank:
                return prompt_info.get("full_text")

        return None

    async def get_scoring_summary(self, job_id: str) -> Dict[str, Any]:
        """Get a summary of scoring metrics for all candidates.

        Args:
            job_id: Job ID

        Returns:
            Dictionary with scoring statistics:
            - best_train_accuracy: Best training accuracy
            - best_val_accuracy: Best validation accuracy (if available)
            - num_candidates_tried: Total candidates evaluated
            - num_frontier_candidates: Number in Pareto frontier
            - score_distribution: Histogram of accuracy scores
            - mean_train_accuracy: Mean training accuracy

        Raises:
            ValueError: If job_id format is invalid
        """
        _validate_job_id(job_id)
        prompts_data = await self.get_prompts(job_id)

        attempted = prompts_data.attempted_candidates
        optimized = prompts_data.optimized_candidates
        validation = prompts_data.validation_results

        def _numeric_accuracy(item: Any) -> bool:
            # Null or non-numeric accuracies would break max()/sum() below.
            return isinstance(item, dict) and isinstance(item.get("accuracy"), (int, float))

        train_accuracies = [c["accuracy"] for c in attempted if _numeric_accuracy(c)]

        # IMPORTANT: Exclude baseline from "best" calculation - baseline is for comparison only
        val_accuracies = [
            v["accuracy"]
            for v in validation
            if _numeric_accuracy(v) and not v.get("is_baseline", False)
        ]

        return {
            "best_train_accuracy": max(train_accuracies) if train_accuracies else None,
            "best_val_accuracy": max(val_accuracies) if val_accuracies else None,
            "num_candidates_tried": len(attempted),
            "num_frontier_candidates": len(optimized),
            "score_distribution": self._score_distribution(train_accuracies),
            "mean_train_accuracy": sum(train_accuracies) / len(train_accuracies) if train_accuracies else None,
        }
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
# Synchronous wrapper for convenience
|
|
406
|
+
def get_prompts(job_id: str, base_url: str, api_key: str) -> PromptResults:
    """Fetch a job's prompt results from synchronous code.

    Builds a PromptLearningClient with its default timeout and drives its
    async ``get_prompts`` to completion with ``asyncio.run``.

    Args:
        job_id: Job ID (e.g., "pl_9c58b711c2644083")
        base_url: Backend API base URL
        api_key: API key for authentication

    Returns:
        PromptResults dataclass with prompt results
    """
    import asyncio

    # asyncio.run() creates its own event loop, so this wrapper must not be
    # called from code that is already running inside one.
    fetcher = PromptLearningClient(base_url, api_key)
    pending = fetcher.get_prompts(job_id)
    return asyncio.run(pending)
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def get_prompt_text(job_id: str, base_url: str, api_key: str, rank: int = 1) -> Optional[str]:
    """Fetch the full text of one ranked prompt from synchronous code.

    Builds a PromptLearningClient with its default timeout and drives its
    async ``get_prompt_text`` to completion with ``asyncio.run``.

    Args:
        job_id: Job ID
        base_url: Backend API base URL
        api_key: API key for authentication
        rank: Prompt rank (1 = best, 2 = second best, etc.)

    Returns:
        Full prompt text or None if not found
    """
    import asyncio

    # asyncio.run() creates its own event loop, so this wrapper must not be
    # called from code that is already running inside one.
    fetcher = PromptLearningClient(base_url, api_key)
    pending = fetcher.get_prompt_text(job_id, rank)
    return asyncio.run(pending)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def get_scoring_summary(job_id: str, base_url: str, api_key: str) -> Dict[str, Any]:
    """Fetch a job's scoring summary from synchronous code.

    Builds a PromptLearningClient with its default timeout and drives its
    async ``get_scoring_summary`` to completion with ``asyncio.run``.

    Args:
        job_id: Job ID
        base_url: Backend API base URL
        api_key: API key for authentication

    Returns:
        Dictionary with scoring statistics
    """
    import asyncio

    # asyncio.run() creates its own event loop, so this wrapper must not be
    # called from code that is already running inside one.
    fetcher = PromptLearningClient(base_url, api_key)
    pending = fetcher.get_scoring_summary(job_id)
    return asyncio.run(pending)
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Type definitions for prompt learning data structures."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class TextReplacement:
    """A single text replacement within a prompt transformation.

    Only ``new_text`` is required; the remaining fields are optional.
    """

    # Replacement text.
    new_text: str
    # Message role the replacement applies to; defaults to "system".
    apply_to_role: str = "system"
    # Presumably the text being replaced - TODO confirm against the backend schema.
    old_text: Optional[str] = None
    # Presumably where the replacement goes - TODO confirm against the backend schema.
    position: Optional[int] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class CandidateScore:
    """Score record attached to a candidate prompt.

    Only ``accuracy`` is required; every other metric has a zero/empty default.
    """

    # Accuracy achieved by the candidate.
    accuracy: float
    # Length of the prompt (unit not shown here - TODO confirm).
    prompt_length: int = 0
    # Tool-call rate recorded for the candidate.
    tool_call_rate: float = 0.0
    # Per-instance scores; each instance gets its own fresh list by default.
    instance_scores: List[float] = field(default_factory=list)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class PromptSection:
    """One section of a prompt, pairing a message role with its text."""

    # Message role of the section (e.g., system, user, assistant).
    role: str
    # Text content of the section.
    content: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class Candidate:
    """A candidate prompt from the optimization process."""

    # Accuracy achieved by the candidate.
    accuracy: float
    # Length of the prompt (unit not shown here - TODO confirm).
    prompt_length: int = 0
    # Tool-call rate recorded for the candidate.
    tool_call_rate: float = 0.0
    # Per-instance scores.
    instance_scores: List[float] = field(default_factory=list)
    # Raw candidate payload, kept as an untyped dict when present.
    object: Optional[Dict[str, Any]] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> Candidate:
        """Create a Candidate from a dictionary.

        Keys that are missing or explicitly null fall back to the field
        defaults (accuracy 0.0, prompt_length 0, tool_call_rate 0.0, empty
        instance_scores).

        Args:
            data: Candidate dictionary

        Returns:
            The populated Candidate
        """

        def pick(key: str, default: Any) -> Any:
            # dict.get(key, default) returns None for a present-but-null key,
            # so the default has to be applied explicitly.
            value = data.get(key)
            return default if value is None else value

        return cls(
            accuracy=pick("accuracy", 0.0),
            prompt_length=pick("prompt_length", 0),
            tool_call_rate=pick("tool_call_rate", 0.0),
            instance_scores=pick("instance_scores", []),
            object=data.get("object"),
        )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class OptimizedCandidate:
    """An optimized candidate from the Pareto frontier."""

    # Scoring information for the candidate.
    score: CandidateScore
    # "transformation" or "template" ("unknown" when from_dict finds no value).
    payload_kind: str
    # Raw candidate payload, kept as an untyped dict when present.
    object: Optional[Dict[str, Any]] = None
    # Candidate-level instance scores, passed through as given.
    instance_scores: Optional[List[float]] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> OptimizedCandidate:
        """Create an OptimizedCandidate from a dictionary.

        A 'score' value that is not a dict yields a zero-accuracy score. Score
        fields and 'payload_kind' that are missing or explicitly null fall back
        to their defaults.

        Args:
            data: Optimized candidate dictionary

        Returns:
            The populated OptimizedCandidate
        """

        def pick(source: Dict[str, Any], key: str, default: Any) -> Any:
            # dict.get(key, default) returns None for a present-but-null key,
            # so the default has to be applied explicitly.
            value = source.get(key)
            return default if value is None else value

        score_data = data.get("score", {})
        if isinstance(score_data, dict):
            score = CandidateScore(
                accuracy=pick(score_data, "accuracy", 0.0),
                prompt_length=pick(score_data, "prompt_length", 0),
                tool_call_rate=pick(score_data, "tool_call_rate", 0.0),
                instance_scores=pick(score_data, "instance_scores", []),
            )
        else:
            score = CandidateScore(accuracy=0.0)

        return cls(
            score=score,
            payload_kind=pick(data, "payload_kind", "unknown"),
            object=data.get("object"),
            instance_scores=data.get("instance_scores"),
        )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class PromptLearningEvent:
    """A generic prompt learning event.

    Attributes:
        type: Event type string.
        message: Event message string.
        data: Event payload dictionary; its schema is not defined here.
        seq: Integer sequence value of the event.
        created_at: Optional creation timestamp kept as a string; the format
            is not specified in this module.
    """

    type: str
    message: str
    data: Dict[str, Any]
    seq: int
    created_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PromptLearningEvent:
        """Create a PromptLearningEvent from a dictionary.

        Keys that are missing *or* explicitly ``None`` (e.g. a JSON ``null``)
        fall back to ``""`` / ``{}`` / ``0``, so the non-optional fields
        never end up holding ``None``.

        Args:
            data: Mapping that may contain ``type``, ``message``, ``data``,
                ``seq`` and ``created_at``.

        Returns:
            A new ``PromptLearningEvent`` populated from ``data``.
        """

        def _or_default(key: str, default: Any) -> Any:
            # dict.get(key, default) only covers a *missing* key; a present
            # key whose value is None must be handled explicitly.
            value = data.get(key)
            return default if value is None else value

        return cls(
            type=_or_default("type", ""),
            message=_or_default("message", ""),
            data=_or_default("data", {}),
            seq=_or_default("seq", 0),
            # ``created_at`` is Optional, so None is passed through unchanged.
            created_at=data.get("created_at"),
        )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass
class BestPromptEventData:
    """Data for prompt.learning.best.prompt event.

    Attributes:
        best_score: Score reported with the event.
        best_prompt: Prompt payload dictionary; its schema is not defined in
            this module.
    """

    best_score: float
    best_prompt: Dict[str, Any]

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> BestPromptEventData:
        """Create BestPromptEventData from a dictionary.

        Keys that are missing *or* explicitly ``None`` (e.g. a JSON ``null``)
        fall back to ``0.0`` and ``{}`` respectively.

        Args:
            data: Mapping that may contain ``best_score`` and ``best_prompt``.

        Returns:
            A new ``BestPromptEventData`` populated from ``data``.
        """
        # dict.get(key, default) only covers a *missing* key, so a present
        # None value is checked for explicitly.
        best_score = data.get("best_score")
        best_prompt = data.get("best_prompt")
        return cls(
            best_score=0.0 if best_score is None else best_score,
            best_prompt={} if best_prompt is None else best_prompt,
        )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dataclass
class FinalResultsEventData:
    """Data for prompt.learning.final.results event.

    Attributes:
        attempted_candidates: List of candidate dictionaries, kept raw.
        optimized_candidates: List of candidate dictionaries, kept raw.
    """

    attempted_candidates: List[Dict[str, Any]]
    optimized_candidates: List[Dict[str, Any]]

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> FinalResultsEventData:
        """Create FinalResultsEventData from a dictionary.

        Keys that are missing *or* explicitly ``None`` (e.g. a JSON ``null``)
        become empty lists, so both fields are always lists.

        Args:
            data: Mapping that may contain ``attempted_candidates`` and
                ``optimized_candidates``.

        Returns:
            A new ``FinalResultsEventData`` populated from ``data``.
        """
        # dict.get(key, default) only covers a *missing* key, so a present
        # None value is checked for explicitly.
        attempted = data.get("attempted_candidates")
        optimized = data.get("optimized_candidates")
        return cls(
            attempted_candidates=[] if attempted is None else attempted,
            optimized_candidates=[] if optimized is None else optimized,
        )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@dataclass
class ValidationScoredEventData:
    """Data for prompt.learning.validation.scored event.

    Attributes:
        accuracy: Accuracy value reported with the event (required).
        instance_scores: Per-instance scores; defaults to a fresh empty list.
        is_baseline: Baseline flag carried by the event; defaults to
            ``False``.
    """

    accuracy: float
    instance_scores: List[float] = field(default_factory=list)
    is_baseline: bool = False

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> ValidationScoredEventData:
        """Create ValidationScoredEventData from a dictionary.

        Keys that are missing *or* explicitly ``None`` (e.g. a JSON ``null``)
        fall back to ``0.0`` / ``[]`` / ``False``.

        Args:
            data: Mapping that may contain ``accuracy``, ``instance_scores``
                and ``is_baseline``.

        Returns:
            A new ``ValidationScoredEventData`` populated from ``data``.
        """

        def _or_default(key: str, default: Any) -> Any:
            # dict.get(key, default) only covers a *missing* key; a present
            # key whose value is None must be handled explicitly.
            value = data.get(key)
            return default if value is None else value

        return cls(
            accuracy=_or_default("accuracy", 0.0),
            instance_scores=_or_default("instance_scores", []),
            is_baseline=_or_default("is_baseline", False),
        )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass
class PromptResults:
    """Results from a completed prompt learning job.

    Every field is optional at construction time. The dictionaries and list
    items are stored raw; their schemas are not defined in this module.

    Attributes:
        best_prompt: Optional prompt payload dictionary.
        best_score: Optional score value.
        version_tree: Optional version-tree dictionary.
        top_prompts: List of prompt dictionaries.
        optimized_candidates: List of candidate dictionaries.
        attempted_candidates: List of candidate dictionaries.
        validation_results: List of validation result dictionaries.
    """

    best_prompt: Optional[Dict[str, Any]] = None
    best_score: Optional[float] = None
    version_tree: Optional[Dict[str, Any]] = None
    top_prompts: List[Dict[str, Any]] = field(default_factory=list)
    optimized_candidates: List[Dict[str, Any]] = field(default_factory=list)
    attempted_candidates: List[Dict[str, Any]] = field(default_factory=list)
    validation_results: List[Dict[str, Any]] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PromptResults:
        """Create PromptResults from a dictionary.

        The three ``Optional`` fields are copied through as-is. The four list
        fields become empty lists when their key is missing *or* explicitly
        ``None`` (e.g. a JSON ``null``), so they are always lists.

        Args:
            data: Mapping that may contain any of the field names above.

        Returns:
            A new ``PromptResults`` populated from ``data``.
        """

        def _list_or_empty(key: str) -> List[Dict[str, Any]]:
            # dict.get(key, []) only covers a *missing* key; a present key
            # whose value is None must be handled explicitly.
            value = data.get(key)
            return [] if value is None else value

        return cls(
            best_prompt=data.get("best_prompt"),
            best_score=data.get("best_score"),
            version_tree=data.get("version_tree"),
            top_prompts=_list_or_empty("top_prompts"),
            optimized_candidates=_list_or_empty("optimized_candidates"),
            attempted_candidates=_list_or_empty("attempted_candidates"),
            validation_results=_list_or_empty("validation_results"),
        )
|
|
186
|
+
|
|
@@ -1,25 +1,31 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .client import RlClient
|
|
4
|
+
from .config import RLJobConfig
|
|
1
5
|
from .contracts import (
|
|
2
6
|
RolloutEnvSpec,
|
|
7
|
+
RolloutMetrics,
|
|
3
8
|
RolloutPolicySpec,
|
|
4
9
|
RolloutRecordConfig,
|
|
5
|
-
RolloutSafetyConfig,
|
|
6
10
|
RolloutRequest,
|
|
7
|
-
RolloutStep,
|
|
8
|
-
RolloutTrajectory,
|
|
9
|
-
RolloutMetrics,
|
|
10
11
|
RolloutResponse,
|
|
12
|
+
RolloutSafetyConfig,
|
|
13
|
+
)
|
|
14
|
+
from .env_keys import (
|
|
15
|
+
MAX_ENVIRONMENT_API_KEY_BYTES,
|
|
16
|
+
encrypt_for_backend,
|
|
17
|
+
setup_environment_api_key,
|
|
11
18
|
)
|
|
12
|
-
from .env_keys import MAX_ENVIRONMENT_API_KEY_BYTES, encrypt_for_backend, setup_environment_api_key
|
|
13
19
|
from .secrets import mint_environment_api_key
|
|
14
20
|
|
|
15
21
|
__all__ = [
|
|
22
|
+
"RlClient",
|
|
23
|
+
"RLJobConfig",
|
|
16
24
|
"RolloutEnvSpec",
|
|
17
25
|
"RolloutPolicySpec",
|
|
18
26
|
"RolloutRecordConfig",
|
|
19
27
|
"RolloutSafetyConfig",
|
|
20
28
|
"RolloutRequest",
|
|
21
|
-
"RolloutStep",
|
|
22
|
-
"RolloutTrajectory",
|
|
23
29
|
"RolloutMetrics",
|
|
24
30
|
"RolloutResponse",
|
|
25
31
|
"encrypt_for_backend",
|
|
@@ -27,4 +33,3 @@ __all__ = [
|
|
|
27
33
|
"mint_environment_api_key",
|
|
28
34
|
"MAX_ENVIRONMENT_API_KEY_BYTES",
|
|
29
35
|
]
|
|
30
|
-
|