synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -156,13 +156,13 @@ class OpenAIClient:
|
|
|
156
156
|
keys_preview = sorted(processed_request.keys())
|
|
157
157
|
logger.info(f"Request keys: {keys_preview}")
|
|
158
158
|
|
|
159
|
-
# Final hard-guard for OpenAI: ensure unsupported field is not present
|
|
159
|
+
# Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
|
|
160
160
|
try:
|
|
161
|
-
|
|
161
|
+
low_url = url.lower()
|
|
162
|
+
if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
|
|
162
163
|
processed_request.pop("stop_after_tool_calls", None)
|
|
163
|
-
logger.info("Removed stop_after_tool_calls for OpenAI request")
|
|
164
|
+
logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
|
|
164
165
|
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
165
|
-
low_url = url.lower()
|
|
166
166
|
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
167
167
|
processed_request, dict
|
|
168
168
|
):
|
|
@@ -340,40 +340,6 @@ class OpenAIClient:
|
|
|
340
340
|
pass
|
|
341
341
|
except Exception:
|
|
342
342
|
pass
|
|
343
|
-
# Gracefully degrade on 422 so rollouts can still produce a trajectory
|
|
344
|
-
if status == 422:
|
|
345
|
-
try:
|
|
346
|
-
# Best-effort parse of error for diagnostics
|
|
347
|
-
err = None
|
|
348
|
-
try:
|
|
349
|
-
err = e.response.json()
|
|
350
|
-
except Exception:
|
|
351
|
-
err = {"error": "unprocessable", "detail": (text or "")[:200]}
|
|
352
|
-
logger.warning(
|
|
353
|
-
{
|
|
354
|
-
"inference_422_recovered": True,
|
|
355
|
-
"detail": err,
|
|
356
|
-
}
|
|
357
|
-
)
|
|
358
|
-
except Exception:
|
|
359
|
-
pass
|
|
360
|
-
# Return a minimal OpenAI-compatible response with no tool_calls/content
|
|
361
|
-
import time as _t
|
|
362
|
-
|
|
363
|
-
return {
|
|
364
|
-
"id": f"cmpl-{int(_t.time())}",
|
|
365
|
-
"object": "chat.completion",
|
|
366
|
-
"created": int(_t.time()),
|
|
367
|
-
"model": processed_request.get("model") or "unknown",
|
|
368
|
-
"choices": [
|
|
369
|
-
{
|
|
370
|
-
"index": 0,
|
|
371
|
-
"message": {"role": "assistant", "content": "", "tool_calls": []},
|
|
372
|
-
"finish_reason": "stop",
|
|
373
|
-
}
|
|
374
|
-
],
|
|
375
|
-
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
|
376
|
-
}
|
|
377
343
|
raise
|
|
378
344
|
except Exception as e:
|
|
379
345
|
logger.error(f"Unexpected error calling {url}: {e}")
|
|
@@ -945,6 +945,23 @@ async def step_policy(
|
|
|
945
945
|
except Exception as exc:
|
|
946
946
|
logger.debug(f"TRACING_LLM_FAIL: {exc}")
|
|
947
947
|
|
|
948
|
+
if not tool_calls:
|
|
949
|
+
preview = ""
|
|
950
|
+
try:
|
|
951
|
+
preview = str(meta.get("raw_response") or "")[:400]
|
|
952
|
+
except Exception:
|
|
953
|
+
preview = "<unavailable>"
|
|
954
|
+
logger.error(
|
|
955
|
+
{
|
|
956
|
+
"rollout.policy_step": True,
|
|
957
|
+
"policy_id": request.policy_id,
|
|
958
|
+
"error": "no_tool_calls",
|
|
959
|
+
"inference_url": meta.get("inference_url"),
|
|
960
|
+
"raw_preview": preview,
|
|
961
|
+
}
|
|
962
|
+
)
|
|
963
|
+
raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
|
|
964
|
+
|
|
948
965
|
return PolicyStepResponse(
|
|
949
966
|
tool_calls=tool_calls,
|
|
950
967
|
meta=meta,
|
|
@@ -251,14 +251,16 @@ class RolloutTracingContext:
|
|
|
251
251
|
await self.tracer.initialize()
|
|
252
252
|
except Exception as exc:
|
|
253
253
|
logger.debug("TRACING_INIT_FAIL: %s", exc)
|
|
254
|
+
# Hard fail: tracing requested but cannot initialize
|
|
255
|
+
raise
|
|
254
256
|
try:
|
|
255
257
|
await self.tracer.start_session(
|
|
256
258
|
session_id=self.run_id, metadata=dict(self.metadata_base)
|
|
257
259
|
)
|
|
258
260
|
except Exception as exc:
|
|
259
261
|
logger.warning("TRACING_START_FAIL: %s", exc)
|
|
260
|
-
|
|
261
|
-
|
|
262
|
+
# Hard fail: tracing requested but cannot start session
|
|
263
|
+
raise
|
|
262
264
|
|
|
263
265
|
async def start_decision(self, turn_number: int) -> None:
|
|
264
266
|
self.current_turn = turn_number
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Utility classes for running swe-mini environments on Morph Cloud."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import os
|
|
7
|
+
import shlex
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Dict
|
|
11
|
+
|
|
12
|
+
_IMPORT_ERROR: Exception | None = None
|
|
13
|
+
|
|
14
|
+
try: # pragma: no cover - optional dependency
|
|
15
|
+
from morphcloud.api import MorphCloudClient
|
|
16
|
+
except Exception as exc: # pragma: no cover - optional dependency
|
|
17
|
+
MorphCloudClient = None # type: ignore[assignment]
|
|
18
|
+
_IMPORT_ERROR = exc
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _quote_env_var(key: str, value: str) -> str:
    """Return a safe shell ``export`` statement for ``key=value``.

    The value is escaped with :func:`shlex.quote`. The variable name cannot be
    quoted (a quoted name is not a valid assignment target), so it is validated
    instead: it must consist of ASCII letters, digits and underscores and must
    not start with a digit.

    Args:
        key: Environment variable name.
        value: Environment variable value; may contain arbitrary characters.

    Returns:
        A string of the form ``export NAME=<quoted value>``.

    Raises:
        ValueError: If ``key`` is not a valid shell variable name.
    """
    import re  # local import keeps this helper self-contained

    # fullmatch (rather than match + "$") also rejects a trailing newline.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", key):
        raise ValueError(f"Invalid environment variable name: {key!r}")
    return f"export {key}={shlex.quote(value)}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _now() -> float:
    """Return the current wall-clock time as seconds since the epoch."""
    current = time.time()
    return current
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class MorphSandboxBackend:
    """Thin wrapper around Morph Cloud instances for command execution.

    The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
    we expose an ``execute`` method that matches the mini-swe environment shape.

    The remote instance is started lazily on the first :meth:`execute` call and
    stopped by :meth:`close` (also attempted from ``__del__``).
    """

    # Snapshot to boot from; falls back to SWE_MINI_MORPH_SNAPSHOT_ID /
    # MORPH_SNAPSHOT_ID. When none is set a snapshot is created from image_id.
    snapshot_id: str | None = None
    # Base image for a fresh snapshot; falls back to SWE_MINI_MORPH_IMAGE_ID /
    # MORPH_IMAGE_ID, then "morphvm-minimal".
    image_id: str | None = None
    # Remote working directory; created on startup and cd'ed into per command.
    cwd: str = "/workspace"
    # Environment variables exported before every command.
    env: Dict[str, str] | None = None
    # Metadata passed to ``instances.start``.
    metadata: Dict[str, str] | None = None
    # Resources requested when a snapshot has to be created.
    vcpus: int = 4
    memory_mb: int = 8192
    disk_mb: int = 65536
    # Seconds to keep retrying the readiness wait before giving up.
    startup_timeout: int = 600

    _client: MorphCloudClient = field(init=False)
    _instance: Any = field(init=False, default=None)
    _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
    _started_at: float | None = field(init=False, default=None)

    def __post_init__(self) -> None:
        """Validate prerequisites, normalise settings and build the client.

        Raises:
            RuntimeError: If ``morphcloud`` is not importable or
                ``MORPH_API_KEY`` is unset/empty.
        """
        if MorphCloudClient is None:  # pragma: no cover - optional dependency
            raise RuntimeError(
                "morphcloud package is required for Morph environments. "
                "Install with `pip install morphcloud`."
            ) from _IMPORT_ERROR

        api_key = os.getenv("MORPH_API_KEY", "")
        if not api_key:
            raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")

        # Normalise metadata/env early to avoid shared references.
        self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
        self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
        self.cwd = self.cwd or "/workspace"

        # NOTE(review): the key is only checked above, not passed in; the client
        # presumably reads MORPH_API_KEY from the environment itself - confirm.
        self._client = MorphCloudClient()

    # Public API -----------------------------------------------------------------

    def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
        """Execute ``command`` inside the Morph instance.

        The configured environment variables are exported and the working
        directory entered before the command runs; the steps are chained with
        ``&&`` so a failing step aborts the rest.

        Args:
            command: Shell command to run. Blank commands are replaced by ``true``.
            timeout: Optional limit in seconds. When truthy, the script is run
                under the remote ``timeout`` utility.

        Returns:
            A dict with ``output`` (stdout), ``stderr``, ``returncode`` and
            ``duration`` (seconds measured locally around the remote call).
        """
        if not command.strip():
            command = "true"

        instance = self._ensure_instance()

        script_parts = [_quote_env_var(key, value) for key, value in (self.env or {}).items()]
        if self.cwd:
            script_parts.append(f"cd {shlex.quote(self.cwd)}")
        script_parts.append(command)

        script = " && ".join(script_parts)
        if timeout:
            wrapped = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
        else:
            wrapped = script

        # Always hand the SDK a single, fully quoted bash invocation.
        shell_cmd = f"bash -lc {shlex.quote(wrapped)}"
        started = _now()
        result = instance.exec(shell_cmd)
        duration = _now() - started

        payload = {
            "output": (result.stdout or ""),
            "stderr": (result.stderr or ""),
            "returncode": getattr(result, "exit_code", None),
            "duration": duration,
        }
        self._last_exec = payload
        return payload

    def close(self) -> None:
        """Stop the Morph instance if one is running.

        Shutdown is best-effort: errors from ``stop()`` are swallowed and the
        cached instance is always cleared.
        """
        instance = getattr(self, "_instance", None)
        if not instance:
            return
        try:
            instance.stop()
        except Exception:  # pragma: no cover - best-effort shutdown
            pass
        finally:
            self._instance = None

    # Internal helpers -----------------------------------------------------------

    def _ensure_instance(self):
        """Return the running instance, starting one on first use.

        Boots from the configured/env-provided snapshot when available;
        otherwise creates a snapshot from the image and remembers its id in
        ``self.snapshot_id``.

        Raises:
            TimeoutError: If the instance does not become ready in time.
            RuntimeError: If the remote workspace cannot be created.
        """
        instance = getattr(self, "_instance", None)
        if instance is not None:
            return instance

        snapshot_id = (
            self.snapshot_id
            or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
            or os.getenv("MORPH_SNAPSHOT_ID")
        )
        metadata = dict(self.metadata or {})

        if snapshot_id:
            instance = self._client.instances.start(snapshot_id=snapshot_id, metadata=metadata or None)
        else:
            image_id = (
                self.image_id
                or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
                or os.getenv("MORPH_IMAGE_ID")
                or "morphvm-minimal"
            )
            snapshot = self._client.snapshots.create(
                image_id=image_id,
                vcpus=self.vcpus,
                memory=self.memory_mb,
                disk_size=self.disk_mb,
            )
            instance = self._client.instances.start(snapshot_id=snapshot.id, metadata=metadata or None)
            self.snapshot_id = snapshot.id

        self._instance = instance
        self._started_at = _now()
        try:
            self._wait_until_ready(instance)
            self._ensure_cwd(instance)
        except Exception:
            # Do not keep a half-initialised instance cached: later execute()
            # calls would reuse it without re-checking readiness. Stop it
            # (best-effort) and surface the original error.
            self.close()
            self._started_at = None
            raise
        return instance

    def _wait_until_ready(self, instance) -> None:
        """Retry ``instance.wait_until_ready()`` until success or the deadline.

        Raises:
            TimeoutError: If the call is still failing once ``startup_timeout``
                seconds have elapsed; chained from the last SDK error.
        """
        deadline = _now() + float(self.startup_timeout)
        while True:
            try:
                instance.wait_until_ready()
                break
            except Exception as exc:  # pragma: no cover - SDK may raise while polling
                if _now() > deadline:
                    raise TimeoutError(f"Morph instance did not become ready within {self.startup_timeout}s") from exc
                time.sleep(5.0)

    def _ensure_cwd(self, instance) -> None:
        """Create the remote working directory (``mkdir -p``) if one is set.

        Raises:
            RuntimeError: If the remote call raises.
        """
        if not self.cwd:
            return
        try:
            # Quote the path itself as well as the whole script so directories
            # containing spaces or shell metacharacters are handled correctly.
            mkdir_cmd = f"mkdir -p {shlex.quote(self.cwd)}"
            instance.exec(f"bash -lc {shlex.quote(mkdir_cmd)}")
        except Exception as exc:  # pragma: no cover - surface friendly error
            raise RuntimeError(f"Failed to create remote workspace {self.cwd!r}: {exc}") from exc

    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
        with contextlib.suppress(Exception):
            self.close()
|