synth-ai: 0.2.16 (py3-none-any.whl) → 0.2.19 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. More details were linked from the original page; the link is not preserved in this text.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
|
|
|
12
12
|
from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
|
|
13
13
|
PalletTownProgressionCompositeReward,
|
|
14
14
|
)
|
|
15
|
-
from synth_ai.task.apps import TaskAppEntry, register_task_app
|
|
15
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
16
16
|
from synth_ai.task.contracts import (
|
|
17
17
|
RolloutMetrics,
|
|
18
18
|
RolloutRequest,
|
|
@@ -29,6 +29,8 @@ from synth_ai.task.tracing_utils import (
|
|
|
29
29
|
tracing_env_enabled,
|
|
30
30
|
)
|
|
31
31
|
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
32
|
+
from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
|
|
33
|
+
from datetime import datetime, UTC
|
|
32
34
|
|
|
33
35
|
logger = logging.getLogger(__name__)
|
|
34
36
|
|
|
@@ -260,8 +262,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
260
262
|
{
|
|
261
263
|
"role": "system",
|
|
262
264
|
"content": (
|
|
263
|
-
"You are controlling Pokémon Red.
|
|
264
|
-
"
|
|
265
|
+
"You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
|
|
266
|
+
"Your goal is to make progress in the game. "
|
|
267
|
+
"IMPORTANT: Always use the 'execute_sequence' tool to submit 5-10 actions per call. "
|
|
268
|
+
"Do not reason about which tool to use - execute_sequence is the only tool available. "
|
|
269
|
+
"Choose appropriate button presses based on what you see in the game screen. "
|
|
270
|
+
"Plan 5-10 actions ahead to play efficiently. "
|
|
271
|
+
"CRITICAL: If stuck in a text box (text_box_active=True), try pressing B button first, then try A. "
|
|
272
|
+
"Always respond with exactly one tool call containing 5-10 actions."
|
|
265
273
|
),
|
|
266
274
|
},
|
|
267
275
|
{
|
|
@@ -277,7 +285,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
277
285
|
"type": "function",
|
|
278
286
|
"function": {
|
|
279
287
|
"name": "execute_sequence",
|
|
280
|
-
"description": "Execute multiple button presses in sequence. More efficient than separate calls.
|
|
288
|
+
"description": "Execute multiple button presses in sequence. More efficient than separate calls. ALWAYS use this tool. Plan 5-10 actions ahead to play efficiently.",
|
|
281
289
|
"parameters": {
|
|
282
290
|
"type": "object",
|
|
283
291
|
"properties": {
|
|
@@ -300,31 +308,15 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
300
308
|
},
|
|
301
309
|
"required": ["button", "frames"]
|
|
302
310
|
},
|
|
303
|
-
"minItems":
|
|
304
|
-
"maxItems":
|
|
305
|
-
"description": "Sequence of button presses to execute"
|
|
311
|
+
"minItems": 5,
|
|
312
|
+
"maxItems": 10,
|
|
313
|
+
"description": "Sequence of 5-10 button presses to execute. Plan ahead to navigate efficiently."
|
|
306
314
|
}
|
|
307
315
|
},
|
|
308
316
|
"required": ["actions"],
|
|
309
317
|
"additionalProperties": False,
|
|
310
318
|
},
|
|
311
319
|
},
|
|
312
|
-
},
|
|
313
|
-
{
|
|
314
|
-
"type": "function",
|
|
315
|
-
"function": {
|
|
316
|
-
"name": "press_button",
|
|
317
|
-
"description": "Press a single Game Boy button for N frames (use execute_sequence for multiple actions)",
|
|
318
|
-
"parameters": {
|
|
319
|
-
"type": "object",
|
|
320
|
-
"properties": {
|
|
321
|
-
"button": {"type": "string", "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"]},
|
|
322
|
-
"frames": {"type": "integer", "minimum": 1, "maximum": 120},
|
|
323
|
-
},
|
|
324
|
-
"required": ["button"],
|
|
325
|
-
"additionalProperties": False,
|
|
326
|
-
},
|
|
327
|
-
},
|
|
328
320
|
}
|
|
329
321
|
],
|
|
330
322
|
"tool_choice": {"type": "function", "function": {"name": "execute_sequence"}},
|
|
@@ -350,35 +342,154 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
350
342
|
if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
|
|
351
343
|
inference_url = inference_url + "/v1/chat/completions"
|
|
352
344
|
|
|
345
|
+
# Debug: print exact payload being sent
|
|
346
|
+
import json as _json_debug
|
|
347
|
+
print(f"\n{'='*80}")
|
|
348
|
+
print(f"[pokemon_red] INFERENCE REQUEST DEBUG")
|
|
349
|
+
print(f"{'='*80}")
|
|
350
|
+
print(f"Inference URL: {inference_url}")
|
|
351
|
+
print(f"Payload keys: {list(payload.keys())}")
|
|
352
|
+
print(f"Payload (formatted):")
|
|
353
|
+
print(_json_debug.dumps(payload, indent=2)[:2000])
|
|
354
|
+
print(f"{'='*80}\n")
|
|
355
|
+
|
|
356
|
+
|
|
353
357
|
if is_external:
|
|
354
358
|
# External API: use direct HTTP client with auth header
|
|
355
359
|
headers = {}
|
|
360
|
+
import os
|
|
356
361
|
if "api.openai.com" in inference_url:
|
|
357
|
-
import os
|
|
358
362
|
api_key = os.getenv("OPENAI_API_KEY")
|
|
359
363
|
if api_key:
|
|
360
364
|
headers["Authorization"] = f"Bearer {api_key}"
|
|
365
|
+
elif "modal.run" in inference_url or "synth" in inference_url.lower():
|
|
366
|
+
# Synth API: use SYNTH_API_KEY
|
|
367
|
+
api_key = os.getenv("SYNTH_API_KEY")
|
|
368
|
+
if api_key:
|
|
369
|
+
headers["Authorization"] = f"Bearer {api_key}"
|
|
370
|
+
print(f"[pokemon_red] Using Synth API auth: {'Bearer ' + api_key[:10] + '...' if api_key else 'NONE'}")
|
|
371
|
+
# For 30B-A3B models, require H200 (A100 doesn't have enough memory)
|
|
372
|
+
model_id = payload.get("model", "")
|
|
373
|
+
if "30B-A3B" in model_id or "A3B" in model_id:
|
|
374
|
+
headers["X-GPU-Preference"] = "H200"
|
|
375
|
+
print(f"[pokemon_red] Setting X-GPU-Preference: H200 (required for A3B MoE)")
|
|
361
376
|
|
|
362
|
-
async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
|
|
377
|
+
async with httpx.AsyncClient(timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)) as client: # 30 min read timeout for cold starts
|
|
363
378
|
resp = await client.post(inference_url, json=payload, headers=headers)
|
|
364
379
|
else:
|
|
365
380
|
# Internal proxy: use local base_url
|
|
366
381
|
async with httpx.AsyncClient(
|
|
367
382
|
base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
|
|
368
|
-
timeout=httpx.Timeout(60.0)
|
|
383
|
+
timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0) # 30 min read timeout for cold starts
|
|
369
384
|
) as client:
|
|
370
385
|
resp = await client.post(inference_url, json=payload)
|
|
371
386
|
|
|
372
387
|
resp.raise_for_status()
|
|
373
388
|
data = resp.json()
|
|
374
|
-
|
|
389
|
+
|
|
390
|
+
# Record user message (system + user)
|
|
391
|
+
if tracer_instance is not None:
|
|
392
|
+
try:
|
|
393
|
+
print(f"[pokemon_red] Recording messages: tracer_instance={tracer_instance is not None}", flush=True)
|
|
394
|
+
# Record system message
|
|
395
|
+
await tracer_instance.record_message(
|
|
396
|
+
content=messages[0].get("content", ""),
|
|
397
|
+
message_type="system",
|
|
398
|
+
)
|
|
399
|
+
# Record user message
|
|
400
|
+
user_msg_content = messages[1].get("content", "")
|
|
401
|
+
if isinstance(user_msg_content, list):
|
|
402
|
+
# For multimodal content, extract text summary
|
|
403
|
+
text_parts = [item.get("text", "") for item in user_msg_content if item.get("type") == "text"]
|
|
404
|
+
user_msg_content = " ".join(text_parts) if text_parts else str(user_msg_content)
|
|
405
|
+
await tracer_instance.record_message(
|
|
406
|
+
content=user_msg_content,
|
|
407
|
+
message_type="user",
|
|
408
|
+
)
|
|
409
|
+
print(f"[pokemon_red] Recorded user messages", flush=True)
|
|
410
|
+
except Exception as exc:
|
|
411
|
+
logger.debug(f"[pokemon_red] Failed to record user messages: {exc}")
|
|
412
|
+
print(f"[pokemon_red] ERROR recording user messages: {exc}", flush=True)
|
|
413
|
+
|
|
414
|
+
# Debug logging for tool calls
|
|
415
|
+
print(f"\n{'='*80}")
|
|
416
|
+
print(f"[pokemon_red] INFERENCE RESPONSE DEBUG")
|
|
417
|
+
print(f"{'='*80}")
|
|
418
|
+
print(f"Response status: {resp.status_code}")
|
|
419
|
+
print(f"Response keys: {list(data.keys())}")
|
|
375
420
|
choices = data.get("choices") or []
|
|
421
|
+
if choices:
|
|
422
|
+
message = choices[0].get("message") or {}
|
|
423
|
+
print(f"Message keys: {list(message.keys())}")
|
|
424
|
+
print(f"Message content preview: {str(message.get('content', ''))[:200]}")
|
|
425
|
+
print(f"Tool calls: {message.get('tool_calls', [])}")
|
|
426
|
+
print(f"Full message (formatted):")
|
|
427
|
+
print(_json_debug.dumps(message, indent=2)[:1500])
|
|
428
|
+
print(f"{'='*80}\n")
|
|
429
|
+
|
|
430
|
+
# Record assistant message/tool calls
|
|
431
|
+
if tracer_instance is not None:
|
|
432
|
+
try:
|
|
433
|
+
message = choices[0].get("message", {}) if choices else {}
|
|
434
|
+
tool_calls = message.get("tool_calls", [])
|
|
435
|
+
content = message.get("content", "")
|
|
436
|
+
|
|
437
|
+
if tool_calls:
|
|
438
|
+
# Record tool calls as assistant message
|
|
439
|
+
import json as _json_record
|
|
440
|
+
await tracer_instance.record_message(
|
|
441
|
+
content=_json_record.dumps(tool_calls) if tool_calls else (content or ""),
|
|
442
|
+
message_type="assistant",
|
|
443
|
+
metadata={"is_tool_call": True} if tool_calls else {},
|
|
444
|
+
)
|
|
445
|
+
elif content:
|
|
446
|
+
# Record text content as assistant message
|
|
447
|
+
await tracer_instance.record_message(
|
|
448
|
+
content=content,
|
|
449
|
+
message_type="assistant",
|
|
450
|
+
)
|
|
451
|
+
except Exception as exc:
|
|
452
|
+
logger.debug(f"[pokemon_red] Failed to record assistant message: {exc}")
|
|
453
|
+
|
|
454
|
+
# Extract first tool call
|
|
376
455
|
if not choices:
|
|
456
|
+
print("[pokemon_red] WARNING: No choices in inference response")
|
|
377
457
|
return {}
|
|
378
458
|
message = choices[0].get("message") or {}
|
|
379
459
|
raw_calls = message.get("tool_calls") or []
|
|
460
|
+
|
|
461
|
+
# If no structured tool_calls, try parsing XML tool calls from content
|
|
462
|
+
if not raw_calls:
|
|
463
|
+
content = message.get("content", "")
|
|
464
|
+
if content and "<tool_call>" in content:
|
|
465
|
+
import re as _re
|
|
466
|
+
import json as _json_parse
|
|
467
|
+
# Parse XML tool calls: <tool_call>{...}</tool_call>
|
|
468
|
+
xml_pattern = r'<tool_call>\s*({.*?})\s*</tool_call>'
|
|
469
|
+
matches = _re.findall(xml_pattern, content, _re.DOTALL)
|
|
470
|
+
if matches:
|
|
471
|
+
print(f"[pokemon_red] Parsed {len(matches)} XML tool call(s) from content")
|
|
472
|
+
try:
|
|
473
|
+
tool_data = _json_parse.loads(matches[0])
|
|
474
|
+
tool_name = tool_data.get("name", "")
|
|
475
|
+
args = tool_data.get("arguments", {})
|
|
476
|
+
|
|
477
|
+
print(f"[pokemon_red] Parsed tool: {tool_name}, args: {str(args)[:200]}")
|
|
478
|
+
|
|
479
|
+
# Handle execute_sequence tool
|
|
480
|
+
if tool_name == "execute_sequence":
|
|
481
|
+
return {"actions": args.get("actions", [])}
|
|
482
|
+
|
|
483
|
+
# Handle press_button tool (legacy single action)
|
|
484
|
+
if tool_name == "press_button":
|
|
485
|
+
return {"button": args.get("button"), "frames": int(args.get("frames") or 30)}
|
|
486
|
+
except Exception as parse_err:
|
|
487
|
+
print(f"[pokemon_red] Error parsing XML tool call: {parse_err}")
|
|
488
|
+
|
|
380
489
|
if not raw_calls:
|
|
490
|
+
print(f"[pokemon_red] WARNING: No tool_calls in response. Content: {message.get('content', '')[:200]}")
|
|
381
491
|
return {}
|
|
492
|
+
|
|
382
493
|
f = raw_calls[0].get("function") or {}
|
|
383
494
|
tool_name = f.get("name", "")
|
|
384
495
|
args = f.get("arguments")
|
|
@@ -437,6 +548,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
437
548
|
action_context = _build_action_context(prev_state, current_state)
|
|
438
549
|
step_reward = await reward_fn.score(current_state, action_context)
|
|
439
550
|
|
|
551
|
+
# Record environment event
|
|
552
|
+
if tracer_instance is not None:
|
|
553
|
+
try:
|
|
554
|
+
event = EnvironmentEvent(
|
|
555
|
+
system_instance_id="environment:pokemon_red",
|
|
556
|
+
time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
|
|
557
|
+
reward=step_reward,
|
|
558
|
+
terminated=False,
|
|
559
|
+
truncated=False,
|
|
560
|
+
system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
|
|
561
|
+
system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
|
|
562
|
+
metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
|
|
563
|
+
)
|
|
564
|
+
await tracer_instance.record_event(event)
|
|
565
|
+
except Exception as exc:
|
|
566
|
+
logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
|
|
567
|
+
|
|
440
568
|
sequence_reward += step_reward
|
|
441
569
|
sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
|
|
442
570
|
|
|
@@ -488,6 +616,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
488
616
|
current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
|
|
489
617
|
action_context = _build_action_context(prev_state, current_state)
|
|
490
618
|
step_reward = await reward_fn.score(current_state, action_context)
|
|
619
|
+
|
|
620
|
+
# Record environment event
|
|
621
|
+
if tracer_instance is not None:
|
|
622
|
+
try:
|
|
623
|
+
event = EnvironmentEvent(
|
|
624
|
+
system_instance_id="environment:pokemon_red",
|
|
625
|
+
time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
|
|
626
|
+
reward=step_reward,
|
|
627
|
+
terminated=False,
|
|
628
|
+
truncated=False,
|
|
629
|
+
system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
|
|
630
|
+
system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
|
|
631
|
+
metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
|
|
632
|
+
)
|
|
633
|
+
await tracer_instance.record_event(event)
|
|
634
|
+
except Exception as exc:
|
|
635
|
+
logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
|
|
491
636
|
total_reward += step_reward
|
|
492
637
|
|
|
493
638
|
# Track reward components if non-zero
|
|
@@ -528,6 +673,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
528
673
|
# Attempt policy-driven step if policy.config present
|
|
529
674
|
policy_cfg = request.policy.config or {}
|
|
530
675
|
if policy_cfg:
|
|
676
|
+
print(f"[pokemon_red] Calling _call_inference: tracer_instance={tracer_instance is not None}", flush=True)
|
|
531
677
|
try:
|
|
532
678
|
action = await _call_inference(policy_cfg, final_obs if isinstance(final_obs, Mapping) else {})
|
|
533
679
|
|
|
@@ -546,6 +692,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
546
692
|
action_context = _build_action_context(prev_state, current_state)
|
|
547
693
|
step_reward = await reward_fn.score(current_state, action_context)
|
|
548
694
|
|
|
695
|
+
# Record environment event
|
|
696
|
+
if tracer_instance is not None:
|
|
697
|
+
try:
|
|
698
|
+
event = EnvironmentEvent(
|
|
699
|
+
system_instance_id="environment:pokemon_red",
|
|
700
|
+
time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
|
|
701
|
+
reward=step_reward,
|
|
702
|
+
terminated=False,
|
|
703
|
+
truncated=False,
|
|
704
|
+
system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
|
|
705
|
+
system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
|
|
706
|
+
metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
|
|
707
|
+
)
|
|
708
|
+
await tracer_instance.record_event(event)
|
|
709
|
+
except Exception as exc:
|
|
710
|
+
logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
|
|
711
|
+
|
|
549
712
|
sequence_reward += step_reward
|
|
550
713
|
sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
|
|
551
714
|
|
|
@@ -684,23 +847,58 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
684
847
|
# End session and get trace
|
|
685
848
|
session_trace = await tracer_instance.end_session()
|
|
686
849
|
|
|
687
|
-
# Build trace payload if requested
|
|
850
|
+
# Build trace payload if requested - ALWAYS use full format when return_trace=True
|
|
851
|
+
# This ensures markov_blanket_message_history is always included
|
|
688
852
|
record_config = getattr(request, 'record', None)
|
|
853
|
+
print(f"[pokemon_red] TRACE DEBUG: record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
|
|
854
|
+
if session_trace:
|
|
855
|
+
print(f"[pokemon_red] TRACE DEBUG: IMMEDIATELY AFTER end_session: session_trace has {len(session_trace.markov_blanket_message_history)} messages, {len(session_trace.event_history)} events", flush=True)
|
|
856
|
+
print(f"[pokemon_red] TRACE DEBUG: session_trace.markov_blanket_message_history type: {type(session_trace.markov_blanket_message_history)}", flush=True)
|
|
857
|
+
if session_trace.markov_blanket_message_history:
|
|
858
|
+
print(f"[pokemon_red] TRACE DEBUG: First message type: {type(session_trace.markov_blanket_message_history[0])}, content: {str(session_trace.markov_blanket_message_history[0].content)[:100]}", flush=True)
|
|
859
|
+
else:
|
|
860
|
+
print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY RIGHT AFTER end_session!", flush=True)
|
|
861
|
+
|
|
689
862
|
if record_config and getattr(record_config, 'return_trace', False) and session_trace:
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
863
|
+
# Always return full trace with all messages and events (no compact format)
|
|
864
|
+
import dataclasses
|
|
865
|
+
trace_payload = session_trace.to_dict()
|
|
866
|
+
print(f"[pokemon_red] TRACE DEBUG: to_dict() returned keys: {list(trace_payload.keys())}", flush=True)
|
|
867
|
+
print(f"[pokemon_red] TRACE DEBUG: to_dict() markov_blanket_message_history length: {len(trace_payload.get('markov_blanket_message_history', []))}", flush=True)
|
|
868
|
+
|
|
869
|
+
# Always manually serialize messages and events to ensure they're included
|
|
870
|
+
# asdict() may not recursively serialize nested dataclasses correctly
|
|
871
|
+
from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, BaseEvent
|
|
872
|
+
if session_trace.markov_blanket_message_history:
|
|
873
|
+
print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.markov_blanket_message_history)} messages", flush=True)
|
|
874
|
+
trace_payload["markov_blanket_message_history"] = [
|
|
875
|
+
dataclasses.asdict(msg) if isinstance(msg, SessionEventMarkovBlanketMessage) else (msg if isinstance(msg, dict) else str(msg))
|
|
876
|
+
for msg in session_trace.markov_blanket_message_history
|
|
877
|
+
]
|
|
878
|
+
else:
|
|
879
|
+
print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.markov_blanket_message_history is EMPTY!", flush=True)
|
|
880
|
+
if session_trace.event_history:
|
|
881
|
+
print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.event_history)} events", flush=True)
|
|
882
|
+
trace_payload["event_history"] = [
|
|
883
|
+
dataclasses.asdict(evt) if isinstance(evt, BaseEvent) else (evt if isinstance(evt, dict) else str(evt))
|
|
884
|
+
for evt in session_trace.event_history
|
|
885
|
+
]
|
|
886
|
+
else:
|
|
887
|
+
print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.event_history is EMPTY!", flush=True)
|
|
888
|
+
print(f"[pokemon_red] TRACE DEBUG: Final trace payload has {len(trace_payload.get('markov_blanket_message_history', []))} messages, {len(trace_payload.get('event_history', []))} events", flush=True)
|
|
889
|
+
print(f"[pokemon_red] TRACE DEBUG: Final trace payload keys: {list(trace_payload.keys())}", flush=True)
|
|
890
|
+
else:
|
|
891
|
+
print(f"[pokemon_red] TRACE DEBUG: SKIPPING trace payload build - record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
|
|
698
892
|
except Exception as exc:
|
|
699
893
|
logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
|
|
894
|
+
print(f"[pokemon_red] TRACE DEBUG EXCEPTION: {exc}", flush=True)
|
|
895
|
+
import traceback
|
|
896
|
+
print(f"[pokemon_red] TRACE DEBUG EXCEPTION TRACEBACK: {traceback.format_exc()}", flush=True)
|
|
700
897
|
|
|
701
898
|
# Fallback trace payload if no tracer but CLI needs it
|
|
702
899
|
if trace_payload is None:
|
|
703
900
|
record_config = getattr(request, 'record', None)
|
|
901
|
+
print(f"[pokemon_red] TRACE DEBUG: trace_payload is None, using fallback. record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}", flush=True)
|
|
704
902
|
if record_config and getattr(record_config, 'return_trace', False):
|
|
705
903
|
trace_payload = {
|
|
706
904
|
"session_id": request.run_id,
|
|
@@ -718,8 +916,22 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
718
916
|
"num_events": len(steps),
|
|
719
917
|
"num_messages": len(steps) * 2,
|
|
720
918
|
}
|
|
919
|
+
print(f"[pokemon_red] TRACE DEBUG: Created fallback trace_payload with keys: {list(trace_payload.keys())}", flush=True)
|
|
920
|
+
|
|
921
|
+
print(f"[pokemon_red] TRACE DEBUG: About to return RolloutResponse with trace_payload={trace_payload is not None}, keys={list(trace_payload.keys()) if trace_payload else []}", flush=True)
|
|
922
|
+
if trace_payload:
|
|
923
|
+
import json as _json_final
|
|
924
|
+
markov_msgs = trace_payload.get('markov_blanket_message_history', [])
|
|
925
|
+
event_history = trace_payload.get('event_history', [])
|
|
926
|
+
print(f"[pokemon_red] TRACE DEBUG: trace_payload markov_blanket_message_history length: {len(markov_msgs)}", flush=True)
|
|
927
|
+
print(f"[pokemon_red] TRACE DEBUG: trace_payload event_history length: {len(event_history)}", flush=True)
|
|
928
|
+
if markov_msgs:
|
|
929
|
+
print(f"[pokemon_red] TRACE DEBUG: First markov message type: {type(markov_msgs[0]) if markov_msgs else None}", flush=True)
|
|
930
|
+
print(f"[pokemon_red] TRACE DEBUG: First markov message (first 500 chars): {_json_final.dumps(markov_msgs[0] if markov_msgs else {}, indent=2, default=str)[:500]}", flush=True)
|
|
931
|
+
else:
|
|
932
|
+
print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY in final trace_payload!", flush=True)
|
|
721
933
|
|
|
722
|
-
|
|
934
|
+
response = RolloutResponse(
|
|
723
935
|
run_id=request.run_id,
|
|
724
936
|
trajectories=[trajectory],
|
|
725
937
|
branches={},
|
|
@@ -728,6 +940,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
728
940
|
ops_executed=len(request.ops or []),
|
|
729
941
|
trace=trace_payload,
|
|
730
942
|
)
|
|
943
|
+
|
|
944
|
+
# Final check: inspect what's actually in the response
|
|
945
|
+
if response.trace:
|
|
946
|
+
import json as _json_response
|
|
947
|
+
resp_markov = response.trace.get('markov_blanket_message_history', []) if isinstance(response.trace, dict) else []
|
|
948
|
+
print(f"[pokemon_red] TRACE DEBUG: Response.trace markov_blanket_message_history length: {len(resp_markov)}", flush=True)
|
|
949
|
+
|
|
950
|
+
return response
|
|
731
951
|
|
|
732
952
|
|
|
733
953
|
def import_datetime():
|
|
@@ -788,11 +1008,40 @@ def build_config() -> TaskAppConfig:
|
|
|
788
1008
|
register_task_app(
|
|
789
1009
|
entry=TaskAppEntry(
|
|
790
1010
|
app_id="pokemon_red",
|
|
791
|
-
description="Pokémon Red demo task app",
|
|
1011
|
+
description="Pokémon Red demo task app with vision support",
|
|
792
1012
|
config_factory=build_config,
|
|
793
1013
|
aliases=("pokemon_red_demo",),
|
|
794
1014
|
env_files=(),
|
|
795
|
-
modal=
|
|
1015
|
+
modal=ModalDeploymentConfig(
|
|
1016
|
+
app_name="pokemon-red-vision-task-app",
|
|
1017
|
+
python_version="3.11",
|
|
1018
|
+
pip_packages=(
|
|
1019
|
+
"fastapi>=0.100.0",
|
|
1020
|
+
"uvicorn>=0.23.0",
|
|
1021
|
+
"pydantic>=2.0.0",
|
|
1022
|
+
"numpy>=1.24.0",
|
|
1023
|
+
"aiohttp>=3.8.0",
|
|
1024
|
+
"httpx>=0.24.0",
|
|
1025
|
+
"python-dotenv>=1.0.1",
|
|
1026
|
+
# Tracing/DB runtime deps
|
|
1027
|
+
"sqlalchemy>=2.0.42",
|
|
1028
|
+
"aiosqlite>=0.21.0",
|
|
1029
|
+
"greenlet>=3.2.3",
|
|
1030
|
+
# Pokemon Red environment
|
|
1031
|
+
"pyboy>=2.0.0",
|
|
1032
|
+
"pillow>=9.0.0",
|
|
1033
|
+
),
|
|
1034
|
+
extra_local_dirs=(
|
|
1035
|
+
# Mount repo root so local modules resolve when deployed on Modal
|
|
1036
|
+
("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
|
|
1037
|
+
("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
|
|
1038
|
+
("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
|
|
1039
|
+
),
|
|
1040
|
+
secret_names=("openai-api-key", "groq-api-key"),
|
|
1041
|
+
memory=16384,
|
|
1042
|
+
cpu=4.0,
|
|
1043
|
+
max_containers=10,
|
|
1044
|
+
),
|
|
796
1045
|
)
|
|
797
1046
|
)
|
|
798
1047
|
|
|
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
|
|
|
20
20
|
cd /path/to/synth-ai
|
|
21
21
|
|
|
22
22
|
# Start the Sokoban task app on port 8911
|
|
23
|
-
uvx synth-ai task-app
|
|
23
|
+
uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
The server will be available at `http://localhost:8911`.
|
|
@@ -283,7 +283,7 @@ lsof -i :8911
|
|
|
283
283
|
kill -9 $(lsof -ti :8911)
|
|
284
284
|
|
|
285
285
|
# Restart
|
|
286
|
-
uvx synth-ai task-app
|
|
286
|
+
uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
|
|
287
287
|
```
|
|
288
288
|
|
|
289
289
|
## Examples
|
|
@@ -304,4 +304,3 @@ To add new features:
|
|
|
304
304
|
## License
|
|
305
305
|
|
|
306
306
|
MIT
|
|
307
|
-
|
|
@@ -1,24 +1,22 @@
|
|
|
1
1
|
# Verilog Eval Config for Groq Qwen3-32B
|
|
2
|
-
# Quick eval to test Verilog task app before RL training
|
|
3
|
-
|
|
4
|
-
[task_app]
|
|
5
|
-
# Update this with your Modal URL after deployment
|
|
6
|
-
url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
|
|
2
|
+
# Quick eval to test the Verilog task app before RL training
|
|
7
3
|
|
|
8
4
|
[eval]
|
|
9
|
-
|
|
5
|
+
app_id = "grpo-verilog"
|
|
6
|
+
task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
|
|
7
|
+
model = "groq:qwen3-32b"
|
|
10
8
|
seeds = [0, 1, 2]
|
|
11
|
-
|
|
9
|
+
max_turns = 15
|
|
10
|
+
concurrency = 1
|
|
11
|
+
return_trace = true
|
|
12
|
+
trace_format = "structured"
|
|
13
|
+
|
|
14
|
+
[eval.env_config]
|
|
15
|
+
difficulty = "medium"
|
|
12
16
|
|
|
13
|
-
[
|
|
17
|
+
[eval.policy_config]
|
|
14
18
|
provider = "groq"
|
|
15
19
|
model = "qwen/qwen3-32b"
|
|
16
20
|
temperature = 0.2
|
|
17
21
|
max_tokens = 768
|
|
18
22
|
inference_url = "https://api.groq.com/openai/v1/chat/completions"
|
|
19
|
-
|
|
20
|
-
[env]
|
|
21
|
-
difficulty = "medium" # Can be "easy", "medium", or "hard"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Compatibility wrapper for the GRPO Verilog task app.
|
|
2
2
|
|
|
3
3
|
This mirrors the Crafter task app wrapper while delegating configuration to
|
|
4
|
-
`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai
|
|
4
|
+
`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
|
|
5
5
|
but the module remains for direct execution or importing the FastAPI app.
|
|
6
6
|
"""
|
|
7
7
|
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# Crafter Full Finetune (FFT) example on H100
|
|
2
2
|
# Adjust paths and hyperparameters to your environment before running.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
[algorithm]
|
|
5
|
+
type = "offline"
|
|
6
|
+
method = "sft"
|
|
7
|
+
variety = "fft"
|
|
5
8
|
|
|
6
9
|
[job]
|
|
7
10
|
model = "Qwen/Qwen3-4B" # base model to finetune
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
|
|
2
2
|
|
|
3
|
-
type = "rl"
|
|
4
|
-
|
|
5
3
|
[algorithm]
|
|
6
4
|
type = "online"
|
|
7
5
|
method = "policy_gradient"
|
|
@@ -40,6 +38,7 @@ health_interval_ms = 300
|
|
|
40
38
|
[model]
|
|
41
39
|
# Base model start
|
|
42
40
|
base = "Qwen/Qwen3-4B"
|
|
41
|
+
trainer_mode = "full"
|
|
43
42
|
label = "crafter-rl-from-base"
|
|
44
43
|
|
|
45
44
|
[rollout]
|
|
@@ -50,6 +49,7 @@ policy_name = "crafter-react"
|
|
|
50
49
|
max_concurrent_rollouts = 8
|
|
51
50
|
batches_per_step = 2
|
|
52
51
|
ops = ["agent", "env"]
|
|
52
|
+
task_app_origin_rewards_only = true
|
|
53
53
|
|
|
54
54
|
[evaluation]
|
|
55
55
|
# Run baseline evaluation over the first 100 seeds every 20 training iterations
|
|
@@ -62,6 +62,7 @@ seeds = [
|
|
|
62
62
|
[training]
|
|
63
63
|
num_epochs = 1
|
|
64
64
|
iterations_per_epoch = 10
|
|
65
|
+
max_turns = 10
|
|
65
66
|
batch_size = 16
|
|
66
67
|
group_size = 4
|
|
67
68
|
gradient_accumulation_steps = 1
|
|
@@ -448,7 +448,7 @@ async def main() -> None:
|
|
|
448
448
|
|
|
449
449
|
print(f"Ops executed: {ops}")
|
|
450
450
|
print(
|
|
451
|
-
"Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai
|
|
451
|
+
"Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai deploy --runtime uvicorn …` to persist traces/SFT."
|
|
452
452
|
)
|
|
453
453
|
except httpx.HTTPStatusError as exc:
|
|
454
454
|
detail = (
|
|
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
|
|
|
6
6
|
|
|
7
7
|
## Local development
|
|
8
8
|
```bash
|
|
9
|
-
uvx synth-ai
|
|
9
|
+
uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
|
|
10
10
|
# Optional extras:
|
|
11
11
|
# --env-file path/to/.env # load additional environment variables
|
|
12
12
|
# --reload # enable uvicorn auto-reload
|