synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1707 -186
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +16 -16
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py
RENAMED
|
@@ -44,6 +44,7 @@ class CrafterPolicy(Policy):
|
|
|
44
44
|
self.inference_url = inference_url
|
|
45
45
|
self.model = model
|
|
46
46
|
self.use_tools = True
|
|
47
|
+
self.use_vision = False # Enable vision for VLMs
|
|
47
48
|
# Sampling parameters (populated via initialize(config))
|
|
48
49
|
self.temperature: float | None = None
|
|
49
50
|
self.top_p: float | None = None
|
|
@@ -63,6 +64,11 @@ class CrafterPolicy(Policy):
|
|
|
63
64
|
self.model = config["model"]
|
|
64
65
|
if "use_tools" in config:
|
|
65
66
|
self.use_tools = bool(config["use_tools"])
|
|
67
|
+
if "use_vision" in config:
|
|
68
|
+
self.use_vision = bool(config["use_vision"])
|
|
69
|
+
# Auto-detect vision capability from model name if not explicitly set
|
|
70
|
+
if "use_vision" not in config and self.model:
|
|
71
|
+
self.use_vision = self._is_vision_model(self.model)
|
|
66
72
|
# Adopt sampling params from policy config (trainer passes these through)
|
|
67
73
|
if "temperature" in config:
|
|
68
74
|
self.temperature = float(config["temperature"]) # fail fast on bad types
|
|
@@ -384,6 +390,7 @@ class CrafterPolicy(Policy):
|
|
|
384
390
|
"inference_url": self.inference_url,
|
|
385
391
|
"model": self.model,
|
|
386
392
|
"use_tools": self.use_tools,
|
|
393
|
+
"use_vision": self.use_vision,
|
|
387
394
|
},
|
|
388
395
|
"state": self.state_dict(),
|
|
389
396
|
}
|
|
@@ -396,7 +403,8 @@ class CrafterPolicy(Policy):
|
|
|
396
403
|
inference_url=config["inference_url"],
|
|
397
404
|
model=config.get("model"),
|
|
398
405
|
)
|
|
399
|
-
policy.use_tools = bool(config
|
|
406
|
+
policy.use_tools = bool(config.get("use_tools", True))
|
|
407
|
+
policy.use_vision = bool(config.get("use_vision", False))
|
|
400
408
|
policy.load_state_dict(state)
|
|
401
409
|
return policy
|
|
402
410
|
|
|
@@ -446,12 +454,60 @@ class CrafterPolicy(Policy):
|
|
|
446
454
|
|
|
447
455
|
return format_observation(obs_data, step_count=step_idx, max_steps=max_steps)
|
|
448
456
|
|
|
457
|
+
@staticmethod
|
|
458
|
+
def _is_vision_model(model_name: str) -> bool:
|
|
459
|
+
"""Check if a model supports vision/image inputs based on its name."""
|
|
460
|
+
if not model_name:
|
|
461
|
+
return False
|
|
462
|
+
|
|
463
|
+
model_lower = model_name.lower()
|
|
464
|
+
|
|
465
|
+
# Known vision-capable model patterns
|
|
466
|
+
vision_patterns = [
|
|
467
|
+
"gpt-4o", # GPT-4o series
|
|
468
|
+
"gpt-4-turbo", # GPT-4 Turbo with vision
|
|
469
|
+
"gpt-4-vision", # Explicit vision variant
|
|
470
|
+
"gpt-5", # GPT-5 series (all variants support vision)
|
|
471
|
+
"claude-3", # All Claude 3 models support vision
|
|
472
|
+
"gemini", # Gemini models
|
|
473
|
+
"qwen-vl", # Qwen Vision-Language models
|
|
474
|
+
"qwen2-vl", # Qwen2 VL
|
|
475
|
+
"pixtral", # Mistral's vision model
|
|
476
|
+
"llava", # LLaVA models
|
|
477
|
+
"phi-3-vision", # Microsoft Phi-3 Vision
|
|
478
|
+
"internvl", # InternVL models
|
|
479
|
+
"cogvlm", # CogVLM models
|
|
480
|
+
"vision", # Generic vision indicator
|
|
481
|
+
]
|
|
482
|
+
|
|
483
|
+
return any(pattern in model_lower for pattern in vision_patterns)
|
|
484
|
+
|
|
449
485
|
def _extract_image_parts(
|
|
450
486
|
self, observation: dict[str, Any] | None
|
|
451
487
|
) -> list[dict[str, Any]]:
|
|
452
|
-
"""
|
|
453
|
-
|
|
454
|
-
|
|
488
|
+
"""Extract image parts from crafter observation for vision-capable models.
|
|
489
|
+
|
|
490
|
+
Returns OpenAI-style image_url format if vision is enabled and image data is available.
|
|
491
|
+
"""
|
|
492
|
+
# Only extract images if vision is enabled for this policy
|
|
493
|
+
if not self.use_vision:
|
|
494
|
+
return []
|
|
495
|
+
|
|
496
|
+
if not observation:
|
|
497
|
+
return []
|
|
498
|
+
|
|
499
|
+
# Get the observation data (could be nested)
|
|
500
|
+
obs = observation.get("observation", observation)
|
|
501
|
+
if not isinstance(obs, dict):
|
|
502
|
+
return []
|
|
503
|
+
|
|
504
|
+
# Extract the data URL (includes base64-encoded image)
|
|
505
|
+
data_url = obs.get("observation_image_data_url")
|
|
506
|
+
if not data_url or not isinstance(data_url, str):
|
|
507
|
+
return []
|
|
508
|
+
|
|
509
|
+
# Return OpenAI-style image_url format
|
|
510
|
+
return [{"type": "image_url", "image_url": {"url": data_url}}]
|
|
455
511
|
|
|
456
512
|
def parse_model_response(
|
|
457
513
|
self, response: str, observation: dict[str, Any]
|
|
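examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py
RENAMED
|
@@ -97,10 +97,32 @@ async def create_policy(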
|
|
|
97
97
|
|
|
98
98
|
# Set defaults from TaskApp / environment if not provided
|
|
99
99
|
config = dict(request.config or {})
|
|
100
|
+
provider_raw = config.get("provider") or config.get("vendor")
|
|
101
|
+
provider = str(provider_raw).strip().lower() if provider_raw else None
|
|
102
|
+
|
|
103
|
+
# Resolve base URL for proxy endpoints (strip trailing slash)
|
|
104
|
+
base_url = str(req.base_url).rstrip("/")
|
|
105
|
+
|
|
106
|
+
if provider == "groq":
|
|
107
|
+
# Route through in-app Groq proxy by default
|
|
108
|
+
config.setdefault("inference_url", f"{base_url}/proxy/groq")
|
|
109
|
+
# Default to a recent Groq-hosted Qwen unless caller overrides
|
|
110
|
+
preferred_model = "qwen/qwen3-32b"
|
|
111
|
+
config.setdefault("model", preferred_model)
|
|
112
|
+
# Groq Qwen defaults tuned for deterministic tool use
|
|
113
|
+
config.setdefault("temperature", 0.0)
|
|
114
|
+
config.setdefault("top_p", 0.95)
|
|
115
|
+
config.setdefault("max_tokens", 256)
|
|
116
|
+
# Avoid leaking provider in downstream policy if unset
|
|
117
|
+
config["provider"] = "groq"
|
|
118
|
+
elif provider == "openai":
|
|
119
|
+
config.setdefault("inference_url", f"{base_url}/proxy")
|
|
120
|
+
config["provider"] = "openai"
|
|
121
|
+
|
|
100
122
|
if "inference_url" not in config and task_app is not None:
|
|
101
|
-
|
|
102
|
-
if
|
|
103
|
-
config["inference_url"] =
|
|
123
|
+
task_base_url = getattr(task_app, "vllm_base_url", None)
|
|
124
|
+
if task_base_url:
|
|
125
|
+
config["inference_url"] = task_base_url
|
|
104
126
|
if "model" not in config and task_app is not None:
|
|
105
127
|
default_model = getattr(task_app, "default_model", None)
|
|
106
128
|
if default_model:
|
|
examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py
RENAMED
|
@@ -1843,12 +1843,22 @@ async def execute_rollout(
|
|
|
1843
1843
|
timing_final.setdefault("overhead_ms", 0.0)
|
|
1844
1844
|
|
|
1845
1845
|
# Build trajectory
|
|
1846
|
+
# Extract inference_url from policy meta
|
|
1847
|
+
inference_url = None
|
|
1848
|
+
if policy_handle is not None:
|
|
1849
|
+
try:
|
|
1850
|
+
policy_snapshot = policy_handle.snapshot()
|
|
1851
|
+
inference_url = policy_snapshot.get("config", {}).get("inference_url")
|
|
1852
|
+
except Exception:
|
|
1853
|
+
pass
|
|
1854
|
+
|
|
1846
1855
|
trajectory = RolloutTrajectory(
|
|
1847
1856
|
env_id=env_id,
|
|
1848
1857
|
policy_id=policy_id,
|
|
1849
1858
|
steps=trajectory_steps,
|
|
1850
1859
|
final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
|
|
1851
1860
|
length=len(trajectory_steps),
|
|
1861
|
+
inference_url=inference_url, # NEW: Required for trace correlation
|
|
1852
1862
|
decision_samples=decision_samples if step_rewards_active else None,
|
|
1853
1863
|
)
|
|
1854
1864
|
|
|
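examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py
RENAMED
|
@@ -1,15 +1,14 @@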
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Simple test script for the GRPO Synth Envs Hosted Service.
|
|
4
|
-
|
|
5
|
-
Run this after starting the service with:
|
|
6
|
-
python main.py
|
|
7
|
-
"""
|
|
2
|
+
"""Manual smoke script for the GRPO Synth Envs Hosted Service."""
|
|
8
3
|
|
|
9
4
|
import asyncio
|
|
10
5
|
import json
|
|
11
6
|
|
|
12
7
|
import httpx
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
async def test_service():
|