synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +4 -4
- examples/sft/export_dataset.py +7 -4
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +1 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +2 -8
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +145 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +66 -49
- synth_ai/cli/_modal_wrapper.py +9 -6
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +1 -0
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +392 -141
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +62 -0
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +5 -2
- synth_ai/task/config.py +259 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +4 -2
- synth_ai/task/rubrics/loaders.py +27 -4
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +145 -2
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/session_tracer.py +10 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +108 -77
- synth_ai/tracing_v3/utils.py +1 -1
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +911 -0
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from typing import Any, Dict, Iterable, Mapping, Sequence
|
|
4
5
|
|
|
5
6
|
from fastapi import HTTPException, Request
|
|
@@ -21,6 +22,15 @@ from synth_ai.task.contracts import (
|
|
|
21
22
|
TaskInfo,
|
|
22
23
|
)
|
|
23
24
|
from synth_ai.task.server import ProxyConfig, TaskAppConfig
|
|
25
|
+
from synth_ai.task.tracing_utils import (
|
|
26
|
+
build_tracer_factory,
|
|
27
|
+
resolve_sft_output_dir,
|
|
28
|
+
resolve_tracing_db_url,
|
|
29
|
+
tracing_env_enabled,
|
|
30
|
+
)
|
|
31
|
+
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
24
34
|
|
|
25
35
|
|
|
26
36
|
def _base_task_info() -> TaskInfo:
|
|
@@ -182,7 +192,70 @@ def _calculate_outcome_score(final_state: dict[str, Any], total_reward: float) -
|
|
|
182
192
|
|
|
183
193
|
|
|
184
194
|
async def rollout_executor(request: RolloutRequest, fastapi_request: Request) -> RolloutResponse:
|
|
195
|
+
# Initialize SessionTracer for this rollout
|
|
196
|
+
tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
|
|
197
|
+
tracer_instance: SessionTracer | None = None
|
|
198
|
+
if callable(tracer_factory):
|
|
199
|
+
try:
|
|
200
|
+
inst = tracer_factory()
|
|
201
|
+
tracer_instance = inst if isinstance(inst, SessionTracer) else None
|
|
202
|
+
except Exception as exc:
|
|
203
|
+
logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
|
|
204
|
+
|
|
205
|
+
# Start tracing session
|
|
206
|
+
if tracer_instance is not None:
|
|
207
|
+
try:
|
|
208
|
+
await tracer_instance.initialize()
|
|
209
|
+
await tracer_instance.start_session(
|
|
210
|
+
session_id=request.run_id,
|
|
211
|
+
metadata={
|
|
212
|
+
"run_id": request.run_id,
|
|
213
|
+
"env_name": "pokemon_red",
|
|
214
|
+
"policy_name": request.policy.policy_name or "default",
|
|
215
|
+
"seed": request.env.seed,
|
|
216
|
+
}
|
|
217
|
+
)
|
|
218
|
+
logger.info(f"[pokemon_red] tracing enabled for run_id={request.run_id}")
|
|
219
|
+
except Exception as exc:
|
|
220
|
+
logger.warning(f"[pokemon_red] tracing init failed: {exc}")
|
|
221
|
+
tracer_instance = None
|
|
222
|
+
|
|
185
223
|
async def _call_inference(policy_cfg: Mapping[str, Any], observation: Mapping[str, Any]) -> Mapping[str, Any]:
|
|
224
|
+
# Check if vision mode is enabled
|
|
225
|
+
use_vision = bool(policy_cfg.get("use_vision", False))
|
|
226
|
+
image_only_mode = bool(policy_cfg.get("image_only_mode", False))
|
|
227
|
+
|
|
228
|
+
# Build user message content
|
|
229
|
+
if use_vision and "observation_image_data_url" in observation:
|
|
230
|
+
# Extract image data URL
|
|
231
|
+
image_data_url = observation["observation_image_data_url"]
|
|
232
|
+
|
|
233
|
+
# Build state summary (text observation)
|
|
234
|
+
state_summary = "State summary: " + str({
|
|
235
|
+
k: observation.get(k)
|
|
236
|
+
for k in observation.keys()
|
|
237
|
+
if k not in ["error", "observation_image_base64", "observation_image_data_url",
|
|
238
|
+
"observation_image_format", "observation_image_width", "observation_image_height"]
|
|
239
|
+
})
|
|
240
|
+
|
|
241
|
+
# Image-only mode: only send image, no text
|
|
242
|
+
if image_only_mode:
|
|
243
|
+
user_content = [
|
|
244
|
+
{"type": "image_url", "image_url": {"url": image_data_url}}
|
|
245
|
+
]
|
|
246
|
+
else:
|
|
247
|
+
# Vision mode with text: send both text and image
|
|
248
|
+
user_content = [
|
|
249
|
+
{"type": "text", "text": state_summary},
|
|
250
|
+
{"type": "image_url", "image_url": {"url": image_data_url}}
|
|
251
|
+
]
|
|
252
|
+
else:
|
|
253
|
+
# Text-only mode (default)
|
|
254
|
+
state_summary = "State summary: " + str({
|
|
255
|
+
k: observation.get(k) for k in observation.keys() if k != "error"
|
|
256
|
+
})
|
|
257
|
+
user_content = state_summary
|
|
258
|
+
|
|
186
259
|
messages = [
|
|
187
260
|
{
|
|
188
261
|
"role": "system",
|
|
@@ -193,9 +266,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
193
266
|
},
|
|
194
267
|
{
|
|
195
268
|
"role": "user",
|
|
196
|
-
"content": (
|
|
197
|
-
"State summary: " + str({k: observation.get(k) for k in observation.keys() if k != "error"})
|
|
198
|
-
),
|
|
269
|
+
"content": user_content,
|
|
199
270
|
},
|
|
200
271
|
]
|
|
201
272
|
payload = {
|
|
@@ -262,6 +333,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
262
333
|
"max_tokens": int(policy_cfg.get("max_tokens") or 500),
|
|
263
334
|
}
|
|
264
335
|
inference_url = str(policy_cfg.get("inference_url") or "").rstrip("/")
|
|
336
|
+
|
|
337
|
+
# Determine if this is an external URL or internal proxy
|
|
338
|
+
is_external = inference_url.startswith("http://") or inference_url.startswith("https://")
|
|
339
|
+
|
|
265
340
|
if not inference_url:
|
|
266
341
|
# Prefer built-in proxy endpoints from app if no external URL
|
|
267
342
|
provider = (policy_cfg.get("provider") or "").lower()
|
|
@@ -269,8 +344,31 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
269
344
|
inference_url = "/proxy/groq/v1/chat/completions"
|
|
270
345
|
else:
|
|
271
346
|
inference_url = "/proxy/v1/chat/completions"
|
|
272
|
-
|
|
273
|
-
|
|
347
|
+
is_external = False
|
|
348
|
+
elif is_external:
|
|
349
|
+
# Add /v1/chat/completions if using OpenAI directly
|
|
350
|
+
if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
|
|
351
|
+
inference_url = inference_url + "/v1/chat/completions"
|
|
352
|
+
|
|
353
|
+
if is_external:
|
|
354
|
+
# External API: use direct HTTP client with auth header
|
|
355
|
+
headers = {}
|
|
356
|
+
if "api.openai.com" in inference_url:
|
|
357
|
+
import os
|
|
358
|
+
api_key = os.getenv("OPENAI_API_KEY")
|
|
359
|
+
if api_key:
|
|
360
|
+
headers["Authorization"] = f"Bearer {api_key}"
|
|
361
|
+
|
|
362
|
+
async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
|
|
363
|
+
resp = await client.post(inference_url, json=payload, headers=headers)
|
|
364
|
+
else:
|
|
365
|
+
# Internal proxy: use local base_url
|
|
366
|
+
async with httpx.AsyncClient(
|
|
367
|
+
base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
|
|
368
|
+
timeout=httpx.Timeout(60.0)
|
|
369
|
+
) as client:
|
|
370
|
+
resp = await client.post(inference_url, json=payload)
|
|
371
|
+
|
|
274
372
|
resp.raise_for_status()
|
|
275
373
|
data = resp.json()
|
|
276
374
|
# Extract first tool call
|
|
@@ -555,6 +653,72 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
555
653
|
inference_url=inference_url, # NEW: Required for trace correlation
|
|
556
654
|
)
|
|
557
655
|
|
|
656
|
+
# Record outcome rewards and end session
|
|
657
|
+
trace_payload = None
|
|
658
|
+
if tracer_instance is not None:
|
|
659
|
+
try:
|
|
660
|
+
# Count achievements (milestones)
|
|
661
|
+
achievements_count = len(milestone_events)
|
|
662
|
+
|
|
663
|
+
# Build metadata with all relevant info
|
|
664
|
+
reward_metadata = {
|
|
665
|
+
"run_id": request.run_id,
|
|
666
|
+
"env_name": "pokemon_red",
|
|
667
|
+
"final_map": final_state.get("map_id", -1),
|
|
668
|
+
"party_count": final_state.get("party_count", 0),
|
|
669
|
+
"badges": final_state.get("badges", 0),
|
|
670
|
+
"steps": len(steps),
|
|
671
|
+
"milestone_events": milestone_events,
|
|
672
|
+
"reward_components": all_reward_components,
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
# Record outcome reward to Turso
|
|
676
|
+
await tracer_instance.record_outcome_reward(
|
|
677
|
+
total_reward=int(total_reward),
|
|
678
|
+
achievements_count=achievements_count,
|
|
679
|
+
total_steps=len(steps),
|
|
680
|
+
reward_metadata=reward_metadata,
|
|
681
|
+
)
|
|
682
|
+
logger.info(f"[pokemon_red] recorded outcome: reward={total_reward}, achievements={achievements_count}")
|
|
683
|
+
|
|
684
|
+
# End session and get trace
|
|
685
|
+
session_trace = await tracer_instance.end_session()
|
|
686
|
+
|
|
687
|
+
# Build trace payload if requested
|
|
688
|
+
record_config = getattr(request, 'record', None)
|
|
689
|
+
if record_config and getattr(record_config, 'return_trace', False) and session_trace:
|
|
690
|
+
trace_payload = {
|
|
691
|
+
"session_id": session_trace.session_id,
|
|
692
|
+
"created_at": session_trace.created_at.isoformat() if session_trace.created_at else None,
|
|
693
|
+
"metadata": dict(session_trace.metadata or {}),
|
|
694
|
+
"num_timesteps": session_trace.num_timesteps,
|
|
695
|
+
"num_events": session_trace.num_events,
|
|
696
|
+
"num_messages": session_trace.num_messages,
|
|
697
|
+
}
|
|
698
|
+
except Exception as exc:
|
|
699
|
+
logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
|
|
700
|
+
|
|
701
|
+
# Fallback trace payload if no tracer but CLI needs it
|
|
702
|
+
if trace_payload is None:
|
|
703
|
+
record_config = getattr(request, 'record', None)
|
|
704
|
+
if record_config and getattr(record_config, 'return_trace', False):
|
|
705
|
+
trace_payload = {
|
|
706
|
+
"session_id": request.run_id,
|
|
707
|
+
"created_at": import_datetime().now().isoformat(),
|
|
708
|
+
"metadata": {
|
|
709
|
+
"run_id": request.run_id,
|
|
710
|
+
"env_name": "pokemon_red",
|
|
711
|
+
"total_reward": int(total_reward),
|
|
712
|
+
"final_map": final_state.get("map_id", -1),
|
|
713
|
+
"party_count": final_state.get("party_count", 0),
|
|
714
|
+
"badges": final_state.get("badges", 0),
|
|
715
|
+
"steps": len(steps),
|
|
716
|
+
},
|
|
717
|
+
"num_timesteps": len(steps),
|
|
718
|
+
"num_events": len(steps),
|
|
719
|
+
"num_messages": len(steps) * 2,
|
|
720
|
+
}
|
|
721
|
+
|
|
558
722
|
return RolloutResponse(
|
|
559
723
|
run_id=request.run_id,
|
|
560
724
|
trajectories=[trajectory],
|
|
@@ -562,11 +726,40 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
562
726
|
metrics=metrics,
|
|
563
727
|
aborted=False,
|
|
564
728
|
ops_executed=len(request.ops or []),
|
|
729
|
+
trace=trace_payload,
|
|
565
730
|
)
|
|
566
731
|
|
|
567
732
|
|
|
733
|
+
def import_datetime():
|
|
734
|
+
"""Helper to import datetime for trace timestamps."""
|
|
735
|
+
from datetime import datetime
|
|
736
|
+
return datetime
|
|
737
|
+
|
|
738
|
+
|
|
568
739
|
def build_config() -> TaskAppConfig:
|
|
569
740
|
base_info = _base_task_info()
|
|
741
|
+
|
|
742
|
+
# Set up tracing
|
|
743
|
+
tracing_enabled = tracing_env_enabled()
|
|
744
|
+
tracing_db_url = resolve_tracing_db_url()
|
|
745
|
+
tracer_factory = build_tracer_factory(
|
|
746
|
+
SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
|
|
747
|
+
)
|
|
748
|
+
sft_output_dir = resolve_sft_output_dir()
|
|
749
|
+
|
|
750
|
+
app_state: dict[str, Any] = {
|
|
751
|
+
"tracing_enabled": tracing_enabled,
|
|
752
|
+
}
|
|
753
|
+
if tracer_factory is not None:
|
|
754
|
+
app_state["session_tracer_factory"] = tracer_factory
|
|
755
|
+
if sft_output_dir:
|
|
756
|
+
app_state["sft_output_dir"] = sft_output_dir
|
|
757
|
+
|
|
758
|
+
if tracing_enabled:
|
|
759
|
+
status_msg = f"[task:tracing] enabled (db={tracing_db_url or 'default'})"
|
|
760
|
+
logger.info(status_msg)
|
|
761
|
+
print(status_msg, flush=True)
|
|
762
|
+
|
|
570
763
|
return TaskAppConfig(
|
|
571
764
|
app_id="pokemon_red",
|
|
572
765
|
name="Pokémon Red Task App",
|
|
@@ -585,7 +778,7 @@ def build_config() -> TaskAppConfig:
|
|
|
585
778
|
"Example: {\"tool\": \"execute_sequence\", \"args\": {\"actions\": [{\"button\": \"DOWN\", \"frames\": 30}, ...]}}"
|
|
586
779
|
),
|
|
587
780
|
),
|
|
588
|
-
app_state=
|
|
781
|
+
app_state=app_state,
|
|
589
782
|
require_api_key=False,
|
|
590
783
|
expose_debug_env=True,
|
|
591
784
|
cors_origins=["*"],
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
# Verilog Eval Config for Groq Qwen3-32B
|
|
2
|
+
# Quick eval to test Verilog task app before RL training
|
|
2
3
|
|
|
3
4
|
[task_app]
|
|
4
|
-
|
|
5
|
+
# Update this with your Modal URL after deployment
|
|
6
|
+
url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
|
|
5
7
|
|
|
6
8
|
[eval]
|
|
7
|
-
num_episodes =
|
|
8
|
-
seeds = [0, 1, 2
|
|
9
|
-
max_steps =
|
|
9
|
+
num_episodes = 3 # Quick test with 3 seeds
|
|
10
|
+
seeds = [0, 1, 2]
|
|
11
|
+
max_steps = 15 # More steps for Verilog compilation chains
|
|
10
12
|
|
|
11
13
|
[policy]
|
|
12
14
|
provider = "groq"
|
|
@@ -18,3 +20,5 @@ inference_url = "https://api.groq.com/openai/v1/chat/completions"
|
|
|
18
20
|
[env]
|
|
19
21
|
difficulty = "medium" # Can be "easy", "medium", or "hard"
|
|
20
22
|
|
|
23
|
+
|
|
24
|
+
|