synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +0 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
- examples/task_apps/enron/__init__.py +1 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +62 -78
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +71 -31
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +7 -2
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +8 -8
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +2 -3
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/tui/cli/query_experiments.py +4 -4
- synth_ai/tui/cli/query_experiments_v3.py +4 -4
- synth_ai/tui/dashboard.py +14 -9
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Collect Crafter vision traces for SFT dataset creation.
|
|
4
|
+
|
|
5
|
+
Supports both:
|
|
6
|
+
1. OpenAI models (gpt-5-nano, gpt-4o-mini) via OpenAI API
|
|
7
|
+
2. Qwen-VL models via synth-ai hosted inference
|
|
8
|
+
|
|
9
|
+
Traces are stored in SQLite with full multimodal messages (text + base64 images)
|
|
10
|
+
ready for export to SFT JSONL format.
|
|
11
|
+
|
|
12
|
+
Requirements:
|
|
13
|
+
- For OpenAI: OPENAI_API_KEY environment variable
|
|
14
|
+
- For synth-ai: SYNTH_API_KEY environment variable
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
# Collect with gpt-5-nano
|
|
18
|
+
uv run python examples/qwen_vl/collect_vision_traces.py \
|
|
19
|
+
--model gpt-5-nano \
|
|
20
|
+
--provider openai \
|
|
21
|
+
--episodes 100 \
|
|
22
|
+
--max-steps 50 \
|
|
23
|
+
--output-dir traces/gpt5nano_vision
|
|
24
|
+
|
|
25
|
+
# Collect with Qwen2-VL via synth
|
|
26
|
+
uv run python examples/qwen_vl/collect_vision_traces.py \
|
|
27
|
+
--model Qwen/Qwen2-VL-7B-Instruct \
|
|
28
|
+
--provider synth \
|
|
29
|
+
--episodes 100 \
|
|
30
|
+
--max-steps 50 \
|
|
31
|
+
--output-dir traces/qwen2vl_vision
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import argparse
|
|
37
|
+
import asyncio
|
|
38
|
+
import json
|
|
39
|
+
import logging
|
|
40
|
+
import os
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
from typing import Any, cast
|
|
43
|
+
from uuid import uuid4
|
|
44
|
+
|
|
45
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
|
|
46
|
+
CrafterEnvironmentWrapper,
|
|
47
|
+
)
|
|
48
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
|
|
49
|
+
from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
|
|
50
|
+
from synth_ai.environments.examples.crafter_classic.taskset import (
|
|
51
|
+
CrafterTaskInstance,
|
|
52
|
+
CrafterTaskInstanceMetadata,
|
|
53
|
+
)
|
|
54
|
+
from synth_ai.environments.tasks.core import Impetus, Intent
|
|
55
|
+
|
|
56
|
+
# Try importing trace storage
|
|
57
|
+
try:
|
|
58
|
+
from synth_ai.tracing_v3.storage import create_storage
|
|
59
|
+
from synth_ai.tracing_v3.storage.config import StorageBackend, StorageConfig
|
|
60
|
+
TRACING_AVAILABLE = True
|
|
61
|
+
except ImportError:
|
|
62
|
+
print("Warning: Tracing storage not available. Traces will not be persisted.")
|
|
63
|
+
TRACING_AVAILABLE = False
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_openai_client():
    """Create an OpenAI client authenticated via ``OPENAI_API_KEY``.

    Returns:
        An ``openai.OpenAI`` instance built with the key from the environment.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    # Imported inside the function, so the openai package is only needed
    # when this provider is actually selected.
    from openai import OpenAI

    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise RuntimeError("OPENAI_API_KEY not set")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _default_backend_base_url() -> str:
    """Return the backend base URL, normalised to end in exactly one ``/api``.

    The value is read from the ``BACKEND_BASE_URL`` environment variable and
    falls back to the hosted default when the variable is unset, empty, or
    whitespace-only.

    Returns:
        The base URL without a trailing slash and with an ``/api`` suffix.
    """
    default = "https://agent-learning.onrender.com/api"
    # Drop surrounding whitespace and trailing slashes first; otherwise
    # "https://host/api/" would fail the suffix check and become ".../api//api".
    raw = os.getenv("BACKEND_BASE_URL", default).strip().rstrip("/")
    if not raw:
        # An empty override would otherwise yield the unusable URL "/api".
        raw = default
    return raw if raw.endswith("/api") else f"{raw}/api"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _get_synth_client():
    """Create a synth-ai ``InferenceClient`` from environment settings.

    The API key comes from ``SYNTH_API_KEY``. The base URL comes from
    ``SYNTH_BASE_URL`` when set, otherwise from ``_default_backend_base_url()``.

    Raises:
        RuntimeError: If ``SYNTH_API_KEY`` is unset or empty.
    """
    # Imported inside the function, so the inference client is only loaded
    # when this provider is actually selected.
    from synth_ai.inference.client import InferenceClient

    key = os.environ.get("SYNTH_API_KEY")
    if not key:
        raise RuntimeError("SYNTH_API_KEY not set")

    fallback_url = _default_backend_base_url()
    endpoint = os.environ.get("SYNTH_BASE_URL", fallback_url)
    return InferenceClient(base_url=endpoint, api_key=key)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Build a reproducible Crafter task instance for the given seed.

    Args:
        seed: World seed, stored in both the instance metadata and the
            ``config`` attribute attached to the instance.

    Returns:
        A ``CrafterTaskInstance`` with a fresh UUID and a ``config`` dict
        attached after construction.
    """
    task = CrafterTaskInstance(
        impetus=Impetus(instructions="Explore, survive, and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(
            difficulty="custom",
            seed=seed,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        id=uuid4(),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )

    # ``config`` is attached with setattr after construction rather than
    # passed to the constructor.
    world_config = {"seed": seed, "length": 256, "area": [64, 64]}
    setattr(task, "config", world_config)
    return task
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
    """Adapt a policy inference request to the OpenAI chat-completions API.

    The input mapping is not mutated; a shallow copy is returned.

    Args:
        payload: Inference request produced by the policy.
        model: Model name to place in the request (overrides any existing one).
        temperature: Sampling temperature used when the request does not
            already carry one. Ignored for gpt-5 models.

    Returns:
        A new request dict with vendor-specific keys removed and the token
        limit expressed under the key the target model family expects.
    """
    request = dict(payload)
    request["model"] = model

    # Remove vendor-specific knobs
    for vendor_key in ("stop_after_tool_calls", "thinking_mode", "thinking_budget"):
        request.pop(vendor_key, None)

    if "gpt-5" in model.lower():
        # gpt-5-nano only supports temperature=1 (default), so drop any override.
        request.pop("temperature", None)
        # gpt-5 takes max_completion_tokens instead of max_tokens. Carry a
        # caller-supplied max_tokens across rather than silently replacing it
        # with the 512 default; an explicit max_completion_tokens still wins.
        legacy_limit = request.pop("max_tokens", None)
        request.setdefault(
            "max_completion_tokens",
            legacy_limit if legacy_limit is not None else 512,
        )
    else:
        # Older models use max_tokens and support custom temperature
        request.setdefault("temperature", temperature)
        max_completion = request.pop("max_completion_tokens", None)
        if max_completion is not None:
            request["max_tokens"] = max_completion
        else:
            request.setdefault("max_tokens", 512)

    return request
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
async def collect_traces(
    model: str,
    provider: str,
    num_episodes: int,
    max_steps: int,
    seed_start: int,
    output_dir: Path,
    temperature: float,
):
    """Run Crafter episodes against an LLM and persist each step as a trace.

    Args:
        model: Model name sent to the provider and recorded with each trace.
        provider: Inference provider, ``"openai"`` or ``"synth"``.
        num_episodes: Number of episodes; episode ``i`` uses seed ``seed_start + i``.
        max_steps: Upper bound on steps per episode.
        seed_start: Seed of the first episode.
        output_dir: Directory (created if missing) that receives ``rollouts.db``.
        temperature: Sampling temperature passed to the policy and the provider.

    Returns:
        Path of the SQLite database file inside ``output_dir``.

    Raises:
        RuntimeError: If the tracing storage modules could not be imported.
        ValueError: If ``provider`` is not one of the supported values.
    """
    # Setup tracing store
    if not TRACING_AVAILABLE:
        raise RuntimeError("Tracing storage not available. Cannot persist traces.")

    output_dir.mkdir(parents=True, exist_ok=True)
    db_path = output_dir / "rollouts.db"
    storage_config = StorageConfig(
        backend=StorageBackend.SQLITE,
        connection_string=f"sqlite+aiosqlite:///{db_path}",
    )
    tracing_store = create_storage(storage_config)
    await tracing_store.initialize()

    # Whether the backend exposes store_trace() does not change between steps,
    # so resolve it once instead of on every step.
    store_trace = getattr(cast(Any, tracing_store), "store_trace", None)

    # Setup inference client
    if provider == "openai":
        client = _get_openai_client()
        inference_url = "openai://chat-completions"
    elif provider == "synth":
        client = _get_synth_client()
        inference_url = "synth://inference"
    else:
        raise ValueError(f"Unknown provider: {provider}")

    print(f"🎮 Collecting {num_episodes} episodes with {model}")
    print(f" Provider: {provider}")
    print(f" Max steps: {max_steps}")
    print(f" Output: {output_dir}")
    print(f" Database: {db_path}")
    print()

    total_steps = 0
    total_achievements = 0

    for episode_id in range(num_episodes):
        seed = seed_start + episode_id

        # Build task instance
        task_instance = _build_task_instance(seed)
        env = CrafterClassicEnvironment(task_instance)
        wrapper = CrafterEnvironmentWrapper(env, seed=seed)

        # Initialize policy (vision auto-detected from model name)
        policy = CrafterPolicy(inference_url=inference_url, model=model)
        await policy.initialize({
            "use_tools": True,
            "model": model,
            "temperature": temperature,
            "max_tokens": 512,
        })

        observation_packet = await wrapper.initialize()

        steps_taken = 0
        achievements = set()

        # try/finally guarantees the environment is terminated even when a
        # provider call or an environment step raises mid-episode.
        try:
            # Run episode
            for step_idx in range(max_steps):
                obs_dict = observation_packet.get("observation")
                if not isinstance(obs_dict, dict):
                    break

                # Format observation
                obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001

                # Get tool calls from policy
                tool_calls, meta = await policy.step(
                    observation_text=obs_text,
                    metadata={"raw_observation": observation_packet},
                )
                if "inference_request" not in meta:
                    break

                inference_request = meta["inference_request"]

                # Call inference
                if provider == "openai":
                    normalized_request = _normalise_openai_request(
                        inference_request,
                        model=model,
                        temperature=temperature,
                    )
                    # NOTE: this call is not awaited, so it blocks the event
                    # loop until the response arrives.
                    response = client.chat.completions.create(**normalized_request)
                    response_dict = response.model_dump()
                else:  # synth
                    response_dict = await client.create_chat_completion(
                        model=model,
                        messages=inference_request["messages"],
                        temperature=temperature,
                        max_tokens=512,
                        tools=inference_request.get("tools"),
                    )

                # Parse tool calls
                assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
                    response_dict,
                    use_tools=policy.use_tools,
                )
                if not assistant_tool_calls:
                    break

                # Store trace: the request messages plus the assistant reply.
                assistant_message = response_dict["choices"][0].get("message", {})
                trace_messages = inference_request["messages"] + [assistant_message]

                if store_trace is not None:
                    await store_trace(
                        session_id=f"ep{episode_id:04d}",
                        step=step_idx,
                        messages=trace_messages,
                        model=model,
                        metadata={
                            "seed": seed,
                            "has_image": policy.use_vision,
                            "provider": provider,
                        },
                    )
                else:
                    logging.warning(
                        "Tracing backend does not expose store_trace(); skipping persistence for episode %s",
                        episode_id,
                    )

                # Execute action
                assistant_text = assistant_message.get("content")
                env_response = await wrapper.step(assistant_tool_calls)
                if not isinstance(env_response, dict):
                    break

                # Update policy history
                policy._append_assistant_turn(  # noqa: SLF001
                    assistant_text,
                    assistant_tool_calls,
                    env_response,
                )

                steps_taken += 1

                # Track achievements
                obs = env_response.get("observation", {})
                ach_status = obs.get("achievements_status", {})
                achievements.update(name for name, unlocked in ach_status.items() if unlocked)

                if env_response.get("done"):
                    break
                observation_packet = env_response
        finally:
            await wrapper.terminate()

        total_steps += steps_taken
        total_achievements += len(achievements)

        print(
            f"✓ Episode {episode_id:3d} (seed={seed}): {steps_taken} steps, "
            f"{len(achievements)} achievements"
        )

    # Guard the average: with zero episodes the division would raise.
    avg_achievements = total_achievements / num_episodes if num_episodes else 0.0

    print()
    print("✅ Collection complete!")
    print(f" Total episodes: {num_episodes}")
    print(f" Total steps: {total_steps}")
    print(f" Avg achievements: {avg_achievements:.2f}")
    print(f" Database: {db_path}")
    print()
    print("Next steps:")
    print(" 1. Export traces to SFT JSONL format")
    print(" 2. Split into train/val datasets")
    print(" 3. Train VLM with LoRA")

    return db_path
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
async def main() -> None:
    """Parse command-line arguments and run trace collection."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring holds pre-formatted, multi-line usage examples;
        # the default formatter would re-wrap them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--model",
        required=True,
        help="Model name (e.g., gpt-5-nano, Qwen/Qwen2-VL-7B-Instruct)",
    )
    parser.add_argument(
        "--provider",
        choices=["openai", "synth"],
        required=True,
        help="Inference provider",
    )
    parser.add_argument("--episodes", type=int, default=100, help="Number of episodes")
    parser.add_argument("--max-steps", type=int, default=50, help="Max steps per episode")
    parser.add_argument("--seed-start", type=int, default=0, help="Starting seed")
    parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("traces/vision_traces"),
        help="Output directory for traces",
    )
    args = parser.parse_args()

    await collect_traces(
        model=args.model,
        provider=args.provider,
        num_episodes=args.episodes,
        max_steps=args.max_steps,
        seed_start=args.seed_start,
        output_dir=args.output_dir,
        temperature=args.temperature,
    )


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Crafter RL with Vision - Qwen3-VL-4B
|
|
2
|
+
#
|
|
3
|
+
# This configuration runs online RL (GRPO/GSPO) with a vision-language model
|
|
4
|
+
# using the same Crafter task app that generates image observations for SFT data.
|
|
5
|
+
#
|
|
6
|
+
# Model: Qwen/Qwen3-VL-4B-Instruct (smaller, faster for testing)
|
|
7
|
+
# Task App: grpo-crafter-task-app (Modal deployed, supports vision)
|
|
8
|
+
# Policy: crafter-react with use_vision=true, image_only_mode=true
|
|
9
|
+
|
|
10
|
+
[algorithm]
|
|
11
|
+
type = "online"
|
|
12
|
+
method = "policy_gradient"
|
|
13
|
+
variety = "gspo"
|
|
14
|
+
|
|
15
|
+
[services]
|
|
16
|
+
# Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
|
|
17
|
+
task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
|
|
18
|
+
|
|
19
|
+
[compute]
|
|
20
|
+
gpu_type = "H200"
|
|
21
|
+
gpu_count = 2
|
|
22
|
+
|
|
23
|
+
[topology]
|
|
24
|
+
type = "single_node_split"
|
|
25
|
+
gpus_for_vllm = 1
|
|
26
|
+
gpus_for_training = 1
|
|
27
|
+
gpus_for_ref = 0
|
|
28
|
+
tensor_parallel = 1
|
|
29
|
+
|
|
30
|
+
[vllm]
|
|
31
|
+
tensor_parallel_size = 1
|
|
32
|
+
max_model_len = 4096
|
|
33
|
+
# Vision-specific settings
|
|
34
|
+
limit_mm_per_prompt = { "image": 1 } # Max 1 image per prompt
|
|
35
|
+
|
|
36
|
+
[reference]
|
|
37
|
+
placement = "none"
|
|
38
|
+
|
|
39
|
+
[model]
|
|
40
|
+
base = "Qwen/Qwen3-VL-4B-Instruct"
|
|
41
|
+
trainer_mode = "lora"
|
|
42
|
+
label = "crafter-rl-vision-qwen3vl4b"
|
|
43
|
+
supports_vision = true # Enable vision support
|
|
44
|
+
|
|
45
|
+
[lora]
|
|
46
|
+
r = 16
|
|
47
|
+
alpha = 32
|
|
48
|
+
dropout = 0.05
|
|
49
|
+
target_modules = ["all-linear"]
|
|
50
|
+
# Note: will automatically include mm_projector for vision models
|
|
51
|
+
|
|
52
|
+
[rollout]
|
|
53
|
+
env_name = "crafter"
|
|
54
|
+
max_turns = 10 # 10 steps per episode for faster testing
|
|
55
|
+
episodes_per_batch = 2
|
|
56
|
+
policy_name = "crafter-react"
|
|
57
|
+
max_concurrent_rollouts = 4 # Lower for vision models (memory)
|
|
58
|
+
batches_per_step = 2
|
|
59
|
+
ops = ["agent", "env"]
|
|
60
|
+
|
|
61
|
+
[rollout.env_config]
|
|
62
|
+
difficulty = "easy"
|
|
63
|
+
|
|
64
|
+
[rollout.env_config.step_rewards]
|
|
65
|
+
enabled = true
|
|
66
|
+
mode = "decision_stepwise"
|
|
67
|
+
strategy = "consistent"
|
|
68
|
+
indicator_lambda = 1.0
|
|
69
|
+
step_beta = 0.0
|
|
70
|
+
|
|
71
|
+
[rollout.policy_config]
|
|
72
|
+
# Vision-specific policy settings
|
|
73
|
+
use_vision = true # Enable vision input
|
|
74
|
+
image_only_mode = true # Use only images, no text observations
|
|
75
|
+
temperature = 0.6 # Slightly higher for exploration
|
|
76
|
+
top_p = 0.95
|
|
77
|
+
max_tokens = 512
|
|
78
|
+
max_llm_calls = 10
|
|
79
|
+
|
|
80
|
+
[evaluation]
|
|
81
|
+
instances = 8 # Lower for faster vision evals
|
|
82
|
+
every_n_iters = 5
|
|
83
|
+
seeds = [0, 1, 2, 3, 4, 5, 6, 7]
|
|
84
|
+
|
|
85
|
+
[training]
|
|
86
|
+
num_epochs = 1
|
|
87
|
+
iterations_per_epoch = 3 # Shorter for integration test
|
|
88
|
+
gradient_accumulation_steps = 2
|
|
89
|
+
max_accumulated_minibatch = 1
|
|
90
|
+
max_turns = 10
|
|
91
|
+
batch_size = 2 # Smaller for vision models
|
|
92
|
+
group_size = 2
|
|
93
|
+
learning_rate = 5e-5
|
|
94
|
+
log_interval = 1
|
|
95
|
+
weight_sync_interval = 1
|
|
96
|
+
event_rewards_kind = "unique"
|
|
97
|
+
async_semaphore_max = 2 # Lower concurrency for vision
|
|
98
|
+
|
|
99
|
+
# Enable dense decision rewards
|
|
100
|
+
step_rewards_enabled = true
|
|
101
|
+
step_rewards_mode = "decision_stepwise"
|
|
102
|
+
step_rewards_indicator_lambda = 1.0
|
|
103
|
+
step_rewards_beta = 0.0
|
|
104
|
+
step_rewards_strategy = "consistent"
|
|
105
|
+
|
|
106
|
+
# Vision-specific training settings
|
|
107
|
+
max_images_per_message = 1 # Limit images for memory
|
|
108
|
+
supports_vision = true # Enable vision training path
|
|
109
|
+
|
|
110
|
+
[training.weight_sync]
|
|
111
|
+
enable = true
|
|
112
|
+
targets = ["policy"]
|
|
113
|
+
mode = "direct"
|
|
114
|
+
direct = true
|
|
115
|
+
verify_every_k = 0
|
|
116
|
+
|
|
117
|
+
[judge]
|
|
118
|
+
type = "env" # Use environment rewards only (simpler for testing)
|
|
119
|
+
timeout_s = 30
|
|
120
|
+
|
|
121
|
+
[tags]
|
|
122
|
+
experiment = "crafter_rl_vision_qwen3vl4b"
|
|
123
|
+
task = "crafter_agent_vision"
|
|
124
|
+
model_size = "4b"
|
|
125
|
+
vision_enabled = true
|
|
126
|
+
image_only = true
|
|
127
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Example Vision SFT Config for Crafter
|
|
2
|
+
# Train Qwen-VL on collected vision traces
|
|
3
|
+
|
|
4
|
+
[algorithm]
|
|
5
|
+
type = "offline"
|
|
6
|
+
method = "sft"
|
|
7
|
+
variety = "lora"
|
|
8
|
+
|
|
9
|
+
[job]
|
|
10
|
+
model = "Qwen/Qwen2-VL-7B-Instruct" # or Qwen/Qwen3-VL-8B
|
|
11
|
+
# Dataset from collect_vision_traces.py → export_to_sft.py
|
|
12
|
+
data = "traces/gpt5nano_vision/train.jsonl"
|
|
13
|
+
|
|
14
|
+
[compute]
|
|
15
|
+
gpu_type = "H200"
|
|
16
|
+
gpu_count = 2 # 2x H200 (282GB total)
|
|
17
|
+
nodes = 1
|
|
18
|
+
|
|
19
|
+
[training]
|
|
20
|
+
mode = "lora" # SFT with LoRA
|
|
21
|
+
use_qlora = true # Quantized LoRA for memory efficiency
|
|
22
|
+
|
|
23
|
+
[hyperparameters]
|
|
24
|
+
n_epochs = 2 # 2 epochs over collected samples
|
|
25
|
+
per_device_batch = 1 # Batch size 1 (images are memory-intensive)
|
|
26
|
+
gradient_accumulation_steps = 32
|
|
27
|
+
sequence_length = 2048 # Shorter context (images dominate memory)
|
|
28
|
+
learning_rate = 5e-06
|
|
29
|
+
warmup_ratio = 0.03
|
|
30
|
+
train_kind = "peft"
|
|
31
|
+
|
|
32
|
+
# LoRA config
|
|
33
|
+
lora_rank = 16
|
|
34
|
+
lora_alpha = 32
|
|
35
|
+
lora_dropout = 0.05
|
|
36
|
+
lora_target_modules = ["all-linear"] # Full linear layer adaptation
|
|
37
|
+
|
|
38
|
+
# Training optimizations
|
|
39
|
+
[hyperparameters.parallelism]
|
|
40
|
+
use_deepspeed = true
|
|
41
|
+
deepspeed_stage = 2
|
|
42
|
+
fsdp = false
|
|
43
|
+
bf16 = true
|
|
44
|
+
fp16 = false
|
|
45
|
+
activation_checkpointing = true
|
|
46
|
+
|
|
47
|
+
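# Evaluation (NOTE(review): no new table header precedes these keys, so TOML places them under [hyperparameters.parallelism] — confirm this is intended)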
|
|
48
|
+
evaluation_strategy = "steps"
|
|
49
|
+
eval_steps = 100
|
|
50
|
+
save_best_model_at_end = true
|
|
51
|
+
metric_for_best_model = "val.loss"
|
|
52
|
+
greater_is_better = false
|
|
53
|
+
load_best_model_at_end = true
|
|
54
|
+
|
|
55
|
+
[tags]
|
|
56
|
+
task = "crafter"
|
|
57
|
+
modality = "vision"
|
|
58
|
+
data_source = "collected_traces"
|
|
59
|
+
model_family = "qwen_vl"
|
|
60
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Evaluation config for gpt-4o-mini with vision
|
|
2
|
+
# Stronger teacher than gpt-5-nano, use for high-quality distillation
|
|
3
|
+
|
|
4
|
+
[eval]
|
|
5
|
+
model = "gpt-4o-mini-2024-07-18"
|
|
6
|
+
provider = "openai" # Use OpenAI API
|
|
7
|
+
|
|
8
|
+
# Task app endpoint
|
|
9
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
|
|
10
|
+
|
|
11
|
+
# Vision settings (auto-detected from "gpt-4o" in model name)
|
|
12
|
+
use_vision = true
|
|
13
|
+
image_only_mode = false # Include both text + images
|
|
14
|
+
|
|
15
|
+
# Rollout settings
|
|
16
|
+
num_episodes = 100
|
|
17
|
+
max_steps_per_episode = 50
|
|
18
|
+
seeds = "200-299" # Different seeds for comparison
|
|
19
|
+
|
|
20
|
+
# Sampling parameters
|
|
21
|
+
temperature = 0.6 # Lower temperature for more consistent behavior
|
|
22
|
+
max_tokens = 512
|
|
23
|
+
|
|
24
|
+
# Trace collection
|
|
25
|
+
collect_traces = true
|
|
26
|
+
trace_db = "traces/gpt4o_mini_vision/rollouts.db"
|
|
27
|
+
|
|
28
|
+
# Tools
|
|
29
|
+
use_tools = true
|
|
30
|
+
|
|
31
|
+
# Parallel rollouts
|
|
32
|
+
parallel_episodes = 5
|
|
33
|
+
|
|
34
|
+
[task]
|
|
35
|
+
name = "crafter"
|
|
36
|
+
environment = "crafter-classic"
|
|
37
|
+
|
|
38
|
+
# Task-specific settings
|
|
39
|
+
[task.config]
|
|
40
|
+
seed_start = 200
|
|
41
|
+
max_episode_length = 256
|
|
42
|
+
render_size = [64, 64] # 64x64 PNG images
|
|
43
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Proper synth-ai eval config for Crafter with gpt-4o-mini vision
|
|
2
|
+
# Collects traces with images into a database for use with synth-ai filter
|
|
3
|
+
|
|
4
|
+
[eval]
|
|
5
|
+
app_id = "grpo-crafter-task-app" # Modal deployed task app
|
|
6
|
+
model = "gpt-4o-mini-2024-07-18"
|
|
7
|
+
seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] # 10 episodes for test
|
|
8
|
+
max_turns = 50 # 50 steps per episode
|
|
9
|
+
concurrency = 2 # 2 parallel episodes
|
|
10
|
+
env_name = "crafter"
|
|
11
|
+
policy_name = "crafter-react"
|
|
12
|
+
trace_format = "structured" # Required for synth-ai eval
|
|
13
|
+
return_trace = true
|
|
14
|
+
|
|
15
|
+
[eval.env_config]
|
|
16
|
+
env_params = {max_steps_per_episode = 50}
|
|
17
|
+
|
|
18
|
+
[eval.policy_config]
|
|
19
|
+
provider = "openai"
|
|
20
|
+
model = "gpt-4o-mini-2024-07-18"
|
|
21
|
+
inference_url = "https://api.openai.com" # Base URL
|
|
22
|
+
# Note: Don't set temperature for gpt-4o-mini; use the default
|
|
23
|
+
top_p = 0.95
|
|
24
|
+
max_tokens = 512
|
|
25
|
+
use_vision = true # Enable vision
|
|
26
|
+
image_only_mode = false # Use both text + images
|
|
27
|
+
max_llm_calls = 50
|
|
28
|
+
use_tools = true # Enable tool calling
|
|
29
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Evaluation config for gpt-4o-mini with vision
|
|
2
|
+
# Collects vision traces for SFT training
|
|
3
|
+
# Note: gpt-5-nano doesn't support tool calling yet, use gpt-4o-mini instead
|
|
4
|
+
|
|
5
|
+
[eval]
|
|
6
|
+
model = "gpt-4o-mini-2024-07-18" # Changed from gpt-5-nano (no tool support)
|
|
7
|
+
provider = "openai" # Use OpenAI API
|
|
8
|
+
|
|
9
|
+
# Task app endpoint (local or hosted)
|
|
10
|
+
# task_app_url = "http://localhost:8000" # Local
|
|
11
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run" # Hosted
|
|
12
|
+
|
|
13
|
+
# Vision settings (auto-detected from "gpt-4o" in model name)
|
|
14
|
+
use_vision = true
|
|
15
|
+
image_only_mode = false # Include both text + images
|
|
16
|
+
|
|
17
|
+
# Rollout settings
|
|
18
|
+
num_episodes = 100
|
|
19
|
+
max_steps_per_episode = 50
|
|
20
|
+
seeds = "0-99" # Seeds 0 through 99
|
|
21
|
+
|
|
22
|
+
# Sampling parameters
|
|
23
|
+
temperature = 0.7
|
|
24
|
+
max_tokens = 512
|
|
25
|
+
|
|
26
|
+
# Trace collection
|
|
27
|
+
collect_traces = true
|
|
28
|
+
trace_db = "traces/gpt5nano_vision/rollouts.db"
|
|
29
|
+
|
|
30
|
+
# Tools
|
|
31
|
+
use_tools = true
|
|
32
|
+
|
|
33
|
+
# Parallel rollouts (speeds up collection)
|
|
34
|
+
parallel_episodes = 5 # Run 5 episodes in parallel
|
|
35
|
+
|
|
36
|
+
[task]
|
|
37
|
+
name = "crafter"
|
|
38
|
+
environment = "crafter-classic"
|
|
39
|
+
|
|
40
|
+
# Task-specific settings
|
|
41
|
+
[task.config]
|
|
42
|
+
seed_start = 0
|
|
43
|
+
max_episode_length = 256
|
|
44
|
+
render_size = [64, 64] # 64x64 PNG images
|
|
45
|
+
|