synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic; see the registry's advisory page for more details.

Files changed (107)
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  97. synth_ai/api/train/config_finder.py +18 -18
  98. synth_ai/api/train/env_resolver.py +28 -1
  99. synth_ai/cli/task_apps.py +291 -56
  100. synth_ai/task/apps/__init__.py +54 -13
  101. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/METADATA +1 -1
  102. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/RECORD +106 -13
  103. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/top_level.txt +1 -0
  104. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  105. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/WHEEL +0 -0
  106. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/entry_points.txt +0 -0
  107. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,5 @@
1
+ """Storage module for Modal Volume operations."""
2
+
3
+ from .volume import VolumeStorage, storage
4
+
5
+ __all__ = ["VolumeStorage", "storage"]
from __future__ import annotations

import gzip
import hashlib
import io
import json
import os
import tarfile
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional


class VolumeStorage:
    """Helpers for Modal Volume storage operations.

    Snapshots are gzip-compressed tar archives holding ``state.json`` and
    ``meta.json``.  They are content-addressed: the snapshot ID is the
    SHA-256 of the archive bytes, and archives are written with fixed
    timestamps so identical content always hashes identically.
    """

    def __init__(self, base_path: str = "/data/state") -> None:
        # Root directory under which all runs/snapshots/indexes live.
        self.base_path = Path(base_path)

    def get_snapshot_path(
        self,
        rl_run_id: str,
        kind: str,
        snapshot_id: str,
    ) -> Path:
        """Build the path for a snapshot file.

        The first four characters of the snapshot ID form two shard
        directories to keep directory fan-out bounded.
        """
        shard1 = snapshot_id[:2] if len(snapshot_id) >= 2 else "00"
        shard2 = snapshot_id[2:4] if len(snapshot_id) >= 4 else "00"

        return (
            self.base_path
            / "runs"
            / rl_run_id
            / kind
            / shard1
            / shard2
            / f"{snapshot_id}.tar.gz"
        )

    def get_index_path(self, rl_run_id: str) -> Path:
        """Get the JSONL index file path for a run."""
        return self.base_path / "runs" / rl_run_id / "index" / "meta.jsonl"

    def write_snapshot_atomic(
        self,
        path: Path,
        archive_bytes: bytes,
    ) -> None:
        """Atomically write a snapshot archive to disk (write temp + rename)."""
        # Ensure parent directory exists.
        path.parent.mkdir(parents=True, exist_ok=True)

        # Append ".tmp" to the full name rather than using with_suffix(),
        # which would only replace the final ".gz" of "<id>.tar.gz" and
        # produce a misleading "<id>.tar.tmp" sibling.
        tmp_path = path.with_name(path.name + ".tmp")
        with open(tmp_path, "wb") as f:
            f.write(archive_bytes)
            f.flush()
            os.fsync(f.fileno())

        # Atomic rename onto the final path.
        os.replace(tmp_path, path)

    def create_archive(
        self,
        state_dict: Dict[str, Any],
        meta: Dict[str, Any],
    ) -> bytes:
        """Create a deterministic tar.gz archive with state and metadata.

        Both the tar member headers and the gzip header use mtime=0 so the
        archive bytes -- and therefore the content-addressed snapshot ID --
        depend only on the JSON content, not on when the archive was built.
        """
        buf = io.BytesIO()
        with tarfile.open(fileobj=buf, mode="w") as tar:
            for name, payload in (("state.json", state_dict), ("meta.json", meta)):
                data = json.dumps(payload, sort_keys=True, indent=2).encode("utf-8")
                info = tarfile.TarInfo(name=name)
                info.size = len(data)
                info.mtime = 0  # fixed timestamp keeps output reproducible
                tar.addfile(info, io.BytesIO(data))

        return gzip.compress(buf.getvalue(), compresslevel=6, mtime=0)

    def extract_archive(self, archive_bytes: bytes) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """Extract (state, meta) from a tar.gz archive.

        Reads the two expected members directly instead of extractall(),
        which avoids path-traversal issues with untrusted archives.
        """
        tar_bytes = gzip.decompress(archive_bytes)

        with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r") as tar:
            state_member = tar.extractfile("state.json")
            meta_member = tar.extractfile("meta.json")
            if state_member is None or meta_member is None:
                raise ValueError("archive is missing state.json or meta.json")
            state = json.load(state_member)
            meta = json.load(meta_member)

        return state, meta

    def compute_snapshot_id(self, archive_bytes: bytes) -> str:
        """Compute the content-addressed snapshot ID (SHA-256 hex digest)."""
        return hashlib.sha256(archive_bytes).hexdigest()

    def save_snapshot(
        self,
        rl_run_id: str,
        kind: str,
        state_dict: Dict[str, Any],
        config: Optional[Dict[str, Any]] = None,
        parent_snapshot_id: Optional[str] = None,
    ) -> tuple[str, str, int]:
        """Save a snapshot and return (snapshot_id, path, size_in_bytes)."""
        # Build metadata.
        meta: Dict[str, Any] = {
            "kind": kind,
            "rl_run_id": rl_run_id,
            "schema_version": "1.0",
            # Timezone-aware timestamp; datetime.utcnow() is deprecated.
            "created_at": datetime.now(timezone.utc).isoformat(),
        }

        if parent_snapshot_id:
            meta["parent_snapshot_id"] = parent_snapshot_id

        if config:
            config_str = json.dumps(config, sort_keys=True)
            meta["config_hash"] = hashlib.sha256(config_str.encode()).hexdigest()

        # First pass derives the content-addressed ID ...
        archive_bytes = self.create_archive(state_dict, meta)
        snapshot_id = self.compute_snapshot_id(archive_bytes)
        meta["snapshot_id"] = snapshot_id

        # ... second pass rebuilds the archive so it embeds its own ID.
        archive_bytes = self.create_archive(state_dict, meta)

        # Write atomically, then record in the run index.
        path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)
        self.write_snapshot_atomic(path, archive_bytes)
        self.append_to_index(rl_run_id, meta)

        return snapshot_id, str(path), len(archive_bytes)

    def load_snapshot(
        self,
        rl_run_id: str,
        kind: str,
        snapshot_id: str,
    ) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """Load a snapshot and return (state_dict, meta).

        Raises:
            FileNotFoundError: if no snapshot exists at the derived path.
        """
        path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)

        if not path.exists():
            raise FileNotFoundError(f"Snapshot not found: {path}")

        archive_bytes = path.read_bytes()
        return self.extract_archive(archive_bytes)

    def append_to_index(
        self,
        rl_run_id: str,
        meta: Dict[str, Any],
    ) -> None:
        """Append one metadata record to the run's JSONL index file."""
        index_path = self.get_index_path(rl_run_id)
        index_path.parent.mkdir(parents=True, exist_ok=True)

        with open(index_path, "a") as f:
            f.write(json.dumps(meta) + "\n")

    def read_index(self, rl_run_id: str) -> list[Dict[str, Any]]:
        """Read all entries from a run's index file ([] if absent)."""
        index_path = self.get_index_path(rl_run_id)

        if not index_path.exists():
            return []

        entries: list[Dict[str, Any]] = []
        with open(index_path, "r") as f:
            for line in f:
                if line.strip():
                    entries.append(json.loads(line))

        return entries


# Global storage instance shared by route handlers.
storage = VolumeStorage()
#!/usr/bin/env python3
"""
Smoke test for Wordle and Sokoban ReAct agents using the hosted service.

Prereqs:
- Run the service: python backend/app/services/rl/online/synth_envs_hosted/main.py
- Run an OpenAI-compatible inference server (e.g., Flash/vLLM) at VLLM_BASE_URL
  that serves model "gpt-5-nano" or adjust MODEL below.

This script will:
- Create a Wordle/Sokoban env
- Create corresponding *-react policy with tools
- Ask the policy for tool_calls via /policy/step (which calls the model)
- Apply tool_calls to the env via /env/step
"""

import asyncio
import os

import httpx

BASE_URL = os.environ.get("SYNTH_ENVS_HOSTED_URL", "http://localhost:8000")
INFER_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8001")
MODEL = os.environ.get("MODEL", "gpt-5-nano")


async def run_wordle(rounds: int = 3) -> None:
    """Drive the Wordle ReAct agent for up to *rounds* policy/env steps."""
    async with httpx.AsyncClient() as client:
        # Create the environment; failures raise via raise_for_status.
        env_resp = await client.post(
            f"{BASE_URL}/env/create",
            json={
                "env_name": "Wordle",
                "config": {"word_length": 5, "max_guesses": 6},
                "seed": 0,
                "rl_run_id": "agents-smoke",
            },
        )
        env_resp.raise_for_status()
        env_payload = env_resp.json()
        env_id = env_payload["env_id"]
        obs = env_payload["observation"]
        print("Wordle env created:", env_id)

        # Create the matching ReAct policy bound to this environment.
        policy_resp = await client.post(
            f"{BASE_URL}/policy/create",
            json={
                "policy_name": "wordle-react",
                "config": {
                    "inference_url": INFER_URL,
                    "model": MODEL,
                    "use_tools": True,
                    "word_length": 5,
                    "max_guesses": 6,
                },
                "rl_run_id": "agents-smoke",
                "bound_env_id": env_id,
            },
        )
        policy_resp.raise_for_status()
        policy_id = policy_resp.json()["policy_id"]
        print("Wordle policy:", policy_id)

        # Alternate /policy/step -> /env/step until done or out of rounds.
        for round_idx in range(rounds):
            print(f"[Wordle] Round {round_idx + 1}")
            step_resp = await client.post(
                f"{BASE_URL}/policy/step",
                json={"policy_id": policy_id, "observation": obs, "dry_run": False},
            )
            step_resp.raise_for_status()
            tool_calls = step_resp.json().get("tool_calls", [])
            print("  tool_calls:", tool_calls)
            if not tool_calls:
                break
            env_step_resp = await client.post(
                f"{BASE_URL}/env/step",
                json={"env_id": env_id, "tool_calls": tool_calls},
            )
            env_step_resp.raise_for_status()
            env_step = env_step_resp.json()
            obs = env_step["observation"]
            print("  done:", env_step.get("done"), "reward:", env_step.get("reward"))
            if env_step.get("done"):
                break


async def run_sokoban(rounds: int = 3) -> None:
    """Drive the Sokoban ReAct agent; prints diagnostics instead of raising."""
    async with httpx.AsyncClient() as client:
        # Create env (no initial_state provided; relies on env default)
        create_resp = await client.post(
            f"{BASE_URL}/env/create",
            json={
                "env_name": "Sokoban",
                "config": {"difficulty": "easy"},
                "seed": 0,
                "rl_run_id": "agents-smoke",
            },
        )
        if create_resp.status_code != 200:
            print("Sokoban create failed:", create_resp.status_code, create_resp.text)
            return
        created = create_resp.json()
        env_id = created["env_id"]
        obs = created["observation"]
        print("Sokoban env created:", env_id)

        policy_resp = await client.post(
            f"{BASE_URL}/policy/create",
            json={
                "policy_name": "sokoban-react",
                "config": {
                    "inference_url": INFER_URL,
                    "model": MODEL,
                    "use_tools": True,
                },
                "rl_run_id": "agents-smoke",
                "bound_env_id": env_id,
            },
        )
        if policy_resp.status_code != 200:
            print("Sokoban policy create failed:", policy_resp.status_code, policy_resp.text)
            return
        policy_id = policy_resp.json()["policy_id"]
        print("Sokoban policy:", policy_id)

        for round_idx in range(rounds):
            print(f"[Sokoban] Round {round_idx + 1}")
            step_resp = await client.post(
                f"{BASE_URL}/policy/step",
                json={"policy_id": policy_id, "observation": obs, "dry_run": False},
            )
            if step_resp.status_code != 200:
                print("  policy step failed:", step_resp.status_code, step_resp.text)
                break
            tool_calls = step_resp.json().get("tool_calls", [])
            print("  tool_calls:", tool_calls)
            if not tool_calls:
                break
            env_step_resp = await client.post(
                f"{BASE_URL}/env/step",
                json={"env_id": env_id, "tool_calls": tool_calls},
            )
            if env_step_resp.status_code != 200:
                print("  env step failed:", env_step_resp.status_code, env_step_resp.text)
                break
            env_step = env_step_resp.json()
            obs = env_step["observation"]
            print("  done:", env_step.get("done"), "reward:", env_step.get("reward"))
            if env_step.get("done"):
                break


async def main():
    print("Testing Wordle agent with model:", MODEL)
    await run_wordle(rounds=3)
    print("\nTesting Sokoban agent with model:", MODEL)
    await run_sokoban(rounds=3)


if __name__ == "__main__":
    asyncio.run(main())
#!/usr/bin/env python3
"""
Simple test script for the GRPO Synth Envs Hosted Service.

Run this after starting the service with:
    python main.py
"""

import asyncio
import json

import httpx


async def test_service():
    """Walk the hosted service through its basic endpoint lifecycle."""
    base_url = "http://localhost:8000"

    async with httpx.AsyncClient() as client:
        # Test 1: Service info
        print("1. Testing /info endpoint...")
        resp = await client.get(f"{base_url}/info")
        assert resp.status_code == 200
        info = resp.json()
        print(f"   Service info: {json.dumps(info, indent=2)}")

        # Test 2: Health check
        print("\n2. Testing /health endpoint...")
        resp = await client.get(f"{base_url}/health")
        assert resp.status_code == 200
        print(f"   Health: {resp.json()}")

        # Test 3: Create environment
        print("\n3. Creating environment...")
        resp = await client.post(
            f"{base_url}/env/create",
            json={
                "env_name": "crafter",
                "config": {},
                "seed": 42,
                "rl_run_id": "test-run-001",
            },
        )
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
            return
        env_data = resp.json()
        env_id = env_data["env_id"]
        print(f"   Created env: {env_id}")
        print(f"   Initial observation keys: {list(env_data['observation'].keys())}")

        # Test 4: Create policy
        print("\n4. Creating policy...")
        resp = await client.post(
            f"{base_url}/policy/create",
            json={
                "policy_name": "crafter-react",
                "config": {
                    "inference_url": "http://localhost:8001",
                    "model": "test-model",
                },
                "rl_run_id": "test-run-001",
                "bound_env_id": env_id,
            },
        )
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
            return
        policy_id = resp.json()["policy_id"]
        print(f"   Created policy: {policy_id}")

        # Test 5: Environment step with dummy tool calls
        print("\n5. Testing environment step...")
        resp = await client.post(
            f"{base_url}/env/step",
            json={
                "env_id": env_id,
                "tool_calls": [{"tool": "interact", "args": {"action": "move_left"}}],
            },
        )
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
        else:
            step_data = resp.json()
            print(
                f"   Step result - done: {step_data['done']}, reward: {step_data.get('reward')}"
            )

        # Test 6: Environment snapshot
        print("\n6. Creating environment snapshot...")
        resp = await client.post(f"{base_url}/env/snapshot", json={"env_id": env_id})
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
        else:
            snapshot_data = resp.json()
            print(f"   Snapshot ID: {snapshot_data['snapshot_id']}")
            print(f"   Size: {snapshot_data['size']} bytes")

        # Test 7: Policy snapshot
        print("\n7. Creating policy snapshot...")
        resp = await client.post(f"{base_url}/policy/snapshot", json={"policy_id": policy_id})
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
        else:
            snapshot_data = resp.json()
            print(f"   Snapshot ID: {snapshot_data['snapshot_id']}")
            print(f"   Size: {snapshot_data['size']} bytes")

        # Test 8: Run status
        print("\n8. Testing run status...")
        resp = await client.get(f"{base_url}/run/status/test-run-001")
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
        else:
            status_data = resp.json()
            print(f"   Run status: {status_data['status']}")

        # Test 9: Terminate environment
        print("\n9. Terminating environment...")
        resp = await client.post(f"{base_url}/env/terminate", json={"env_id": env_id})
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
        else:
            print(f"   Environment terminated: {resp.json()['ok']}")

        # Test 10: Terminate policy
        print("\n10. Terminating policy...")
        resp = await client.post(f"{base_url}/policy/terminate", json={"policy_id": policy_id})
        if resp.status_code != 200:
            print(f"   Error: {resp.status_code} - {resp.text}")
        else:
            print(f"   Policy terminated: {resp.json()['ok']}")

        print("\n✅ All basic tests completed!")


if __name__ == "__main__":
    asyncio.run(test_service())
"""Utility functions for the task service."""

from typing import Any, Dict, List, Union

import numpy as np

# Observation keys whose values may be large ndarray debug payloads
# (maps / rendered frames); arrays under these keys are dropped from the
# sanitized output rather than serialized.
_ARRAY_DEBUG_KEYS = frozenset({"semantic_map", "world_material_map", "observation_image"})


def convert_numpy_to_python(obj: Any) -> Any:
    """
    Recursively convert numpy types to Python native types for JSON serialization.

    Args:
        obj: Object that may contain numpy types

    Returns:
        Object with numpy types converted to Python native types
        (tuples become lists; unrecognized types pass through unchanged)
    """
    # np.bool_ is NOT a subclass of np.integer/np.floating, so it needs
    # its own branch -- json.dumps chokes on raw numpy booleans.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: convert_numpy_to_python(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_python(item) for item in obj]
    if isinstance(obj, np.generic):
        # Catch-all for any remaining numpy scalar type (e.g. np.str_).
        return obj.item()
    return obj


def sanitize_observation(observation: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize observation data for JSON serialization.

    Converts numpy types to native Python values and drops the known large
    ndarray debug payloads listed in _ARRAY_DEBUG_KEYS.

    Args:
        observation: Raw observation from environment

    Returns:
        Sanitized observation safe for JSON serialization (non-dict inputs
        are returned unchanged)
    """
    if not isinstance(observation, dict):
        return observation

    sanitized: Dict[str, Any] = {}
    for key, value in observation.items():
        if key in _ARRAY_DEBUG_KEYS and isinstance(value, np.ndarray):
            # Large debug arrays: skip entirely rather than inflate the payload.
            continue
        # Everything else is kept, with numpy types converted.  (This also
        # fixes the original's silent drop of debug-key values that were
        # not ndarrays, and subsumes its redundant player_position branch.)
        sanitized[key] = convert_numpy_to_python(value)

    return sanitized
@@ -18,17 +18,15 @@ class ConfigCandidate:
18
18
 
19
19
 
20
20
  def _iter_candidate_paths() -> Iterable[Path]:
21
- # Prefer explicit config directories first
22
- preferred = [
23
- REPO_ROOT / "configs",
24
- REPO_ROOT / "examples",
25
- REPO_ROOT / "training",
26
- ]
27
21
  seen: set[Path] = set()
28
- for base in preferred:
29
- if not base.exists():
30
- continue
31
- for path in base.rglob("*.toml"):
22
+
23
+ # Prioritize current working directory first
24
+ try:
25
+ cwd = Path.cwd().resolve()
26
+ except Exception:
27
+ cwd = None
28
+ if cwd and cwd.exists():
29
+ for path in cwd.rglob("*.toml"):
32
30
  if any(part in _SKIP_DIRS for part in path.parts):
33
31
  continue
34
32
  resolved = path.resolve()
@@ -37,14 +35,16 @@ def _iter_candidate_paths() -> Iterable[Path]:
37
35
  seen.add(resolved)
38
36
  yield resolved
39
37
 
40
- # Additionally, discover configs anywhere under the current working directory
41
- # so users can run `uvx synth-ai train` from project roots without passing --config.
42
- try:
43
- cwd = Path.cwd().resolve()
44
- except Exception:
45
- cwd = None
46
- if cwd and cwd.exists():
47
- for path in cwd.rglob("*.toml"):
38
+ # Then look in explicit config directories
39
+ preferred = [
40
+ REPO_ROOT / "configs",
41
+ REPO_ROOT / "examples",
42
+ REPO_ROOT / "training",
43
+ ]
44
+ for base in preferred:
45
+ if not base.exists():
46
+ continue
47
+ for path in base.rglob("*.toml"):
48
48
  if any(part in _SKIP_DIRS for part in path.parts):
49
49
  continue
50
50
  resolved = path.resolve()
@@ -55,17 +55,43 @@ class EnvResolver:
55
55
 
56
56
  def _collect_default_candidates(config_path: Path | None) -> list[Path]:
57
57
  candidates: list[Path] = []
58
+ cwd = Path.cwd()
59
+
60
+ # Prioritize CWD env files
61
+ cwd_env = cwd / ".env"
62
+ if cwd_env.exists():
63
+ candidates.append(cwd_env.resolve())
64
+
65
+ # Search for additional .env files in CWD subdirectories
66
+ for sub in cwd.glob("**/.env"):
67
+ try:
68
+ resolved = sub.resolve()
69
+ except Exception:
70
+ continue
71
+ if resolved in candidates:
72
+ continue
73
+ # avoid nested venv caches
74
+ if any(part in {".venv", "node_modules", "__pycache__"} for part in resolved.parts):
75
+ continue
76
+ if len(candidates) >= 20:
77
+ break
78
+ candidates.append(resolved)
79
+
80
+ # Then config path env file
58
81
  if config_path:
59
82
  cfg_env = config_path.parent / ".env"
60
83
  if cfg_env.exists():
61
84
  candidates.append(cfg_env.resolve())
85
+
86
+ # Then repo env files
62
87
  repo_env = REPO_ROOT / ".env"
63
88
  if repo_env.exists():
64
89
  candidates.append(repo_env.resolve())
65
90
  examples_env = REPO_ROOT / "examples" / ".env"
66
91
  if examples_env.exists():
67
92
  candidates.append(examples_env.resolve())
68
- # Search shallow depth for additional .env files
93
+
94
+ # Search shallow depth for additional .env files in examples
69
95
  for sub in (REPO_ROOT / "examples").glob("**/.env"):
70
96
  try:
71
97
  resolved = sub.resolve()
@@ -79,6 +105,7 @@ def _collect_default_candidates(config_path: Path | None) -> list[Path]:
79
105
  if len(candidates) >= 20:
80
106
  break
81
107
  candidates.append(resolved)
108
+
82
109
  deduped: list[Path] = []
83
110
  for path in candidates:
84
111
  if path not in deduped: