synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,305 @@
1
+ """Shared utilities for Crafter environment and policy.
2
+
3
+ This module formats Crafter observations for the LLM and parses actions.
4
+ It now mirrors the ludic_private implementation for semantic map rendering
5
+ by dynamically deriving the id->name mapping from the actual Crafter env
6
+ when available, with a sensible fallback. This fixes the issue where the
7
+ rendered surroundings appeared only as iron/stone due to a mismatched
8
+ hardcoded mapping.
9
+ """
10
+
11
+ import itertools
12
+ import re
13
+ from typing import Any
14
+
15
+ import numpy as np
16
+
17
# Default view size for the map (match eval_rollout_table)
VIEW_SIZE = 5

# Canonical action names, positioned so that each name's index is its game action ID.
_ACTION_NAMES_BY_ID = (
    "noop",
    "move_left",
    "move_right",
    "move_up",
    "move_down",
    "do",
    "sleep",
    "place_stone",
    "place_table",
    "place_furnace",
    "place_plant",
    "make_wood_pickaxe",
    "make_stone_pickaxe",
    "make_iron_pickaxe",
    "make_wood_sword",
    "make_stone_sword",
    "make_iron_sword",
)

# Action name -> integer action ID.
CRAFTER_ACTIONS = {name: action_id for action_id, name in enumerate(_ACTION_NAMES_BY_ID)}

# Tools that have both a canonical "make_<tool>" action and a "craft_<tool>" alias.
_CRAFTABLE_TOOLS = (
    "wood_pickaxe",
    "stone_pickaxe",
    "iron_pickaxe",
    "wood_sword",
    "stone_sword",
    "iron_sword",
)

# Alternative spellings -> canonical action name.
ACTION_ALIASES = {
    # Movement
    "left": "move_left",
    "right": "move_right",
    "up": "move_up",
    "down": "move_down",
    # Interaction
    "interact": "do",
    "use": "do",
    "action": "do",
    # Sleep
    "rest": "sleep",
    # Crafting
    **{f"craft_{tool}": f"make_{tool}" for tool in _CRAFTABLE_TOOLS},
}

# Lookup sets used when validating action strings.
VALID_PRIMARY_ACTIONS: set[str] = set(CRAFTER_ACTIONS)
VALID_ACTION_ALIASES: set[str] = set(ACTION_ALIASES)
ALL_VALID_ACTION_STRINGS: set[str] = VALID_PRIMARY_ACTIONS.union(VALID_ACTION_ALIASES)
65
+
66
+
67
def validate_action(action: str) -> bool:
    """Return True when ``action`` names a known Crafter action or alias.

    Matching is lenient about formatting: surrounding whitespace is removed,
    case is ignored, and inner spaces are treated as underscores before the
    lookup in ``ALL_VALID_ACTION_STRINGS``.
    """
    candidate = action.strip().lower()
    candidate = candidate.replace(" ", "_")
    return candidate in ALL_VALID_ACTION_STRINGS
71
+
72
+
73
# Patterns are compiled once at import time rather than on every call.
_ACTION_TAG_RE = re.compile(r"<action>(.*?)</action>", re.IGNORECASE)
_ACTION_BRACKET_RE = re.compile(r"\[action\](.*?)(?:\[/action\]|\n|$)", re.IGNORECASE)
_LIST_NUMBER_RE = re.compile(r"^\d+\.\s*")
_ACTION_SEPARATOR_RE = re.compile(r"[,;]|\s+and\s+|\s+then\s+")

# Only the first matching prefix is stripped, so a longer prefix must be listed
# before any shorter prefix it begins with. "**ACTION:**" has to precede "*",
# otherwise the bare bullet matches first and the bold label is never removed.
_LINE_PREFIXES = (
    "**ACTION:**",
    "ACTION:",
    "Action:",
    "action:",
    "ACTION",
    "-",
    "*",
    "•",
)


def parse_actions(action_text: str) -> list[str]:
    """Extract valid Crafter actions from a model response.

    Strategies are tried in order and the first one that finds a match decides
    the result:

    1. ``<action>...</action>`` tags.
    2. ``[action]...[/action]`` tags, or ``[action]...`` running to end of line.
    3. The whole (trimmed) text being a single valid action.
    4. Line-by-line parsing: one leading label/bullet prefix and a leading list
       number are removed, the line is split on ``,``, ``;``, ``and``, ``then``,
       and surrounding quotes are dropped from each piece.

    Args:
        action_text: Raw response text.

    Returns:
        The actions accepted by ``validate_action``, in order of appearance.
        Strings are returned trimmed but otherwise as written (case and spaces
        are not normalized). When tags are present but none wraps a valid
        action, the result is an empty list; plain-text parsing is not tried.
    """
    for pattern in (_ACTION_TAG_RE, _ACTION_BRACKET_RE):
        tagged = pattern.findall(action_text)
        if tagged:
            return [item.strip() for item in tagged if validate_action(item.strip())]

    # No tags found: fall back to plain-text parsing.
    text = action_text.strip()
    if validate_action(text):
        return [text]

    actions: list[str] = []
    for raw_line in text.split("\n"):
        line = raw_line.strip()

        # Strip at most one label or bullet prefix.
        for prefix in _LINE_PREFIXES:
            if line.startswith(prefix):
                line = line[len(prefix) :].strip()
                break

        # Drop a leading "1. " style list number, if any.
        line = _LIST_NUMBER_RE.sub("", line)

        # A single line may carry several actions.
        for part in _ACTION_SEPARATOR_RE.split(line):
            part = part.strip()
            if part.startswith('"') and part.endswith('"'):
                part = part[1:-1]
            if part.startswith("'") and part.endswith("'"):
                part = part[1:-1]

            if part and validate_action(part):
                actions.append(part)

    return actions
133
+
134
+
135
def format_observation(obs_data: dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
    """Render a Crafter observation dict as a compact text summary.

    Only a handful of scalar fields plus a small local map window are emitted,
    which keeps the prompt short even when the observation carries large arrays
    or deeply nested structures.

    Args:
        obs_data: Observation dictionary; an empty or missing one yields "".
        step_count: Step number to show unless the observation provides a
            non-negative ``steps`` / ``num_steps_taken`` value.
        max_steps: Step limit to show unless the observation provides a
            positive ``max_steps_episode`` / ``max_steps`` value.

    Returns:
        The formatted game-state text, ending with a prompt for the next actions.
    """
    if not obs_data:
        return ""

    inventory = obs_data.get("inventory", {})
    # Top-level health wins; otherwise look for it inside the inventory.
    health = obs_data.get("health") or inventory.get("health", 0)
    position = obs_data.get("player_position", [0, 0])
    facing = obs_data.get("player_direction", [0, 1])
    achievements = obs_data.get("achievements_status", {})

    # Values reported by the env take precedence over the caller's arguments.
    reported_step = obs_data.get("steps")
    if reported_step is None:
        reported_step = obs_data.get("num_steps_taken")
    if isinstance(reported_step, (int, float)) and reported_step >= 0:
        step_count = int(reported_step)

    reported_limit = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
    if isinstance(reported_limit, (int, float)) and reported_limit > 0:
        max_steps = int(reported_limit)

    # Health is displayed on its own line, so it is left out of the inventory.
    held = []
    for item, count in inventory.items():
        if count > 0 and item != "health":
            held.append(f"{item}:{count}")
    inventory_str = ", ".join(held) if held else "empty"

    unlocked = [name for name, done in achievements.items() if done]
    achievements_str = ", ".join(unlocked) if unlocked else "none"

    map_view = _format_semantic_map_view(obs_data, VIEW_SIZE)

    lines = [
        "=== CRAFTER GAME STATE ===",
        f"Step: {step_count}/{max_steps}",
        f"Health: {health}",
        f"Position: {position}",
        f"Facing: {facing}",
        f"Inventory: {inventory_str}",
        f"Achievements: {achievements_str}",
        f"{map_view}",
        "",
        "Choose your next actions.",
        "",
    ]
    return "\n".join(lines)
186
+
187
+
188
+ def _try_build_dynamic_mapping():
189
+ """Attempt to build id->name mapping from a real Crafter env.
190
+
191
+ Returns a list where index is semantic ID and value is the lowercase name.
192
+ On failure (crafter not installed or internal API changed), returns None.
193
+ """
194
+ try:
195
+ import crafter # type: ignore
196
+ except Exception:
197
+ return None
198
+
199
+ dummyenv = None
200
+ try:
201
+ dummyenv = crafter.Env()
202
+ # Combine material IDs and semantic view object IDs
203
+ world_ids = getattr(dummyenv, "_world", None)
204
+ sem_view = getattr(dummyenv, "_sem_view", None)
205
+ if world_ids is None or sem_view is None:
206
+ return None
207
+ mat_ids = getattr(world_ids, "_mat_ids", None)
208
+ obj_ids = getattr(sem_view, "_obj_ids", None)
209
+ if not isinstance(mat_ids, dict) or not isinstance(obj_ids, dict):
210
+ return None
211
+ max_id = max(max(mat_ids.values()), max(obj_ids.values())) + 1
212
+ id_to_item = ["void"] * max_id
213
+ for name, idx in itertools.chain(mat_ids.items(), obj_ids.items()):
214
+ if name is None:
215
+ clean = "none"
216
+ elif hasattr(name, "__name__"):
217
+ clean = name.__name__.lower()
218
+ else:
219
+ clean = str(name).lower()
220
+ if 0 <= idx < len(id_to_item):
221
+ id_to_item[idx] = clean
222
+ return id_to_item
223
+ except Exception:
224
+ return None
225
+ finally:
226
+ try:
227
+ if dummyenv is not None:
228
+ dummyenv.close()
229
+ except Exception:
230
+ pass
231
+
232
+
233
+ # Build dynamic mapping if possible; otherwise fall back to a basic map
234
# Evaluated once, at import time; None when no dynamic table could be built.
_ID_TO_NAME = _try_build_dynamic_mapping()

# Static semantic id -> name table; each name's position is its ID.
_FALLBACK_ID_TO_NAME = dict(
    enumerate(
        (
            "none",  # None from materials
            "water",
            "grass",
            "stone",
            "path",
            "sand",
            "tree",
            "lava",
            "coal",
            "iron",
            "diamond",
            "table",
            "furnace",
            "player",
            "cow",
            "zombie",
            "skeleton",
            "arrow",
            "plant",
        )
    )
)
256
+
257
+
258
def _format_semantic_map_view(obs_data: dict[str, Any], view_size: int = VIEW_SIZE) -> str:
    """Render the cells around the player as a grid of object names.

    Args:
        obs_data: Observation dict. ``semantic_map`` holds the semantic IDs
            (2-D, or a flat sequence with a perfect-square length) and
            ``player_position`` the player's two indices into it (defaults to
            ``[0, 0]`` when missing or None).
        view_size: Requested window size; the window spans ``view_size // 2``
            cells on each side of the player.

    Returns:
        A header line followed by one line of space-separated names per row,
        or "Map view unavailable" when there is no map or it cannot be
        interpreted as a square 2-D grid. Cells outside the map are "void" and
        the centre cell is always "player".
    """
    semantic_map = obs_data.get("semantic_map")
    if semantic_map is None:
        return "Map view unavailable"

    sem_arr = np.asarray(semantic_map)
    if sem_arr.ndim == 1:
        # A flat map is assumed to be a square grid; refuse to guess otherwise
        # instead of letting reshape() raise.
        side = int(sem_arr.size**0.5)
        if side * side != sem_arr.size:
            return "Map view unavailable"
        sem_arr = sem_arr.reshape(side, side)
    elif sem_arr.ndim != 2:
        # Scalars and 3-D+ arrays cannot be indexed as sem_arr[x, y] -> int.
        return "Map view unavailable"

    player_position = obs_data.get("player_position")
    if player_position is None:
        player_position = [0, 0]
    px, py = map(int, player_position)
    half = view_size // 2

    # Prefer the table derived from a live env; otherwise use the static one.
    use_list = isinstance(_ID_TO_NAME, list) and len(_ID_TO_NAME) > 0

    # Output rows step along the first array axis (x offsets) and columns
    # along the second (y offsets).
    offsets = range(-half, half + 1)
    grid_rows: list[str] = []
    for dx in offsets:
        tokens: list[str] = []
        for dy in offsets:
            x, y = px + dx, py + dy
            if not (0 <= x < sem_arr.shape[0] and 0 <= y < sem_arr.shape[1]):
                tokens.append("void")
            elif dx == 0 and dy == 0:
                tokens.append("player")
            else:
                obj_id = int(sem_arr[x, y])
                if use_list and 0 <= obj_id < len(_ID_TO_NAME):
                    name = _ID_TO_NAME[obj_id]  # type: ignore[index]
                else:
                    # Unknown IDs are shown as their number.
                    name = _FALLBACK_ID_TO_NAME.get(obj_id, str(obj_id))
                tokens.append(name)
        grid_rows.append(" ".join(tokens))

    return f"\nLocal Map View ({view_size}x{view_size}):\n" + "\n".join(grid_rows)
@@ -0,0 +1,47 @@
1
+ """OpenAI tools schema for Crafter, defined in Python."""
2
+
3
+ # Pass this list directly to OpenAI/vLLM `tools=`
4
# Action names the `interact_many` tool accepts.
_INTERACT_ACTION_NAMES = [
    "noop",
    "move_left",
    "move_right",
    "move_up",
    "move_down",
    "do",
    "sleep",
    "place_stone",
    "place_table",
    "place_furnace",
    "place_plant",
    "make_wood_pickaxe",
    "make_stone_pickaxe",
    "make_iron_pickaxe",
    "make_wood_sword",
    "make_stone_sword",
    "make_iron_sword",
]

# JSON-schema fragment for the tool's single `actions` argument (1 to 8 items).
_ACTIONS_ARGUMENT = {
    "type": "array",
    "description": "List of Crafter actions to execute sequentially.",
    "items": {
        "type": "string",
        "enum": _INTERACT_ACTION_NAMES,
    },
    "minItems": 1,
    "maxItems": 8,
}

# Function definition for the only tool exposed to the model.
_INTERACT_MANY_FUNCTION = {
    "name": "interact_many",
    "description": "Execute a short sequence of Crafter actions in order (1-8).",
    "parameters": {
        "type": "object",
        "properties": {"actions": _ACTIONS_ARGUMENT},
        "required": ["actions"],
        "additionalProperties": False,
    },
}

TOOLS_SCHEMA = [
    {
        "type": "function",
        "function": _INTERACT_MANY_FUNCTION,
    }
]
@@ -0,0 +1,204 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import os
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import JSONResponse
9
+ from pydantic import BaseModel
10
+ from starlette.requests import Request
11
+
12
+
13
class TaskApp:
    """Configuration shared by the routes of the hosted task service.

    Each setting comes from the constructor argument when it is truthy, and
    otherwise from the matching environment variable.
    """

    def __init__(
        self,
        service_base_url: str | None = None,
        vllm_base_url: str | None = None,
        default_model: str | None = None,
    ) -> None:
        """Resolve the service URL, inference URL, and default model name.

        Args:
            service_base_url: Falls back to ``SERVICE_BASE_URL``, then
                ``http://localhost:8000``.
            vllm_base_url: Falls back to ``VLLM_BASE_URL``, then
                ``http://localhost:8001``.
            default_model: Falls back to ``DEFAULT_MODEL``; None when unset.
        """
        if not service_base_url:
            service_base_url = os.getenv("SERVICE_BASE_URL", "http://localhost:8000")
        if not vllm_base_url:
            vllm_base_url = os.getenv("VLLM_BASE_URL", "http://localhost:8001")
        if not default_model:
            default_model = os.getenv("DEFAULT_MODEL")

        self.service_base_url = service_base_url
        self.vllm_base_url = vllm_base_url
        self.default_model = default_model
27
+
28
+
29
class ServiceInfo(BaseModel):
    """Response body of the ``/info`` service-discovery endpoint."""

    # Base URL and route patterns of this task service.
    service: dict
    # Base URL, routes, and default model of the inference backend.
    inference: dict
34
+
35
+
36
def create_app(allowed_environments: list[str] | None = None) -> FastAPI:
    """FastAPI app factory.

    Args:
        allowed_environments: List of environment names this service is allowed to handle.
            If None (or empty), all environments are allowed (for backward compatibility).

    Returns:
        The configured application, with CORS, the optional environment filter,
        the env/policy/rollout/branching routers, and the ``/info`` and
        ``/health`` endpoints installed.
    """
    env_filter = f" ({', '.join(allowed_environments)})" if allowed_environments else ""
    app = FastAPI(
        title=f"GRPO Synth Envs Hosted Service{env_filter}",
        description=f"Hosted environment and policy service for GRPO training{env_filter}",
        version="0.1.0",
    )

    # Add CORS middleware for development
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Initialize task app configuration
    task_app = TaskApp()
    app.state.task_app = task_app
    app.state.allowed_environments = allowed_environments

    # Add environment validation middleware
    if allowed_environments:
        import json

        # Lower-cased once here instead of on every request.
        allowed_lower = frozenset(name.lower() for name in allowed_environments)

        @app.middleware("http")
        async def validate_environment(request, call_next):
            # Only POSTs to environment/rollout routes carry an env_name to check.
            path = request.url.path
            if (
                path.startswith("/env/") or path.startswith("/rollout")
            ) and request.method == "POST":
                # We need to read the body to check env_name
                body = await request.body()
                try:
                    data = json.loads(body) if body else {}
                except ValueError:
                    # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
                    # Invalid payload: let the endpoint report it.
                    data = None

                # Non-object payloads and non-string names are left for the
                # endpoint's own validation rather than crashing here.
                env_name = data.get("env_name") if isinstance(data, dict) else None
                if isinstance(env_name, str) and env_name:
                    env_name = env_name.lower()
                    if env_name not in allowed_lower:
                        # An HTTPException raised inside an HTTP middleware is not
                        # converted by FastAPI's exception handlers and would
                        # surface as a 500, so the 403 is returned directly.
                        return JSONResponse(
                            status_code=403,
                            content={
                                "detail": f"Environment '{env_name}' not allowed. This service only handles: {allowed_environments}"
                            },
                        )

                # Keep the consumed body available to downstream handlers
                request._body = body

            return await call_next(request)

    # Mount routers
    from .branching import router as branching_router
    from .environment_routes import router as env_router
    from .rollout import router as rollout_router

    app.include_router(env_router, prefix="/env", tags=["environment"])

    # Policy routes are optional; skip if optional envs are missing in this build
    try:
        from .policy_routes import router as policy_router

        app.include_router(policy_router, prefix="/policy", tags=["policy"])
    except Exception as _e:
        # Log lightweight message; policy endpoints will be unavailable
        with contextlib.suppress(Exception):
            print(f"[hosted_app] Skipping policy routes: {_e}", flush=True)

    app.include_router(rollout_router, tags=["rollout"])
    app.include_router(branching_router, tags=["branching"])

    @app.get("/info", response_model=ServiceInfo)
    async def get_info() -> ServiceInfo:
        """Service discovery endpoint."""
        return ServiceInfo(
            service={
                "base_url": task_app.service_base_url,
                "endpoints": {
                    "env": "/env/*",
                    "policy": "/policy/*",
                    "rollout": "/rollout",
                    "branch": "/branch",
                    "run": "/run/*",
                },
            },
            inference={
                "base_url": task_app.vllm_base_url,
                "endpoints": {
                    "chat_completions": "/v1/chat/completions",
                },
                "default_model": task_app.default_model,
            },
        )

    @app.get("/health")
    async def health_check(request: Request) -> dict:
        """Health and auth sanity check.

        - Returns 503 if no environment API key is configured on the server
          (misconfigured container).
        - If the request's API-key headers do not authorize it, returns 200 with
          ``authorized: False`` so that CLI preflight checks do not fail.
        - Otherwise returns 200 with basic info.
        """

        # Check if any environment API keys are configured
        from synth_ai.task.auth import allowed_environment_api_keys

        allowed_keys = allowed_environment_api_keys()
        if not allowed_keys:
            # Server-side misconfiguration; rollout would fail with 503
            return JSONResponse(
                status_code=503,
                content={
                    "status": "unhealthy",
                    "detail": "Auth not configured: missing ENVIRONMENT_API_KEY in task service environment",
                },
            )

        # Authorize using all header variants without typed Header params (avoid 422s)
        from synth_ai.task.auth import is_api_key_header_authorized

        authorized = is_api_key_header_authorized(request)
        if not authorized:
            # Soft-pass 200 with authorized=False to avoid failing CLI preflight
            # NOTE(review): this hands the first half of a configured API key to
            # any unauthenticated caller. Kept for compatibility with existing
            # clients; consider shortening or removing the hint.
            primary_key = list(allowed_keys)[0] if allowed_keys else None
            prefix = primary_key[: max(1, len(primary_key) // 2)] if primary_key else None
            content = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return {
            "status": "healthy",
            "authorized": True,
            "service": {"base_url": task_app.service_base_url},
        }

    # Log and surface 422 validation errors with header presence
    from fastapi.exceptions import RequestValidationError

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Logging is best-effort: a failure here must not replace the 422 reply.
        with contextlib.suppress(Exception):
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": exc.errors()[:5],
            }
            print("[422] validation", snapshot, flush=True)
        return JSONResponse(
            status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
        )

    return app
@@ -0,0 +1,5 @@
1
+ """Inference module for OpenAI-compatible API clients."""
2
+
3
+ from .openai_client import OpenAIClient, create_inference_client
4
+
5
+ __all__ = ["OpenAIClient", "create_inference_client"]