synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (107) hide show
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  97. synth_ai/api/train/config_finder.py +18 -18
  98. synth_ai/api/train/env_resolver.py +28 -1
  99. synth_ai/cli/task_apps.py +291 -56
  100. synth_ai/task/apps/__init__.py +54 -13
  101. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/METADATA +1 -1
  102. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/RECORD +106 -13
  103. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/top_level.txt +1 -0
  104. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  105. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/WHEEL +0 -0
  106. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/entry_points.txt +0 -0
  107. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,302 @@
1
+ """Shared utilities for Crafter environment and policy.
2
+
3
+ This module formats Crafter observations for the LLM and parses actions.
4
+ It now mirrors the ludic_private implementation for semantic map rendering
5
+ by dynamically deriving the id->name mapping from the actual Crafter env
6
+ when available, with a sensible fallback. This fixes the issue where the
7
+ rendered surroundings appeared only as iron/stone due to a mismatched
8
+ hardcoded mapping.
9
+ """
10
+
11
+ from typing import Dict, Any, List, Set
12
+ import numpy as np
13
+ import re
14
+ import itertools
15
+
16
VIEW_SIZE = 5  # Default view size for the map (match eval_rollout_table)

# Action mappings from the game.  The names are listed in action-id order,
# so each name's position in the tuple is the integer id it maps to.
CRAFTER_ACTIONS = {
    action_name: action_id
    for action_id, action_name in enumerate(
        (
            "noop",
            "move_left",
            "move_right",
            "move_up",
            "move_down",
            "do",
            "sleep",
            "place_stone",
            "place_table",
            "place_furnace",
            "place_plant",
            "make_wood_pickaxe",
            "make_stone_pickaxe",
            "make_iron_pickaxe",
            "make_wood_sword",
            "make_stone_sword",
            "make_iron_sword",
        )
    )
}

# Common action aliases: alternative spellings mapped to the primary name.
ACTION_ALIASES = dict(
    # Movement aliases
    left="move_left",
    right="move_right",
    up="move_up",
    down="move_down",
    # Interaction aliases
    interact="do",
    use="do",
    action="do",
    # Sleep
    rest="sleep",
    # Crafting
    craft_wood_pickaxe="make_wood_pickaxe",
    craft_stone_pickaxe="make_stone_pickaxe",
    craft_iron_pickaxe="make_iron_pickaxe",
    craft_wood_sword="make_wood_sword",
    craft_stone_sword="make_stone_sword",
    craft_iron_sword="make_iron_sword",
)

# Lookup sets used when validating free-form action strings.
VALID_PRIMARY_ACTIONS: Set[str] = set(CRAFTER_ACTIONS)
VALID_ACTION_ALIASES: Set[str] = set(ACTION_ALIASES)
ALL_VALID_ACTION_STRINGS: Set[str] = VALID_PRIMARY_ACTIONS.union(VALID_ACTION_ALIASES)
64
+
65
+
66
def validate_action(action: str) -> bool:
    """Return True when ``action`` names a known Crafter action or alias.

    The candidate is compared after trimming surrounding whitespace,
    lower-casing it, and turning spaces into underscores, so a string such
    as ``" Move Left "`` is accepted.
    """
    candidate = action.strip()
    candidate = candidate.lower()
    candidate = candidate.replace(" ", "_")
    is_known = candidate in ALL_VALID_ACTION_STRINGS
    return is_known
70
+
71
+
72
def parse_actions(action_text: str) -> List[str]:
    """Extract actions from response text.

    Parsing strategies, tried in order:
    1. ``<action>...</action>`` tags (original format)
    2. ``[action]...[/action]`` or ``[action]...`` up to end of line
    3. The whole text being a single valid action
    4. Line-by-line parsing: an optional prefix (``ACTION:``, bullets,
       ``**ACTION:**``) or list number is stripped, then the line is split
       on ``,``, ``;``, ``and`` and ``then``

    If either tag format matches at all, only the tagged actions are
    considered; invalid tagged actions are dropped without falling back to
    plain-text parsing.

    Args:
        action_text: Raw model response.

    Returns:
        The valid action strings in order of appearance. Each string is
        returned as written (stripped), not in its normalized form.
    """
    # Tagged formats take precedence over any plain-text heuristics.
    tag_patterns = (
        r"<action>(.*?)</action>",
        r"\[action\](.*?)(?:\[/action\]|\n|$)",
    )
    for pattern in tag_patterns:
        matches = re.findall(pattern, action_text, re.IGNORECASE)
        if matches:
            return [m.strip() for m in matches if validate_action(m.strip())]

    # If no tags found, try to parse plain text
    text = action_text.strip()

    # Check if the entire text is a valid action
    if validate_action(text):
        return [text]

    # The most specific prefixes must come first: "**ACTION:**" has to be
    # tested before the bare "*" bullet, otherwise "*" matches, the loop
    # breaks, and the leftover "*ACTION:** ..." never validates.
    prefixes = ("**ACTION:**", "ACTION:", "Action:", "action:", "ACTION", "-", "*", "•")

    actions: List[str] = []
    for raw_line in text.split("\n"):
        line = raw_line.strip()

        # Remove at most one leading prefix
        for prefix in prefixes:
            if line.startswith(prefix):
                line = line[len(prefix):].strip()
                break

        # Also handle numbered lists ("1. move_left"); no-op when absent
        line = re.sub(r"^\d+\.\s*", "", line)

        # Split by common separators to handle multiple actions on one line
        for part in re.split(r"[,;]|\s+and\s+|\s+then\s+", line):
            part = part.strip()
            # Remove quotes if present
            if part.startswith('"') and part.endswith('"'):
                part = part[1:-1]
            if part.startswith("'") and part.endswith("'"):
                part = part[1:-1]

            if part and validate_action(part):
                actions.append(part)

    return actions
133
+
134
+
135
def format_observation(obs_data: Dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
    """Format a Crafter observation dictionary into a human-readable string.

    This is critical for preventing massive token counts when observations
    contain large numpy arrays or deeply nested structures.

    Args:
        obs_data: Observation dictionary. Keys read here: ``health``,
            ``inventory``, ``player_position``, ``player_direction``,
            ``achievements_status``, ``steps`` / ``num_steps_taken`` and
            ``max_steps_episode`` / ``max_steps``. The semantic map is
            rendered by ``_format_semantic_map_view``.
        step_count: Step number to display when the observation has none.
        max_steps: Episode length to display when the observation has none.

    Returns:
        The formatted game state, or ``""`` when ``obs_data`` is empty.
    """
    if not obs_data:
        return ""

    # A key that is present but set to None must behave like a missing key;
    # ``dict.get(key, default)`` alone would hand back the None.
    inventory_dict = obs_data.get("inventory") or {}
    achievements = obs_data.get("achievements_status") or {}

    # Extract key information
    health = obs_data.get("health") or inventory_dict.get("health", 0)
    pos = obs_data.get("player_position", [0, 0])
    direction = obs_data.get("player_direction", [0, 1])

    # Counters may arrive as NumPy scalars; np.int64 is not an ``int``
    # subclass, so it has to be listed explicitly to be honoured.
    numeric_types = (int, float, np.integer, np.floating)

    # Prefer step/max from observation if provided by the env
    step_from_obs = obs_data.get("steps")
    if step_from_obs is None:
        step_from_obs = obs_data.get("num_steps_taken")
    if isinstance(step_from_obs, numeric_types) and step_from_obs >= 0:
        step_count = int(step_from_obs)

    max_steps_from_obs = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
    if isinstance(max_steps_from_obs, numeric_types) and max_steps_from_obs > 0:
        max_steps = int(max_steps_from_obs)

    # Format inventory (skip health as it's shown separately).  Non-numeric
    # values are skipped rather than allowed to raise on the comparison.
    inv_items = [
        f"{k}:{v}"
        for k, v in inventory_dict.items()
        if k != "health" and isinstance(v, numeric_types) and v > 0
    ]
    inventory_str = ", ".join(inv_items) if inv_items else "empty"

    # Format achievements
    achieved_list = [k for k, v in achievements.items() if v]
    achievements_str = ", ".join(achieved_list) if achieved_list else "none"

    # Format semantic map view (simplified version)
    map_view = _format_semantic_map_view(obs_data, VIEW_SIZE)

    return (
        f"=== CRAFTER GAME STATE ===\n"
        f"Step: {step_count}/{max_steps}\n"
        f"Health: {health}\n"
        f"Position: {pos}\n"
        f"Facing: {direction}\n"
        f"Inventory: {inventory_str}\n"
        f"Achievements: {achievements_str}\n"
        f"{map_view}\n\n"
        f"Choose your next actions.\n"
    )
186
+
187
+ def _try_build_dynamic_mapping():
188
+ """Attempt to build id->name mapping from a real Crafter env.
189
+
190
+ Returns a list where index is semantic ID and value is the lowercase name.
191
+ On failure (crafter not installed or internal API changed), returns None.
192
+ """
193
+ try:
194
+ import crafter # type: ignore
195
+ except Exception:
196
+ return None
197
+
198
+ dummyenv = None
199
+ try:
200
+ dummyenv = crafter.Env()
201
+ # Combine material IDs and semantic view object IDs
202
+ world_ids = getattr(dummyenv, "_world", None)
203
+ sem_view = getattr(dummyenv, "_sem_view", None)
204
+ if world_ids is None or sem_view is None:
205
+ return None
206
+ mat_ids = getattr(world_ids, "_mat_ids", None)
207
+ obj_ids = getattr(sem_view, "_obj_ids", None)
208
+ if not isinstance(mat_ids, dict) or not isinstance(obj_ids, dict):
209
+ return None
210
+ max_id = max(max(mat_ids.values()), max(obj_ids.values())) + 1
211
+ id_to_item = ["void"] * max_id
212
+ for name, idx in itertools.chain(mat_ids.items(), obj_ids.items()):
213
+ if name is None:
214
+ clean = "none"
215
+ elif hasattr(name, "__name__"):
216
+ clean = name.__name__.lower()
217
+ else:
218
+ clean = str(name).lower()
219
+ if 0 <= idx < len(id_to_item):
220
+ id_to_item[idx] = clean
221
+ return id_to_item
222
+ except Exception:
223
+ return None
224
+ finally:
225
+ try:
226
+ if dummyenv is not None:
227
+ dummyenv.close()
228
+ except Exception:
229
+ pass
230
+
231
+
232
# Resolve the id->name table once at import time from a real Crafter env when
# possible; the static table below is consulted whenever that is unavailable
# or does not cover an id.
_ID_TO_NAME = _try_build_dynamic_mapping()
_FALLBACK_ID_TO_NAME = dict(
    enumerate(
        (
            "none",  # None from materials
            "water",
            "grass",
            "stone",
            "path",
            "sand",
            "tree",
            "lava",
            "coal",
            "iron",
            "diamond",
            "table",
            "furnace",
            "player",
            "cow",
            "zombie",
            "skeleton",
            "arrow",
            "plant",
        )
    )
)
255
+
256
+
257
def _format_semantic_map_view(obs_data: Dict[str, Any], view_size: int = VIEW_SIZE) -> str:
    """Format the semantic map into a text representation using dynamic IDs.

    Shows a local view around the player with nearby objects.

    Args:
        obs_data: Observation dictionary; reads ``semantic_map`` (2D, or a
            flat array of square length) and ``player_position``.
        view_size: Nominal edge length of the window. The rendered window
            always spans ``view_size // 2`` cells on each side of the
            player, so an even ``view_size`` yields ``view_size + 1`` cells
            per edge; the header reports the size actually rendered.

    Returns:
        A header line followed by the grid, or ``"Map view unavailable"``
        when the map is missing or cannot be interpreted as a 2D grid.
    """
    semantic_map = obs_data.get("semantic_map")
    if semantic_map is None:
        return "Map view unavailable"

    # Treat an explicit None position like a missing one.
    player_position = obs_data.get("player_position")
    if player_position is None:
        player_position = [0, 0]

    # Convert to numpy array if needed
    sem_arr = np.asarray(semantic_map)
    if sem_arr.ndim == 1:
        # Reshape flat array to 2D; a non-square length cannot be a grid,
        # so report it instead of letting reshape raise.
        side = int(round(sem_arr.size ** 0.5))
        if side * side != sem_arr.size:
            return "Map view unavailable"
        sem_arr = sem_arr.reshape(side, side)
    if sem_arr.ndim != 2:
        return "Map view unavailable"

    px, py = map(int, player_position)
    half = view_size // 2
    grid_side = 2 * half + 1

    # Choose mapping source
    use_list = isinstance(_ID_TO_NAME, list) and len(_ID_TO_NAME) > 0
    n_rows, n_cols = sem_arr.shape

    # Build matrix centered at player, then transpose for human-friendly view
    matrix: List[List[str]] = []
    for dy in range(-half, half + 1):
        row_tokens: List[str] = []
        for dx in range(-half, half + 1):
            x, y = px + dx, py + dy
            if not (0 <= x < n_rows and 0 <= y < n_cols):
                row_tokens.append("void")
            elif dx == 0 and dy == 0:
                row_tokens.append("player")
            else:
                obj_id = int(sem_arr[x, y])
                if use_list and 0 <= obj_id < len(_ID_TO_NAME):
                    name = _ID_TO_NAME[obj_id]  # type: ignore[index]
                else:
                    # Unknown ids are shown as their numeric value.
                    name = _FALLBACK_ID_TO_NAME.get(obj_id, str(obj_id))
                row_tokens.append(name)
        matrix.append(row_tokens)

    transposed = list(zip(*matrix))
    grid_rows: List[str] = [" ".join(row) for row in transposed]
    return f"\nLocal Map View ({grid_side}x{grid_side}):\n" + "\n".join(grid_rows)
@@ -0,0 +1,47 @@
1
+ """OpenAI tools schema for Crafter, defined in Python."""
2
+
3
+ # Pass this list directly to OpenAI/vLLM `tools=`
4
TOOLS_SCHEMA = [
    # Single function tool: run a short batch of Crafter actions in order.
    dict(
        type="function",
        function=dict(
            name="interact_many",
            description="Execute a short sequence of Crafter actions in order (1-8).",
            parameters=dict(
                type="object",
                properties=dict(
                    actions=dict(
                        type="array",
                        description="List of Crafter actions to execute sequentially.",
                        items=dict(
                            type="string",
                            enum=[
                                "noop",
                                "move_left",
                                "move_right",
                                "move_up",
                                "move_down",
                                "do",
                                "sleep",
                                "place_stone",
                                "place_table",
                                "place_furnace",
                                "place_plant",
                                "make_wood_pickaxe",
                                "make_stone_pickaxe",
                                "make_iron_pickaxe",
                                "make_wood_sword",
                                "make_stone_sword",
                                "make_iron_sword",
                            ],
                        ),
                        # Bounds matching the "(1-8)" in the description.
                        minItems=1,
                        maxItems=8,
                    ),
                ),
                required=["actions"],
                additionalProperties=False,
            ),
        ),
    ),
]
@@ -0,0 +1,202 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Optional
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import JSONResponse
9
+ from pydantic import BaseModel
10
+ from starlette.requests import Request
11
+
12
+
13
class TaskApp:
    """Container for the service's configuration.

    Each setting comes from the constructor argument when it is truthy and
    otherwise from an environment variable: ``SERVICE_BASE_URL``,
    ``VLLM_BASE_URL`` and ``DEFAULT_MODEL`` respectively.
    """

    def __init__(
        self,
        service_base_url: Optional[str] = None,
        vllm_base_url: Optional[str] = None,
        default_model: Optional[str] = None,
    ) -> None:
        # The environment is only consulted when no explicit value is given.
        if not service_base_url:
            service_base_url = os.getenv("SERVICE_BASE_URL", "http://localhost:8000")
        self.service_base_url = service_base_url

        if not vllm_base_url:
            vllm_base_url = os.getenv("VLLM_BASE_URL", "http://localhost:8001")
        self.vllm_base_url = vllm_base_url

        # No built-in default: stays None unless DEFAULT_MODEL is set.
        if not default_model:
            default_model = os.getenv("DEFAULT_MODEL")
        self.default_model = default_model
29
+
30
+
31
class ServiceInfo(BaseModel):
    """Payload of the service-discovery (``/info``) endpoint."""

    # Task-service section: its base URL and the endpoint patterns it serves.
    service: dict
    # Inference section: base URL, endpoint paths and the default model.
    inference: dict
36
+
37
+
38
def create_app(allowed_environments: Optional[list[str]] = None) -> FastAPI:
    """FastAPI app factory.

    Builds the hosted environment/policy service: CORS, an optional
    environment allow-list middleware, the env/policy/rollout/branching
    routers, and the ``/info`` and ``/health`` endpoints.

    Args:
        allowed_environments: List of environment names this service is allowed to handle.
            If None, all environments are allowed (for backward compatibility).

    Returns:
        The configured FastAPI application.
    """
    import json

    env_filter = f" ({', '.join(allowed_environments)})" if allowed_environments else ""
    app = FastAPI(
        title=f"GRPO Synth Envs Hosted Service{env_filter}",
        description=f"Hosted environment and policy service for GRPO training{env_filter}",
        version="0.1.0",
    )

    # Add CORS middleware for development
    # NOTE(review): wildcard origins together with allow_credentials=True is
    # very permissive; tighten before exposing this service publicly.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Initialize task app configuration
    task_app = TaskApp()
    app.state.task_app = task_app
    app.state.allowed_environments = allowed_environments

    # Add environment validation middleware
    if allowed_environments:
        # Normalised once here instead of on every request.
        allowed_lower = {e.lower() for e in allowed_environments}

        @app.middleware("http")
        async def validate_environment(request, call_next):
            path = request.url.path
            is_env_request = path.startswith("/env/") or path.startswith("/rollout")

            # Only POST bodies carry an env_name to check.
            if is_env_request and request.method == "POST":
                body = await request.body()
                try:
                    data = json.loads(body) if body else {}
                except ValueError:
                    # Invalid JSON or undecodable bytes: let the endpoint
                    # handle it (JSONDecodeError and UnicodeDecodeError are
                    # both ValueError subclasses).
                    data = None

                # Bodies that are not JSON objects carry no env_name.
                raw_env = data.get("env_name") if isinstance(data, dict) else None
                env_name = str(raw_env).lower() if raw_env is not None else ""

                if env_name and env_name not in allowed_lower:
                    # Return the response directly: an HTTPException raised
                    # inside an HTTP middleware is not routed through
                    # FastAPI's exception handlers and would surface as a
                    # 500 instead of the intended 403.
                    return JSONResponse(
                        status_code=403,
                        content={
                            "detail": f"Environment '{env_name}' not allowed. This service only handles: {allowed_environments}"
                        },
                    )

                # Keep the consumed body available to downstream handlers
                request._body = body

            return await call_next(request)

    # Mount routers
    from .environment_routes import router as env_router
    from .rollout import router as rollout_router
    from .branching import router as branching_router

    app.include_router(env_router, prefix="/env", tags=["environment"])

    # Policy routes are optional; skip if optional envs are missing in this build
    try:
        from .policy_routes import router as policy_router
        app.include_router(policy_router, prefix="/policy", tags=["policy"])
    except Exception as _e:
        # Log lightweight message; policy endpoints will be unavailable
        try:
            print(f"[hosted_app] Skipping policy routes: {_e}", flush=True)
        except Exception:
            pass

    app.include_router(rollout_router, tags=["rollout"])
    app.include_router(branching_router, tags=["branching"])

    @app.get("/info", response_model=ServiceInfo)
    async def get_info() -> ServiceInfo:
        """Service discovery endpoint."""
        return ServiceInfo(
            service={
                "base_url": task_app.service_base_url,
                "endpoints": {
                    "env": "/env/*",
                    "policy": "/policy/*",
                    "rollout": "/rollout",
                    "branch": "/branch",
                    "run": "/run/*",
                },
            },
            inference={
                "base_url": task_app.vllm_base_url,
                "endpoints": {
                    "chat_completions": "/v1/chat/completions",
                },
                "default_model": task_app.default_model,
            },
        )

    @app.get("/health")
    async def health_check(request: Request) -> dict:
        """Health and auth sanity check.

        - Returns 503 if the server has no environment API key configured
          (misconfigured container).
        - If the request is not authorized, still returns 200 but with
          ``authorized: False`` so that CLI preflight checks do not fail.
        - Otherwise returns 200 with ``authorized: True`` and basic info.
        """
        # Check if any environment API keys are configured
        from synth_ai.task.auth import allowed_environment_api_keys
        allowed_keys = allowed_environment_api_keys()
        if not allowed_keys:
            # Server-side misconfiguration; rollout would fail with 503
            return JSONResponse(
                status_code=503,
                content={
                    "status": "unhealthy",
                    "detail": "Auth not configured: missing ENVIRONMENT_API_KEY in task service environment",
                },
            )

        # Authorize using all header variants without typed Header params (avoid 422s)
        from synth_ai.task.auth import is_api_key_header_authorized
        authorized = is_api_key_header_authorized(request)
        if not authorized:
            # Soft-pass 200 with authorized=False to avoid failing CLI preflight
            primary_key = next(iter(allowed_keys), None)
            # NOTE(review): this discloses the first half of a valid API key
            # to unauthenticated callers. Kept because clients may read
            # "expected_api_key_prefix"; consider shortening or removing it.
            prefix = primary_key[: max(1, len(primary_key) // 2)] if primary_key else None
            content = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return {"status": "healthy", "authorized": True, "service": {"base_url": task_app.service_base_url}}

    # Log and surface 422 validation errors with header presence
    from fastapi.exceptions import RequestValidationError

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Logging is best effort and must never mask the 422 itself.
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": exc.errors()[:5],
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            pass
        return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})

    return app
@@ -0,0 +1,5 @@
1
+ """Inference module for OpenAI-compatible API clients."""
2
+
3
+ from .openai_client import OpenAIClient, create_inference_client
4
+
5
+ __all__ = ["OpenAIClient", "create_inference_client"]