synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (111) hide show
  1. synth_ai/cli/__init__.py +6 -0
  2. synth_ai/cli/demo.py +68 -9
  3. synth_ai/cli/rl_demo.py +137 -0
  4. synth_ai/cli/root.py +65 -0
  5. synth_ai/demos/core/__init__.py +1 -0
  6. synth_ai/demos/core/cli.py +621 -0
  7. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  8. synth_ai/demos/demo_task_apps/core.py +374 -0
  9. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  10. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  11. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  12. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  13. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  14. synth_ai/environments/examples/bandit/__init__.py +33 -0
  15. synth_ai/environments/examples/bandit/engine.py +294 -0
  16. synth_ai/environments/examples/bandit/environment.py +194 -0
  17. synth_ai/environments/examples/bandit/taskset.py +200 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  82. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  83. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  84. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  85. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  86. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  87. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  88. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  89. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  90. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  91. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  92. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  93. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  94. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  96. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  97. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  98. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  99. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  100. synth_ai/environments/examples/red/units/__init__.py +1 -0
  101. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  102. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  103. synth_ai/environments/service/app.py +8 -0
  104. synth_ai/install_sqld.sh +40 -0
  105. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  106. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +110 -11
  107. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  108. synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
  109. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  110. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  111. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,221 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple MiniGrid evaluation script to generate traces.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import os
9
+ import sys
10
+ import uuid
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+
14
+ # Add parent directories to path
15
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent))
16
+
17
+ import base64
18
+ import io
19
+
20
+ import gymnasium as gym
21
+ import minigrid
22
+ import numpy as np
23
+ from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
24
+ from PIL import Image
25
+
26
+
27
+ # Environment setup
28
def create_minigrid_env(env_name="MiniGrid-Empty-6x6-v0"):
    """Create a MiniGrid environment with image observations.

    Args:
        env_name: Environment id handed to ``gym.make``.

    Returns:
        The environment produced by ``gym.make`` wrapped first in
        ``RGBImgPartialObsWrapper`` and then in ``ImgObsWrapper``.
    """
    env = gym.make(env_name)
    # Wrap to get RGB image observations. The wrapper order matters: the RGB
    # wrapper is applied first and ImgObsWrapper is applied on top of it.
    env = RGBImgPartialObsWrapper(env)
    env = ImgObsWrapper(env)
    return env
35
+
36
+
37
def image_to_base64(image_array):
    """Convert an image array to a base64-encoded PNG string.

    Args:
        image_array: Array-like pixel data. It is cast to ``uint8`` before
            being handed to ``PIL.Image.fromarray``.

    Returns:
        The PNG file bytes, base64-encoded and decoded to a ``str``.
    """
    # np.asarray is a no-op for ndarrays and lets nested lists through too.
    pixels = np.asarray(image_array).astype(np.uint8)
    img = Image.fromarray(pixels)

    # Render the PNG into memory; getvalue() returns the whole buffer without
    # needing to rewind it first.
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
48
+
49
+
50
# Action index -> name table used by get_action_name; built once at import.
_ACTION_NAMES = {
    0: "left",
    1: "right",
    2: "forward",
    3: "pickup",
    4: "drop",
    5: "toggle",
    6: "done",
}


def get_action_name(action_idx):
    """Map an action index to its name.

    Args:
        action_idx: Action index to look up.

    Returns:
        The name registered for ``action_idx`` (indices 0 through 6), or the
        fallback string ``"action_<action_idx>"`` for any other value.
    """
    return _ACTION_NAMES.get(action_idx, f"action_{action_idx}")
62
+
63
+
64
def _build_step_partition(mission, obs, action, reward, terminated, truncated):
    """Package one environment step in the nested trace-partition layout.

    All values are cast to built-in Python types so the result can be passed
    to ``json.dump``: a sampled action is a NumPy integer, which the ``json``
    module refuses to serialise.
    """
    # The observation may be a bare image array or a dict holding one.
    frame = obs if isinstance(obs, np.ndarray) else obs["image"]
    return {
        "events": [
            {
                "environment_compute_steps": [
                    {
                        "compute_output": [
                            {
                                "outputs": {
                                    "observation": {
                                        "mission": mission,
                                        "image_base64": image_to_base64(frame),
                                    },
                                    "action": int(action),
                                    "reward": float(reward),
                                    "terminated": bool(terminated),
                                    "truncated": bool(truncated),
                                }
                            }
                        ]
                    }
                ]
            }
        ]
    }


async def run_simple_minigrid_eval(
    model_name="simple-agent",
    env_name="MiniGrid-Empty-6x6-v0",
    num_episodes=3,
    max_steps=50,
):
    """Run a simple evaluation to generate MiniGrid traces.

    Each episode is played with a random policy biased towards moving
    forward. One JSON trace per episode is written to
    ``src/evals/minigrid/run_<unix-time>/traces/`` and an
    ``evaluation_summary.json`` is written next to that directory.

    The coroutine contains no ``await``; it is ``async`` only so existing
    callers (``asyncio.run(...)``) keep working.

    Args:
        model_name: Label stored in the trace metadata and the summary.
        env_name: Environment id passed to ``create_minigrid_env``.
        num_episodes: Number of episodes to roll out.
        max_steps: Upper bound on steps per episode.

    Returns:
        The summary dict that was written to ``evaluation_summary.json``.
    """
    print("\n🎮 Running MiniGrid Evaluation")
    print(f" Environment: {env_name}")
    print(f" Episodes: {num_episodes}")
    print(f" Max steps: {max_steps}")

    # Take a single clock reading so the timestamp and run id always agree.
    started_at = datetime.now()
    timestamp = started_at.strftime("%Y%m%d_%H%M%S")
    run_id = f"run_{int(started_at.timestamp())}"

    # Create output directory
    output_dir = Path(f"src/evals/minigrid/{run_id}")
    traces_dir = output_dir / "traces"
    traces_dir.mkdir(parents=True, exist_ok=True)

    results = []

    for episode in range(num_episodes):
        print(f"\n📍 Episode {episode + 1}/{num_episodes}")

        # Initialize trace
        trace_id = str(uuid.uuid4())
        trace_data = {
            "trace": {
                "metadata": {
                    "model_name": model_name,
                    "env_name": env_name,
                    "difficulty": "easy",
                    "seed": episode,
                    "max_steps": max_steps,
                },
                "partition": [],
            },
            "dataset": {"reward_signals": []},
        }

        total_reward = 0.0
        # Pre-initialised so the success check below is defined even when the
        # loop body never runs (max_steps <= 0).
        last_reward = 0.0
        done = False
        step = 0

        env = create_minigrid_env(env_name)
        try:
            # Seed the reset with the value recorded in the metadata. The
            # action sampling below is NOT seeded, so runs are still random.
            obs, _info = env.reset(seed=episode)

            while not done and step < max_steps:
                # Simple policy: random actions with bias towards forward
                if np.random.random() < 0.6:
                    action = 2  # forward
                else:
                    action = env.action_space.sample()

                next_obs, reward, terminated, truncated, _info = env.step(action)
                done = terminated or truncated
                total_reward += reward
                last_reward = reward

                # The partition pairs the observation seen *before* the action
                # with the action taken and the reward it produced.
                trace_data["trace"]["partition"].append(
                    _build_step_partition(
                        getattr(env.unwrapped, "mission", "Reach the goal"),
                        obs,
                        action,
                        reward,
                        terminated,
                        truncated,
                    )
                )

                obs = next_obs
                step += 1
        finally:
            # Release the environment even if a step or the encoding fails.
            env.close()

        # Success is judged on the reward of the final step taken.
        success = bool(last_reward > 0)

        if done and success:
            print(f" ✅ Success! Reached goal in {step} steps")
        if not done:
            print(f" ⏰ Timeout after {step} steps")

        # Update trace metadata
        trace_data["trace"]["metadata"]["success"] = success
        trace_data["trace"]["metadata"]["num_steps"] = step
        trace_data["dataset"]["reward_signals"].append({"reward": float(total_reward)})

        # Save trace
        trace_file = traces_dir / f"minigrid_trace_{trace_id}.json"
        with open(trace_file, "w", encoding="utf-8") as f:
            json.dump(trace_data, f, indent=2)

        results.append(
            {
                "trace_id": trace_id,
                "success": success,
                "steps": step,
                "total_reward": float(total_reward),
            }
        )

        print(f" 💾 Saved trace: {trace_file.name}")

    # Save evaluation summary. With zero episodes the averages are reported
    # as 0.0 instead of dividing by zero.
    episode_count = len(results)
    if episode_count:
        success_rate = sum(1 for r in results if r["success"]) / episode_count
        avg_steps = sum(r["steps"] for r in results) / episode_count
    else:
        success_rate = 0.0
        avg_steps = 0.0

    summary = {
        "run_id": run_id,
        "timestamp": timestamp,
        "environment": env_name,
        "model_name": model_name,
        "num_episodes": num_episodes,
        "results": results,
        "success_rate": success_rate,
        "avg_steps": avg_steps,
        "models_evaluated": [model_name],
        "difficulties_evaluated": ["easy"],
    }

    summary_file = output_dir / "evaluation_summary.json"
    with open(summary_file, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print("\n✅ Evaluation complete!")
    print(f" Success rate: {summary['success_rate']:.1%}")
    print(f" Average steps: {summary['avg_steps']:.1f}")
    print(f" Output directory: {output_dir}")

    return summary
215
+
216
+
217
if __name__ == "__main__":
    # Script entry point: run a short three-episode evaluation on the default
    # empty 6x6 grid, capped at 30 steps per episode.
    asyncio.run(
        run_simple_minigrid_eval(env_name="MiniGrid-Empty-6x6-v0", num_episodes=3, max_steps=30)
    )