synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (154) hide show
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/balance.py +3 -15
  4. synth_ai/cli/demo.py +68 -9
  5. synth_ai/cli/rl_demo.py +137 -0
  6. synth_ai/cli/root.py +65 -0
  7. synth_ai/config/base_url.py +47 -0
  8. synth_ai/demos/core/__init__.py +1 -0
  9. synth_ai/demos/core/cli.py +621 -0
  10. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/core.py +374 -0
  12. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  13. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  14. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  15. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  16. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  17. synth_ai/environments/examples/bandit/__init__.py +33 -0
  18. synth_ai/environments/examples/bandit/engine.py +294 -0
  19. synth_ai/environments/examples/bandit/environment.py +194 -0
  20. synth_ai/environments/examples/bandit/taskset.py +200 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  83. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  84. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  85. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  86. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  87. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  88. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  89. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  90. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  91. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  92. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  93. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  94. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  97. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  98. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  100. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  101. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  102. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  103. synth_ai/environments/examples/red/units/__init__.py +1 -0
  104. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  105. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  106. synth_ai/environments/service/app.py +8 -0
  107. synth_ai/http.py +102 -0
  108. synth_ai/inference/__init__.py +7 -0
  109. synth_ai/inference/client.py +20 -0
  110. synth_ai/install_sqld.sh +40 -0
  111. synth_ai/jobs/client.py +246 -0
  112. synth_ai/learning/__init__.py +24 -0
  113. synth_ai/learning/client.py +149 -0
  114. synth_ai/learning/config.py +43 -0
  115. synth_ai/learning/constants.py +29 -0
  116. synth_ai/learning/ft_client.py +59 -0
  117. synth_ai/learning/health.py +43 -0
  118. synth_ai/learning/jobs.py +205 -0
  119. synth_ai/learning/rl_client.py +256 -0
  120. synth_ai/learning/sse.py +58 -0
  121. synth_ai/learning/validators.py +48 -0
  122. synth_ai/lm/core/main_v3.py +13 -0
  123. synth_ai/lm/core/synth_models.py +48 -0
  124. synth_ai/lm/core/vendor_clients.py +9 -6
  125. synth_ai/lm/vendors/core/openai_api.py +31 -3
  126. synth_ai/lm/vendors/openai_standard.py +45 -14
  127. synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
  128. synth_ai/lm/vendors/synth_client.py +372 -28
  129. synth_ai/rl/__init__.py +30 -0
  130. synth_ai/rl/contracts.py +32 -0
  131. synth_ai/rl/env_keys.py +137 -0
  132. synth_ai/rl/secrets.py +19 -0
  133. synth_ai/scripts/verify_rewards.py +100 -0
  134. synth_ai/task/__init__.py +10 -0
  135. synth_ai/task/contracts.py +120 -0
  136. synth_ai/task/health.py +28 -0
  137. synth_ai/task/validators.py +12 -0
  138. synth_ai/tracing_v3/hooks.py +3 -1
  139. synth_ai/tracing_v3/session_tracer.py +123 -2
  140. synth_ai/tracing_v3/turso/manager.py +218 -0
  141. synth_ai/tracing_v3/turso/models.py +53 -0
  142. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  143. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +147 -30
  144. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  145. synth_ai/tui/__init__.py +0 -1
  146. synth_ai/tui/__main__.py +0 -13
  147. synth_ai/tui/cli/__init__.py +0 -1
  148. synth_ai/tui/cli/query_experiments.py +0 -164
  149. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  150. synth_ai/tui/dashboard.py +0 -340
  151. synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
  152. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  154. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,222 @@
1
+ #!/usr/bin/env python3
2
+ """Analyze the latest run to understand why no achievements were unlocked."""
3
+
4
+ import duckdb
5
+ import json
6
+ from pathlib import Path
7
+ from collections import Counter, defaultdict
8
+ from datetime import datetime
9
+
10
def analyze_latest_run(db_path: str):
    """Print a diagnostic report for the most recent experiment in a trace database.

    Opens the DuckDB file at ``db_path`` read-only, selects the newest row of
    ``experiments`` (ordered by ``created_at``) and, for every session linked
    to it, prints the action distribution, runs of repeated actions, short
    action sequences, achievement progress and the final public state. The
    report ends with the number of ``lm``/``generation`` events recorded for
    the experiment.

    Args:
        db_path: Path to the DuckDB trace database.

    Returns:
        None. Everything is written to stdout.
    """
    conn = duckdb.connect(db_path, read_only=True)
    try:
        _report_latest_experiment(conn)
    finally:
        # Release the database handle even if a query or JSON decode fails.
        conn.close()


def _report_latest_experiment(conn):
    """Find the newest experiment and print the report for each of its sessions."""
    # NOTE(review): the step and achievement counts in this banner are
    # hard-coded text, not values read from the database.
    print("🔍 Analyzing latest run with 97 steps and 0 achievements...\n")

    # Get the latest experiment
    latest_experiment = conn.execute("""
        SELECT experiment_id, experiment_name, created_at
        FROM experiments
        ORDER BY created_at DESC
        LIMIT 1
    """).fetchone()

    if not latest_experiment:
        # Say so explicitly instead of ending with no output at all.
        print("No experiments found")
        return

    exp_id, exp_name, created_at = latest_experiment
    print(f"📊 Latest Experiment: {exp_name}")
    print(f" ID: {exp_id}")
    print(f" Created: {created_at}")

    # Get all sessions from latest experiment
    sessions = conn.execute("""
        SELECT DISTINCT s.session_id, s.num_timesteps, s.num_events
        FROM session_traces s
        JOIN events e ON s.session_id = e.session_id
        WHERE e.timestep_id IN (
            SELECT id FROM session_timesteps
            WHERE experiment_id = ?
        )
        ORDER BY s.created_at DESC
    """, [exp_id]).fetchall()

    print(f"\n📊 Sessions in latest experiment: {len(sessions)}")

    for session_id, num_timesteps, num_events in sessions:
        _report_latest_run_session(conn, session_id, num_timesteps, num_events)

    # Check for any generation/LM events
    rule = "=" * 60
    print(f"\n{rule}")
    print("📊 LM/GENERATION EVENT CHECK")
    print(rule)

    lm_events = conn.execute("""
        SELECT COUNT(*)
        FROM events
        WHERE session_id IN (
            SELECT DISTINCT s.session_id
            FROM session_traces s
            JOIN events e ON s.session_id = e.session_id
            WHERE e.timestep_id IN (
                SELECT id FROM session_timesteps
                WHERE experiment_id = ?
            )
        ) AND (event_type = 'lm' OR event_type = 'generation')
    """, [exp_id]).fetchone()[0]

    print(f"LM/Generation events in this experiment: {lm_events}")

    if lm_events == 0:
        print("⚠️ No LM events found - the agent may not be generating proper decisions")


def _report_latest_run_session(conn, session_id, num_timesteps, num_events):
    """Print action, achievement and final-state statistics for one session."""
    from itertools import groupby

    rule = "=" * 60
    print(f"\n{rule}")
    print(f"SESSION: {session_id}")
    print(f"Timesteps: {num_timesteps}, Events: {num_events}")
    print(rule)

    # Get all events for this session
    events = conn.execute("""
        SELECT event_type, metadata, system_state_after, reward
        FROM events
        WHERE session_id = ?
        ORDER BY id
    """, [session_id]).fetchall()

    actions_taken = []
    action_sequences = []
    current_sequence = []
    total_reward = 0
    achievements_over_time = []
    last_inventory_sample = None  # action count at which inventory was last printed

    for event_type, metadata_str, state_after_str, reward in events:
        if reward:
            total_reward += reward

        if metadata_str:
            metadata = json.loads(metadata_str)

            # Track runtime actions
            if event_type == 'runtime' and 'action_name' in metadata:
                action = metadata['action_name']
                actions_taken.append(action)
                current_sequence.append(action)

                # These actions end the current run: store the actions that
                # preceded them and start a new run from the action itself.
                if action in ('do', 'make_wood_pickaxe', 'place_table', 'sleep'):
                    if len(current_sequence) > 1:
                        action_sequences.append(current_sequence[:-1])
                    current_sequence = [action]

        if not state_after_str:
            continue
        state_after = json.loads(state_after_str)
        if 'public_state' not in state_after:
            continue
        ps = state_after['public_state']

        # Check achievements
        if 'achievements_status' in ps:
            unlocked = [k for k, v in ps['achievements_status'].items() if v]
            achievements_over_time.append(len(unlocked))

        # Sample the inventory every 20 actions. Several events can share one
        # action count, so remember the last printed count to print only once.
        n_actions = len(actions_taken)
        if ('inventory' in ps and n_actions % 20 == 0
                and n_actions != last_inventory_sample):
            non_zero = {k: v for k, v in ps['inventory'].items() if v > 0}
            if non_zero:
                last_inventory_sample = n_actions
                print(f"\n📦 Inventory at action {n_actions}: {non_zero}")

    # Action analysis
    print(f"\n📊 ACTION ANALYSIS")
    print(f"Total actions taken: {len(actions_taken)}")

    if actions_taken:
        print(f"\nAction distribution:")
        for action, count in Counter(actions_taken).most_common():
            percentage = (count / len(actions_taken)) * 100
            print(f" {action:20} {count:4} ({percentage:5.1f}%)")

    # Check for repetitive patterns: runs of more than 3 identical actions.
    print(f"\n🔄 REPETITIVE PATTERNS")
    run_lengths = (
        (action, sum(1 for _ in run)) for action, run in groupby(actions_taken)
    )
    consecutive_repeats = [(action, n) for action, n in run_lengths if n > 3]

    if consecutive_repeats:
        print("Found repetitive sequences:")
        for action, count in consecutive_repeats[:5]:
            print(f" {action} repeated {count} times consecutively")
    else:
        print("No significant repetitive patterns found")

    # Check action sequences (only those of at most 5 actions are counted)
    if action_sequences:
        print(f"\n🎯 ACTION SEQUENCES (movement -> action):")
        seq_counter = Counter(
            ' → '.join(seq) for seq in action_sequences if len(seq) <= 5
        )
        for seq, count in seq_counter.most_common(5):
            print(f" {seq}: {count} times")

    # Achievement progress
    print(f"\n🏆 ACHIEVEMENT PROGRESS")
    print(f"Total reward: {total_reward}")
    if achievements_over_time:
        print(f"Max achievements reached: {max(achievements_over_time)}")

        # Record each point where the unlocked count exceeds the previous
        # maximum. "Step" is the index among events that carried an
        # achievements_status, not the action number.
        achievement_unlocks = []
        prev_count = 0
        for i, count in enumerate(achievements_over_time):
            if count > prev_count:
                achievement_unlocks.append((i, count))
                prev_count = count

        if achievement_unlocks:
            print("Achievement unlock timeline:")
            for step, count in achievement_unlocks:
                print(f" Step {step}: {count} achievements")
    else:
        print("No achievement data found")

    # Check final state (system_state_after of the last event, if any)
    final_event = events[-1] if events else None
    if final_event and final_event[2]:
        final_state = json.loads(final_event[2])
        if 'public_state' in final_state:
            ps = final_state['public_state']

            print(f"\n📊 FINAL STATE")

            # Final inventory
            if 'inventory' in ps:
                non_zero = {k: v for k, v in ps['inventory'].items() if v > 0}
                print(f"Final inventory: {non_zero if non_zero else 'Empty'}")

            # Final achievements
            if 'achievements_status' in ps:
                unlocked = [k for k, v in ps['achievements_status'].items() if v]
                print(f"Final achievements: {unlocked if unlocked else 'None'}")

            # Player stats
            if 'health' in ps:
                print(f"Final stats: Health={ps.get('health', '?')}, Food={ps.get('food', '?')}, Energy={ps.get('energy', '?')}")
+
217
+ if __name__ == "__main__":
218
+ db_path = "./traces_v2_synth/traces.duckdb"
219
+ if Path(db_path).exists():
220
+ analyze_latest_run(db_path)
221
+ else:
222
+ print(f"āŒ Database not found at {db_path}")
@@ -0,0 +1,183 @@
1
+ #!/usr/bin/env python3
2
+ """Analyze LM traces to see what the agent is actually doing."""
3
+
4
+ import duckdb
5
+ import json
6
+ from pathlib import Path
7
+ from collections import Counter, defaultdict
8
+
9
def analyze_lm_traces(db_path: str):
    """Print an overview of LM-related activity stored in a trace database.

    Opens the DuckDB file at ``db_path`` read-only and prints, in order: the
    number of ``lm``/``generation`` events, the count of every event type, a
    per-session summary for the first five session ids, a sample of events
    whose metadata mentions "model", a sample of the ``messages`` table, and
    a tally of tool/function names found in event metadata.

    Args:
        db_path: Path to the DuckDB trace database.

    Returns:
        None. Everything is written to stdout.
    """
    conn = duckdb.connect(db_path, read_only=True)
    try:
        _report_lm_traces(conn)
    finally:
        # Release the database handle even if a query or JSON decode fails.
        conn.close()


def _report_lm_traces(conn):
    """Run every section of the LM trace report against an open connection."""
    rule = "=" * 50

    print("🔍 Analyzing LM traces after fix...\n")

    # First, check if we have any LM events
    print("📊 CHECKING FOR LM EVENTS")
    print(rule)

    lm_events = conn.execute("""
        SELECT COUNT(*)
        FROM events
        WHERE event_type = 'lm' OR event_type = 'generation'
    """).fetchone()[0]

    print(f"LM/Generation events found: {lm_events}")

    # Get all unique event types
    event_types = conn.execute("""
        SELECT DISTINCT event_type, COUNT(*) as count
        FROM events
        GROUP BY event_type
        ORDER BY count DESC
    """).fetchall()

    print("\nAll event types:")
    for event_type, count in event_types:
        print(f" {event_type}: {count}")

    # Analyze sessions (first five session ids only)
    print("\n📊 SESSION ANALYSIS")
    print(rule)

    sessions = conn.execute("""
        SELECT DISTINCT session_id
        FROM events
        ORDER BY session_id
        LIMIT 5
    """).fetchall()

    for i, (session_id,) in enumerate(sessions):
        print(f"\n--- Session {i+1}: {session_id} ---")
        _report_lm_session(conn, session_id)

    # Look for any LM-related metadata
    print("\n📊 LM METADATA ANALYSIS")
    print(rule)

    # The LIKE filter is a plain substring match on the stored metadata text.
    model_events = conn.execute("""
        SELECT metadata
        FROM events
        WHERE metadata LIKE '%model%'
        LIMIT 10
    """).fetchall()

    if model_events:
        print(f"Found {len(model_events)} events with model metadata")
        for i, (metadata_str,) in enumerate(model_events[:3]):
            metadata = json.loads(metadata_str)
            if not isinstance(metadata, dict):
                # A JSON value that is not an object has no keys to list.
                print(f"\nEvent {i+1} metadata is not a JSON object")
                continue
            print(f"\nEvent {i+1} metadata keys: {list(metadata.keys())}")
            if 'model' in metadata:
                print(f" Model: {metadata['model']}")
    else:
        print("No events with model metadata found")

    # Check messages table
    print("\n📊 MESSAGES ANALYSIS")
    print(rule)

    message_count = conn.execute("SELECT COUNT(*) FROM messages").fetchone()[0]
    print(f"Total messages: {message_count}")

    if message_count > 0:
        messages = conn.execute("""
            SELECT message_type, content
            FROM messages
            LIMIT 10
        """).fetchall()

        for i, (msg_type, content_str) in enumerate(messages[:5]):
            try:
                content = json.loads(content_str)
            except (TypeError, ValueError):
                # NULL or non-JSON content: still list the message, but
                # there is no structure to inspect.
                content = None
            print(f"\nMessage {i+1} ({msg_type}):")
            if isinstance(content, dict):
                print(f" Keys: {list(content.keys())}")
                if 'payload' in content:
                    payload = content['payload']
                    if isinstance(payload, dict) and 'inventory' in payload:
                        # Show non-zero inventory items
                        inv = payload['inventory']
                        non_zero = {k: v for k, v in inv.items() if v > 0}
                        if non_zero:
                            print(f" Non-zero inventory: {non_zero}")

    # Check for tool calls or function calls
    print("\n📊 TOOL CALL ANALYSIS")
    print(rule)

    tool_events = conn.execute("""
        SELECT metadata
        FROM events
        WHERE metadata LIKE '%tool%' OR metadata LIKE '%function%'
        LIMIT 20
    """).fetchall()

    if tool_events:
        print(f"Found {len(tool_events)} events with tool/function mentions")
        tool_names = Counter()

        for (metadata_str,) in tool_events:
            metadata = json.loads(metadata_str)
            if not isinstance(metadata, dict):
                continue
            if 'tool_name' in metadata:
                name = metadata['tool_name']
            elif 'function' in metadata:
                name = metadata['function']
            else:
                continue
            try:
                tool_names[name] += 1
            except TypeError:
                # Unhashable value (for example a nested object): count it
                # under its repr instead of crashing.
                tool_names[repr(name)] += 1

        if tool_names:
            print("Tool usage:")
            for tool, count in tool_names.most_common():
                print(f" {tool}: {count}")
    else:
        print("No tool/function events found")


def _report_lm_session(conn, session_id):
    """Print event-type counts, top actions and unlocked achievements for a session."""
    # Get all events for this session
    events = conn.execute("""
        SELECT event_type, metadata, system_state_after
        FROM events
        WHERE session_id = ?
        ORDER BY id
    """, [session_id]).fetchall()

    print(f"Total events: {len(events)}")

    event_type_counts = Counter()
    actions_taken = []
    achievements = set()

    for event_type, metadata_str, state_after_str in events:
        event_type_counts[event_type] += 1

        if metadata_str:
            metadata = json.loads(metadata_str)

            # Track actions: 'action_name' takes precedence over 'action'.
            if 'action_name' in metadata:
                actions_taken.append(metadata['action_name'])
            elif 'action' in metadata:
                actions_taken.append(metadata['action'])

        # Check for achievements
        if state_after_str:
            state_after = json.loads(state_after_str)
            if 'public_state' in state_after:
                public_state = state_after['public_state']
                if 'achievements_status' in public_state:
                    achievements.update(
                        ach
                        for ach, unlocked in public_state['achievements_status'].items()
                        if unlocked
                    )

    print(f"Event type distribution: {dict(event_type_counts)}")
    print(f"Total actions: {len(actions_taken)}")
    if actions_taken:
        action_counts = Counter(actions_taken)
        print(f"Top actions: {action_counts.most_common(5)}")
    print(f"Achievements unlocked: {achievements if achievements else 'None'}")
177
+
178
+ if __name__ == "__main__":
179
+ db_path = "./traces_v2_synth/traces.duckdb"
180
+ if Path(db_path).exists():
181
+ analyze_lm_traces(db_path)
182
+ else:
183
+ print(f"āŒ Database not found at {db_path}")
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+ """Analyze why there are no rewards or achievements."""
3
+
4
+ import duckdb
5
+ import json
6
+ from pathlib import Path
7
+ from collections import Counter, defaultdict
8
+
9
def analyze_no_rewards(db_path: str):
    """Print a report on why the latest experiment shows no rewards or achievements.

    Opens the DuckDB file at ``db_path`` read-only, selects the newest row of
    ``experiments`` and, for the first three sessions linked to it, prints
    action counts, detected inventory gains, the context of ``do`` actions,
    pickaxe crafting attempts and the final inventory/achievements. It then
    prints the total of non-zero rewards for the experiment and a sample of
    reward values stored on ``environment`` events.

    Args:
        db_path: Path to the DuckDB trace database.

    Returns:
        None. Everything is written to stdout.
    """
    conn = duckdb.connect(db_path, read_only=True)
    try:
        _report_no_rewards(conn)
    finally:
        # Always release the handle, including on the early "no experiments"
        # exit and when a query or JSON decode fails.
        conn.close()


def _report_no_rewards(conn):
    """Run the per-session and overall reward analysis on an open connection."""
    print("🔍 Analyzing why there are no rewards or achievements...\n")

    # Get the latest experiment
    latest_exp = conn.execute("""
        SELECT experiment_id, experiment_name
        FROM experiments
        ORDER BY created_at DESC
        LIMIT 1
    """).fetchone()

    if not latest_exp:
        print("No experiments found")
        return

    exp_id, exp_name = latest_exp
    print(f"📊 Latest Experiment: {exp_name}")
    print(f" ID: {exp_id}\n")

    # Get all sessions from latest experiment
    sessions = conn.execute("""
        SELECT DISTINCT e.session_id
        FROM events e
        JOIN session_timesteps st ON e.timestep_id = st.id
        WHERE st.experiment_id = ?
        ORDER BY e.session_id
    """, [exp_id]).fetchall()

    print(f"Found {len(sessions)} sessions\n")

    # Analyze each session (first 3 only)
    for i, (session_id,) in enumerate(sessions[:3]):
        _report_reward_session(conn, i, session_id)

    # Check for any rewards across all sessions
    rule = "=" * 60
    print(f"\n\n{rule}")
    print("📊 OVERALL REWARD ANALYSIS")
    print(rule)

    total_rewards = conn.execute("""
        SELECT SUM(reward) as total, COUNT(*) as count
        FROM events
        WHERE reward IS NOT NULL AND reward != 0
        AND session_id IN (
            SELECT DISTINCT e.session_id
            FROM events e
            JOIN session_timesteps st ON e.timestep_id = st.id
            WHERE st.experiment_id = ?
        )
    """, [exp_id]).fetchone()

    if total_rewards:
        total, count = total_rewards
        # SUM over zero rows is NULL, hence the "or 0".
        print(f"Total non-zero rewards: {total or 0}")
        print(f"Number of reward events: {count or 0}")

    # Check if rewards are being recorded at all
    print("\n🔍 REWARD RECORDING CHECK")
    sample_rewards = conn.execute("""
        SELECT reward, metadata
        FROM events
        WHERE event_type = 'environment'
        AND session_id IN (
            SELECT DISTINCT e.session_id
            FROM events e
            JOIN session_timesteps st ON e.timestep_id = st.id
            WHERE st.experiment_id = ?
        )
        LIMIT 20
    """, [exp_id]).fetchall()

    reward_values = [r for r, _ in sample_rewards if r is not None]
    print(f"Sample reward values: {reward_values[:10]}")


def _report_reward_session(conn, index, session_id):
    """Print the action, inventory and achievement analysis for one session."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"SESSION {index}: {session_id}")
    print(rule)

    # Get all events for this session
    events = conn.execute("""
        SELECT event_type, metadata, system_state_after, reward
        FROM events
        WHERE session_id = ?
        ORDER BY id
    """, [session_id]).fetchall()

    # Track actions and their results
    action_results = []
    total_reward = 0
    achievements_timeline = []
    inventory_timeline = []  # only non-empty inventories are recorded

    for event_type, metadata_str, state_after_str, reward in events:
        # Sum rewards over every event: the reward-recording check in this
        # script reads rewards from 'environment' events, so restricting the
        # total to runtime events could miss them.
        if reward:
            total_reward += reward

        if event_type != 'runtime' or not metadata_str:
            continue

        metadata = json.loads(metadata_str)
        action_name = metadata.get('action_name', 'unknown')

        result = {
            'action': action_name,
            'reward': reward or 0,
            'achievements_unlocked': []
        }

        if state_after_str:
            state_after = json.loads(state_after_str)
            if 'public_state' in state_after:
                ps = state_after['public_state']

                # Check achievements
                if 'achievements_status' in ps:
                    unlocked = [k for k, v in ps['achievements_status'].items() if v]
                    achievements_timeline.append((action_name, unlocked))
                    if len(unlocked) > len(result['achievements_unlocked']):
                        result['achievements_unlocked'] = unlocked

                # Check inventory; vital stats are excluded so that only
                # collected items count.
                if 'inventory' in ps:
                    non_zero = {
                        k: v for k, v in ps['inventory'].items()
                        if v > 0 and k not in ['health', 'food', 'drink', 'energy']
                    }
                    if non_zero:
                        inventory_timeline.append((action_name, non_zero))

        action_results.append(result)

    # Analyze action effectiveness
    print(f"\n📊 ACTION ANALYSIS")
    print(f"Total actions: {len(action_results)}")
    print(f"Total reward: {total_reward}")

    # Count actions by type
    action_counts = Counter(r['action'] for r in action_results)
    print(f"\nAction distribution:")
    for action, count in action_counts.most_common(10):
        print(f" {action:20} {count:3}")

    # Check for successful resource collection. Each snapshot is compared
    # with the previous one; the first snapshot is compared with an empty
    # inventory so that the very first gain is reported too.
    print(f"\n📦 RESOURCE COLLECTION")
    successful_collections = []
    prev_inv = {}
    for action, inv in inventory_timeline:
        if inv != prev_inv:
            new_items = {k: v for k, v in inv.items() if v > prev_inv.get(k, 0)}
            if new_items:
                successful_collections.append((action, new_items))
        prev_inv = inv

    if successful_collections:
        print("Successful collections:")
        for action, items in successful_collections[:5]:
            print(f" After '{action}': gained {items}")
    else:
        print("No successful resource collections detected!")

    # Check specific action sequences: each 'do' action with its neighbours.
    print(f"\n🔍 ACTION SEQUENCE ANALYSIS")
    last_index = len(action_results) - 1
    do_actions = [
        {
            'prev_action': action_results[i - 1]['action'] if i > 0 else 'start',
            'next_action': action_results[i + 1]['action'] if i < last_index else 'end',
            'reward': result['reward'],
        }
        for i, result in enumerate(action_results)
        if result['action'] == 'do'
    ]

    if do_actions:
        print(f"Found {len(do_actions)} 'do' actions")
        # Check what happened before 'do' actions
        prev_action_counts = Counter(d['prev_action'] for d in do_actions)
        print("Actions before 'do':")
        for action, count in prev_action_counts.most_common(5):
            print(f" {action}: {count}")

    # Check for make_wood_pickaxe attempts
    pickaxe_attempts = [r for r in action_results if r['action'] == 'make_wood_pickaxe']
    if pickaxe_attempts:
        print(f"\n🔨 PICKAXE CRAFTING")
        print(f"Attempted to make wood pickaxe {len(pickaxe_attempts)} times")

    # Final inventory check
    if inventory_timeline:
        final_inv = inventory_timeline[-1][1]
        print(f"\n📦 FINAL INVENTORY: {final_inv if final_inv else 'Empty'}")

    # Achievement check
    if achievements_timeline:
        final_achievements = achievements_timeline[-1][1]
        print(f"🏆 FINAL ACHIEVEMENTS: {final_achievements if final_achievements else 'None'}")
+
205
+ if __name__ == "__main__":
206
+ db_path = "./traces_v2_synth/traces.duckdb"
207
+ if Path(db_path).exists():
208
+ analyze_no_rewards(db_path)
209
+ else:
210
+ print(f"āŒ Database not found at {db_path}")