synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/__init__.py +1 -1
- synth_ai/cli/__init__.py +6 -0
- synth_ai/cli/demo.py +68 -9
- synth_ai/cli/rl_demo.py +137 -0
- synth_ai/cli/root.py +65 -0
- synth_ai/demos/core/__init__.py +1 -0
- synth_ai/demos/core/cli.py +685 -0
- synth_ai/demos/demo_task_apps/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/core.py +374 -0
- synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/math/app.py +37 -0
- synth_ai/demos/demo_task_apps/math/config.toml +44 -0
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
- synth_ai/environments/examples/bandit/__init__.py +33 -0
- synth_ai/environments/examples/bandit/engine.py +294 -0
- synth_ai/environments/examples/bandit/environment.py +194 -0
- synth_ai/environments/examples/bandit/taskset.py +200 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
- synth_ai/environments/examples/crafter_classic/environment.py +41 -2
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
- synth_ai/environments/service/app.py +8 -0
- synth_ai/install_sqld.sh +40 -0
- synth_ai-0.2.5.dist-info/METADATA +106 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/RECORD +111 -12
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/entry_points.txt +1 -0
- synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Analyze the latest run to understand why no achievements were unlocked."""
|
|
3
|
+
|
|
4
|
+
import duckdb
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from collections import Counter, defaultdict
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
def analyze_latest_run(db_path: str):
    """Print a diagnostic report for the most recent experiment in a trace database.

    Opens the DuckDB file read-only, picks the newest row of ``experiments``
    (ordered by ``created_at``) and, for every session that has events on that
    experiment's timesteps, prints the action distribution, runs of repeated
    actions, short action sequences, reward/achievement progress and the final
    public state. Ends with the number of ``lm``/``generation`` events recorded
    for the experiment.

    Args:
        db_path: Path of the DuckDB trace database to inspect.
    """
    conn = duckdb.connect(db_path, read_only=True)
    # try/finally so the handle is released even if a query or json.loads raises.
    try:
        print("š Analyzing latest run with 97 steps and 0 achievements...\n")

        # Get the latest experiment
        latest_experiment = conn.execute("""
            SELECT experiment_id, experiment_name, created_at
            FROM experiments
            ORDER BY created_at DESC
            LIMIT 1
        """).fetchone()

        if not latest_experiment:
            # Every query below is keyed on the experiment id, so stop here.
            print("No experiments found")
            return

        exp_id, exp_name, created_at = latest_experiment
        print(f"š Latest Experiment: {exp_name}")
        print(f" ID: {exp_id}")
        print(f" Created: {created_at}")

        # Get all sessions from latest experiment
        sessions = conn.execute("""
            SELECT DISTINCT s.session_id, s.num_timesteps, s.num_events
            FROM session_traces s
            JOIN events e ON s.session_id = e.session_id
            WHERE e.timestep_id IN (
                SELECT id FROM session_timesteps
                WHERE experiment_id = ?
            )
            ORDER BY s.created_at DESC
        """, [exp_id]).fetchall()

        print(f"\nš Sessions in latest experiment: {len(sessions)}")

        for session_id, num_timesteps, num_events in sessions:
            print(f"\n{'='*60}")
            print(f"SESSION: {session_id}")
            print(f"Timesteps: {num_timesteps}, Events: {num_events}")
            print(f"{'='*60}")

            # Get all events for this session
            events = conn.execute("""
                SELECT event_type, metadata, system_state_after, reward
                FROM events
                WHERE session_id = ?
                ORDER BY id
            """, [session_id]).fetchall()

            _report_latest_run_session(events)

        # Check for any generation/LM events
        print(f"\n{'='*60}")
        print("š LM/GENERATION EVENT CHECK")
        print(f"{'='*60}")

        lm_events = conn.execute("""
            SELECT COUNT(*)
            FROM events
            WHERE session_id IN (
                SELECT DISTINCT s.session_id
                FROM session_traces s
                JOIN events e ON s.session_id = e.session_id
                WHERE e.timestep_id IN (
                    SELECT id FROM session_timesteps
                    WHERE experiment_id = ?
                )
            ) AND (event_type = 'lm' OR event_type = 'generation')
        """, [exp_id]).fetchone()[0]

        print(f"LM/Generation events in this experiment: {lm_events}")

        if lm_events == 0:
            print("ā ļø No LM events found - the agent may not be generating proper decisions")
    finally:
        conn.close()


def _consecutive_repeats(actions, min_run=4):
    """Return ``(action, run_length)`` for each run of at least *min_run* equal actions."""
    from itertools import groupby

    runs = []
    for action, group in groupby(actions):
        run_length = sum(1 for _ in group)
        if run_length >= min_run:
            runs.append((action, run_length))
    return runs


def _report_latest_run_session(events):
    """Print the per-session part of the latest-run report.

    Args:
        events: Rows of ``(event_type, metadata, system_state_after, reward)``
            for one session, in ``id`` order; ``metadata`` and
            ``system_state_after`` are JSON strings or empty/NULL.
    """
    actions_taken = []
    action_sequences = []
    current_sequence = []
    total_reward = 0
    achievements_over_time = []
    # Action count at which the inventory was last sampled, so one milestone
    # is reported once instead of once per event recorded at that count.
    last_sampled_at = None

    for event_type, metadata_str, state_after_str, reward in events:
        if reward:
            total_reward += reward

        if metadata_str:
            metadata = json.loads(metadata_str)

            # Track runtime actions
            if event_type == 'runtime' and 'action_name' in metadata:
                action = metadata['action_name']
                actions_taken.append(action)
                current_sequence.append(action)

                # Group actions into sequences (reset on certain actions)
                if action in ('do', 'make_wood_pickaxe', 'place_table', 'sleep'):
                    if len(current_sequence) > 1:
                        action_sequences.append(current_sequence[:-1])
                    current_sequence = [action]

        if not state_after_str:
            continue
        state_after = json.loads(state_after_str)
        if 'public_state' not in state_after:
            continue
        ps = state_after['public_state']

        # Check achievements
        if 'achievements_status' in ps:
            unlocked = [k for k, v in ps['achievements_status'].items() if v]
            achievements_over_time.append(len(unlocked))

        # Check inventory: sample every 20 actions
        action_count = len(actions_taken)
        if ('inventory' in ps and action_count % 20 == 0
                and action_count != last_sampled_at):
            last_sampled_at = action_count
            non_zero = {k: v for k, v in ps['inventory'].items() if v > 0}
            if non_zero:
                print(f"\nš¦ Inventory at action {action_count}: {non_zero}")

    # Action analysis
    print("\nš ACTION ANALYSIS")
    print(f"Total actions taken: {len(actions_taken)}")

    if actions_taken:
        print("\nAction distribution:")
        for action, count in Counter(actions_taken).most_common():
            percentage = (count / len(actions_taken)) * 100
            print(f" {action:20} {count:4} ({percentage:5.1f}%)")

    # Check for repetitive patterns (runs of more than 3 identical actions)
    print("\nš REPETITIVE PATTERNS")
    consecutive_repeats = _consecutive_repeats(actions_taken)
    if consecutive_repeats:
        print("Found repetitive sequences:")
        for action, count in consecutive_repeats[:5]:
            print(f" {action} repeated {count} times consecutively")
    else:
        print("No significant repetitive patterns found")

    # Check action sequences
    if action_sequences:
        print("\nšÆ ACTION SEQUENCES (movement -> action):")
        seq_counter = Counter(
            ' ā '.join(seq) for seq in action_sequences if len(seq) <= 5
        )
        for seq, count in seq_counter.most_common(5):
            print(f" {seq}: {count} times")

    # Achievement progress
    print("\nš ACHIEVEMENT PROGRESS")
    print(f"Total reward: {total_reward}")
    if achievements_over_time:
        print(f"Max achievements reached: {max(achievements_over_time)}")

        # Find when achievements were unlocked (count rose versus previous sample)
        achievement_unlocks = []
        prev_count = 0
        for step, count in enumerate(achievements_over_time):
            if count > prev_count:
                achievement_unlocks.append((step, count))
            prev_count = count

        if achievement_unlocks:
            print("Achievement unlock timeline:")
            for step, count in achievement_unlocks:
                print(f" Step {step}: {count} achievements")
    else:
        print("No achievement data found")

    # Check final state
    final_event = events[-1] if events else None
    if final_event and final_event[2]:  # state_after
        final_state = json.loads(final_event[2])
        if 'public_state' in final_state:
            ps = final_state['public_state']

            print("\nš FINAL STATE")

            # Final inventory
            if 'inventory' in ps:
                non_zero = {k: v for k, v in ps['inventory'].items() if v > 0}
                print(f"Final inventory: {non_zero if non_zero else 'Empty'}")

            # Final achievements
            if 'achievements_status' in ps:
                unlocked = [k for k, v in ps['achievements_status'].items() if v]
                print(f"Final achievements: {unlocked if unlocked else 'None'}")

            # Player stats
            if 'health' in ps:
                print(f"Final stats: Health={ps.get('health', '?')}, Food={ps.get('food', '?')}, Energy={ps.get('energy', '?')}")
|
|
216
|
+
|
|
217
|
+
if __name__ == "__main__":
    # Default trace database location, relative to the working directory.
    db_path = "./traces_v2_synth/traces.duckdb"
    if not Path(db_path).exists():
        print(f"ā Database not found at {db_path}")
    else:
        analyze_latest_run(db_path)
|
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Analyze LM traces to see what the agent is actually doing."""
|
|
3
|
+
|
|
4
|
+
import duckdb
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from collections import Counter, defaultdict
|
|
8
|
+
|
|
9
|
+
def analyze_lm_traces(db_path: str):
    """Print an overview of LM-related data stored in a trace database.

    Opens the DuckDB file read-only and reports, in order: the number of
    ``lm``/``generation`` events, the count of every event type, a summary of
    the first five sessions (by ``session_id``), a sample of events whose
    metadata mentions ``model``, a sample of rows from ``messages`` and a tally
    of tool/function names found in event metadata.

    Args:
        db_path: Path of the DuckDB trace database to inspect.
    """
    conn = duckdb.connect(db_path, read_only=True)
    # try/finally so the handle is released even if a query or json.loads raises.
    try:
        print("š Analyzing LM traces after fix...\n")

        # First, check if we have any LM events
        print("š CHECKING FOR LM EVENTS")
        print("=" * 50)

        lm_events = conn.execute("""
            SELECT COUNT(*)
            FROM events
            WHERE event_type = 'lm' OR event_type = 'generation'
        """).fetchone()[0]

        print(f"LM/Generation events found: {lm_events}")

        # Get all unique event types
        event_types = conn.execute("""
            SELECT DISTINCT event_type, COUNT(*) as count
            FROM events
            GROUP BY event_type
            ORDER BY count DESC
        """).fetchall()

        print("\nAll event types:")
        for event_type, count in event_types:
            print(f" {event_type}: {count}")

        # Analyze sessions
        print("\nš SESSION ANALYSIS")
        print("=" * 50)

        sessions = conn.execute("""
            SELECT DISTINCT session_id
            FROM events
            ORDER BY session_id
            LIMIT 5
        """).fetchall()

        for i, (session_id,) in enumerate(sessions):
            print(f"\n--- Session {i+1}: {session_id} ---")

            # Get all events for this session
            events = conn.execute("""
                SELECT event_type, metadata, system_state_after
                FROM events
                WHERE session_id = ?
                ORDER BY id
            """, [session_id]).fetchall()

            _summarize_lm_session(events)

        # Look for any LM-related metadata
        print("\nš LM METADATA ANALYSIS")
        print("=" * 50)

        # Check for model information in metadata
        model_events = conn.execute("""
            SELECT metadata
            FROM events
            WHERE metadata LIKE '%model%'
            LIMIT 10
        """).fetchall()

        if model_events:
            print(f"Found {len(model_events)} events with model metadata")
            for i, (metadata_str,) in enumerate(model_events[:3]):
                metadata = json.loads(metadata_str)
                print(f"\nEvent {i+1} metadata keys: {list(metadata.keys())}")
                if 'model' in metadata:
                    print(f" Model: {metadata['model']}")
        else:
            print("No events with model metadata found")

        # Check messages table
        print("\nš MESSAGES ANALYSIS")
        print("=" * 50)

        message_count = conn.execute("SELECT COUNT(*) FROM messages").fetchone()[0]
        print(f"Total messages: {message_count}")

        if message_count > 0:
            messages = conn.execute("""
                SELECT message_type, content
                FROM messages
                LIMIT 10
            """).fetchall()

            for i, (msg_type, content_str) in enumerate(messages[:5]):
                try:
                    content = json.loads(content_str)
                except (TypeError, ValueError):
                    # NULL or non-JSON content: still list the message, but
                    # there is no structure to inspect.
                    content = None
                print(f"\nMessage {i+1} ({msg_type}):")
                if isinstance(content, dict):
                    print(f" Keys: {list(content.keys())}")
                    if 'payload' in content:
                        payload = content['payload']
                        if isinstance(payload, dict) and 'inventory' in payload:
                            # Show non-zero inventory items
                            inv = payload['inventory']
                            non_zero = {k: v for k, v in inv.items() if v > 0}
                            if non_zero:
                                print(f" Non-zero inventory: {non_zero}")

        # Check for tool calls or function calls
        print("\nš TOOL CALL ANALYSIS")
        print("=" * 50)

        tool_events = conn.execute("""
            SELECT metadata
            FROM events
            WHERE metadata LIKE '%tool%' OR metadata LIKE '%function%'
            LIMIT 20
        """).fetchall()

        if tool_events:
            print(f"Found {len(tool_events)} events with tool/function mentions")
            tool_names = Counter()

            for (metadata_str,) in tool_events:
                metadata = json.loads(metadata_str)
                if 'tool_name' in metadata:
                    tool_names[metadata['tool_name']] += 1
                elif 'function' in metadata:
                    tool_names[metadata['function']] += 1

            if tool_names:
                print("Tool usage:")
                for tool, count in tool_names.most_common():
                    print(f" {tool}: {count}")
        else:
            print("No tool/function events found")
    finally:
        conn.close()


def _summarize_lm_session(events):
    """Print event-type counts, top actions and unlocked achievements for one session.

    Args:
        events: Rows of ``(event_type, metadata, system_state_after)`` for one
            session, in ``id`` order; the last two are JSON strings or
            empty/NULL.
    """
    print(f"Total events: {len(events)}")

    # Count event types
    event_type_counts = Counter()
    actions_taken = []
    achievements = set()

    for event_type, metadata_str, state_after_str in events:
        event_type_counts[event_type] += 1

        if metadata_str:
            metadata = json.loads(metadata_str)

            # Track actions ('action_name' takes precedence over 'action')
            if 'action_name' in metadata:
                actions_taken.append(metadata['action_name'])
            elif 'action' in metadata:
                actions_taken.append(metadata['action'])

        # Check for achievements
        if not state_after_str:
            continue
        state_after = json.loads(state_after_str)
        if 'public_state' not in state_after:
            continue
        public_state = state_after['public_state']
        if 'achievements_status' in public_state:
            achievements.update(
                ach
                for ach, unlocked in public_state['achievements_status'].items()
                if unlocked
            )

    print(f"Event type distribution: {dict(event_type_counts)}")
    print(f"Total actions: {len(actions_taken)}")
    if actions_taken:
        print(f"Top actions: {Counter(actions_taken).most_common(5)}")
    print(f"Achievements unlocked: {achievements if achievements else 'None'}")
|
|
177
|
+
|
|
178
|
+
if __name__ == "__main__":
    # Default trace database location, relative to the working directory.
    db_path = "./traces_v2_synth/traces.duckdb"
    if not Path(db_path).exists():
        print(f"ā Database not found at {db_path}")
    else:
        analyze_lm_traces(db_path)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Analyze why there are no rewards or achievements."""
|
|
3
|
+
|
|
4
|
+
import duckdb
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from collections import Counter, defaultdict
|
|
8
|
+
|
|
9
|
+
def analyze_no_rewards(db_path: str):
    """Print a report on why the latest experiment shows no rewards or achievements.

    Opens the DuckDB file read-only, picks the newest row of ``experiments``
    (ordered by ``created_at``), analyzes the first three of its sessions
    (actions, inventory gains, context of ``do`` actions, pickaxe attempts,
    final inventory and achievements) and then prints experiment-wide reward
    totals plus a sample of reward values from ``environment`` events.

    Args:
        db_path: Path of the DuckDB trace database to inspect.
    """
    conn = duckdb.connect(db_path, read_only=True)
    # try/finally so the handle is released on the early return below and
    # when a query or json.loads raises.
    try:
        print("š Analyzing why there are no rewards or achievements...\n")

        # Get the latest experiment
        latest_exp = conn.execute("""
            SELECT experiment_id, experiment_name
            FROM experiments
            ORDER BY created_at DESC
            LIMIT 1
        """).fetchone()

        if not latest_exp:
            print("No experiments found")
            return

        exp_id, exp_name = latest_exp
        print(f"š Latest Experiment: {exp_name}")
        print(f" ID: {exp_id}\n")

        # Get all sessions from latest experiment
        sessions = conn.execute("""
            SELECT DISTINCT e.session_id
            FROM events e
            JOIN session_timesteps st ON e.timestep_id = st.id
            WHERE st.experiment_id = ?
            ORDER BY e.session_id
        """, [exp_id]).fetchall()

        print(f"Found {len(sessions)} sessions\n")

        # Analyze each session
        for i, (session_id,) in enumerate(sessions[:3]):  # First 3 sessions
            print(f"\n{'='*60}")
            print(f"SESSION {i}: {session_id}")
            print(f"{'='*60}")

            # Get all events for this session
            events = conn.execute("""
                SELECT event_type, metadata, system_state_after, reward
                FROM events
                WHERE session_id = ?
                ORDER BY id
            """, [session_id]).fetchall()

            _report_no_reward_session(events)

        # Check for any rewards across all sessions
        print(f"\n\n{'='*60}")
        print("š OVERALL REWARD ANALYSIS")
        print(f"{'='*60}")

        total_rewards = conn.execute("""
            SELECT SUM(reward) as total, COUNT(*) as count
            FROM events
            WHERE reward IS NOT NULL AND reward != 0
            AND session_id IN (
                SELECT DISTINCT e.session_id
                FROM events e
                JOIN session_timesteps st ON e.timestep_id = st.id
                WHERE st.experiment_id = ?
            )
        """, [exp_id]).fetchone()

        if total_rewards:
            total, count = total_rewards
            print(f"Total non-zero rewards: {total or 0}")
            print(f"Number of reward events: {count or 0}")

        # Check if rewards are being recorded at all
        print("\nš REWARD RECORDING CHECK")
        sample_rewards = conn.execute("""
            SELECT reward, metadata
            FROM events
            WHERE event_type = 'environment'
            AND session_id IN (
                SELECT DISTINCT e.session_id
                FROM events e
                JOIN session_timesteps st ON e.timestep_id = st.id
                WHERE st.experiment_id = ?
            )
            LIMIT 20
        """, [exp_id]).fetchall()

        reward_values = [r for r, _ in sample_rewards if r is not None]
        print(f"Sample reward values: {reward_values[:10]}")
    finally:
        conn.close()


def _detect_collections(inventory_timeline):
    """Return ``(action, gained_items)`` for every snapshot whose item counts grew.

    Each snapshot is compared with the previous one; the first is compared
    with an empty inventory so that the very first pickup is reported too.
    """
    collections = []
    prev_inv = {}
    for action, inv in inventory_timeline:
        gained = {k: v for k, v in inv.items() if v > prev_inv.get(k, 0)}
        if gained:
            collections.append((action, gained))
        prev_inv = inv
    return collections


def _report_no_reward_session(events):
    """Print the per-session part of the no-rewards report.

    Args:
        events: Rows of ``(event_type, metadata, system_state_after, reward)``
            for one session, in ``id`` order; ``metadata`` and
            ``system_state_after`` are JSON strings or empty/NULL.
    """
    # Track actions and their results
    action_results = []
    total_reward = 0
    achievements_timeline = []
    inventory_timeline = []
    # Vital stats are skipped when looking for collected items.
    vital_stats = ('health', 'food', 'drink', 'energy')

    for event_type, metadata_str, state_after_str, reward in events:
        if event_type != 'runtime' or not metadata_str:
            continue

        metadata = json.loads(metadata_str)
        action_name = metadata.get('action_name', 'unknown')

        result = {
            'action': action_name,
            'reward': reward or 0,
            'achievements_unlocked': []
        }

        if state_after_str:
            state_after = json.loads(state_after_str)
            if 'public_state' in state_after:
                ps = state_after['public_state']

                # Check achievements
                if 'achievements_status' in ps:
                    unlocked = [k for k, v in ps['achievements_status'].items() if v]
                    achievements_timeline.append((action_name, unlocked))
                    if unlocked:
                        result['achievements_unlocked'] = unlocked

                # Check inventory; only non-empty snapshots are recorded
                if 'inventory' in ps:
                    non_zero = {
                        k: v for k, v in ps['inventory'].items()
                        if v > 0 and k not in vital_stats
                    }
                    if non_zero:
                        inventory_timeline.append((action_name, non_zero))

        action_results.append(result)
        # NOTE(review): only rewards attached to runtime events are summed
        # here; confirm whether environment-event rewards should count too.
        if reward:
            total_reward += reward

    # Analyze action effectiveness
    print("\nš ACTION ANALYSIS")
    print(f"Total actions: {len(action_results)}")
    print(f"Total reward: {total_reward}")

    # Count actions by type
    action_counts = Counter(r['action'] for r in action_results)
    print("\nAction distribution:")
    for action, count in action_counts.most_common(10):
        print(f" {action:20} {count:3}")

    # Check for successful resource collection
    print("\nš¦ RESOURCE COLLECTION")
    successful_collections = _detect_collections(inventory_timeline)
    if successful_collections:
        print("Successful collections:")
        for action, items in successful_collections[:5]:
            print(f" After '{action}': gained {items}")
    else:
        print("No successful resource collections detected!")

    # Check specific action sequences
    print("\nš ACTION SEQUENCE ANALYSIS")
    # Look for 'do' actions and their context
    last_index = len(action_results) - 1
    do_actions = [
        {
            'prev_action': action_results[i-1]['action'] if i > 0 else 'start',
            'next_action': action_results[i+1]['action'] if i < last_index else 'end',
            'reward': result['reward']
        }
        for i, result in enumerate(action_results)
        if result['action'] == 'do'
    ]

    if do_actions:
        print(f"Found {len(do_actions)} 'do' actions")
        # Check what happened before 'do' actions
        prev_action_counts = Counter(d['prev_action'] for d in do_actions)
        print("Actions before 'do':")
        for action, count in prev_action_counts.most_common(5):
            print(f" {action}: {count}")

    # Check for make_wood_pickaxe attempts
    pickaxe_attempts = [r for r in action_results if r['action'] == 'make_wood_pickaxe']
    if pickaxe_attempts:
        print("\nšØ PICKAXE CRAFTING")
        print(f"Attempted to make wood pickaxe {len(pickaxe_attempts)} times")

    # Final inventory check
    if inventory_timeline:
        final_inv = inventory_timeline[-1][1]
        print(f"\nš¦ FINAL INVENTORY: {final_inv if final_inv else 'Empty'}")

    # Achievement check
    if achievements_timeline:
        final_achievements = achievements_timeline[-1][1]
        print(f"š FINAL ACHIEVEMENTS: {final_achievements if final_achievements else 'None'}")
|
|
204
|
+
|
|
205
|
+
if __name__ == "__main__":
    # Default trace database location, relative to the working directory.
    db_path = "./traces_v2_synth/traces.duckdb"
    if not Path(db_path).exists():
        print(f"ā Database not found at {db_path}")
    else:
        analyze_no_rewards(db_path)
|