synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (154) hide show
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/balance.py +3 -15
  4. synth_ai/cli/demo.py +68 -9
  5. synth_ai/cli/rl_demo.py +137 -0
  6. synth_ai/cli/root.py +65 -0
  7. synth_ai/config/base_url.py +47 -0
  8. synth_ai/demos/core/__init__.py +1 -0
  9. synth_ai/demos/core/cli.py +621 -0
  10. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/core.py +374 -0
  12. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  13. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  14. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  15. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  16. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  17. synth_ai/environments/examples/bandit/__init__.py +33 -0
  18. synth_ai/environments/examples/bandit/engine.py +294 -0
  19. synth_ai/environments/examples/bandit/environment.py +194 -0
  20. synth_ai/environments/examples/bandit/taskset.py +200 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  83. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  84. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  85. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  86. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  87. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  88. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  89. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  90. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  91. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  92. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  93. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  94. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  97. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  98. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  100. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  101. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  102. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  103. synth_ai/environments/examples/red/units/__init__.py +1 -0
  104. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  105. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  106. synth_ai/environments/service/app.py +8 -0
  107. synth_ai/http.py +102 -0
  108. synth_ai/inference/__init__.py +7 -0
  109. synth_ai/inference/client.py +20 -0
  110. synth_ai/install_sqld.sh +40 -0
  111. synth_ai/jobs/client.py +246 -0
  112. synth_ai/learning/__init__.py +24 -0
  113. synth_ai/learning/client.py +149 -0
  114. synth_ai/learning/config.py +43 -0
  115. synth_ai/learning/constants.py +29 -0
  116. synth_ai/learning/ft_client.py +59 -0
  117. synth_ai/learning/health.py +43 -0
  118. synth_ai/learning/jobs.py +205 -0
  119. synth_ai/learning/rl_client.py +256 -0
  120. synth_ai/learning/sse.py +58 -0
  121. synth_ai/learning/validators.py +48 -0
  122. synth_ai/lm/core/main_v3.py +13 -0
  123. synth_ai/lm/core/synth_models.py +48 -0
  124. synth_ai/lm/core/vendor_clients.py +9 -6
  125. synth_ai/lm/vendors/core/openai_api.py +31 -3
  126. synth_ai/lm/vendors/openai_standard.py +45 -14
  127. synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
  128. synth_ai/lm/vendors/synth_client.py +372 -28
  129. synth_ai/rl/__init__.py +30 -0
  130. synth_ai/rl/contracts.py +32 -0
  131. synth_ai/rl/env_keys.py +137 -0
  132. synth_ai/rl/secrets.py +19 -0
  133. synth_ai/scripts/verify_rewards.py +100 -0
  134. synth_ai/task/__init__.py +10 -0
  135. synth_ai/task/contracts.py +120 -0
  136. synth_ai/task/health.py +28 -0
  137. synth_ai/task/validators.py +12 -0
  138. synth_ai/tracing_v3/hooks.py +3 -1
  139. synth_ai/tracing_v3/session_tracer.py +123 -2
  140. synth_ai/tracing_v3/turso/manager.py +218 -0
  141. synth_ai/tracing_v3/turso/models.py +53 -0
  142. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  143. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +147 -30
  144. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  145. synth_ai/tui/__init__.py +0 -1
  146. synth_ai/tui/__main__.py +0 -13
  147. synth_ai/tui/cli/__init__.py +0 -1
  148. synth_ai/tui/cli/query_experiments.py +0 -164
  149. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  150. synth_ai/tui/dashboard.py +0 -340
  151. synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
  152. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  154. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Evaluate traces grouped by difficulty level.
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+ from collections import defaultdict
9
+ from trace_eval import evaluate_trace, WEIGHTS
10
+
11
def get_trace_difficulty(trace_path: Path) -> str:
    """Extract the difficulty label from a trace file's metadata.

    The top-level ``metadata['difficulty']`` entry is preferred; otherwise
    ``metadata['task_instance']['metadata']['difficulty']`` is used.

    Args:
        trace_path: Path to a JSON trace file.

    Returns:
        The stored difficulty value, or ``'unknown'`` when the file cannot be
        read or parsed, has an unexpected structure, or carries no
        difficulty entry.
    """
    try:
        # Binary mode lets json.load detect the UTF-8/16/32 encoding itself
        # instead of depending on the platform's default text encoding.
        with open(trace_path, 'rb') as f:
            data = json.load(f)

        # Try to find difficulty in metadata
        metadata = data.get('metadata', {})
        if 'difficulty' in metadata:
            return metadata['difficulty']

        # Try to find in task instance metadata
        if 'task_instance' in metadata:
            task_metadata = metadata['task_instance'].get('metadata', {})
            if 'difficulty' in task_metadata:
                return task_metadata['difficulty']

        return 'unknown'
    except Exception:
        # Deliberately best-effort: any unreadable or malformed trace is
        # reported as 'unknown'. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return 'unknown'
31
+
32
def main():
    """Evaluate every JSON trace under ``traces/`` and report by difficulty.

    Each ``traces/*.json`` file is scored with ``evaluate_trace`` and grouped
    by the label returned from ``get_trace_difficulty``. For every group the
    score statistics, per-trace achievement and invalid-action averages, the
    sign distribution of scores and the top three traces are printed,
    followed by an overall summary weighted with ``WEIGHTS``.

    Prints a message and returns early when the ``traces`` directory does
    not exist. Returns ``None``.
    """
    traces_dir = Path("traces")
    if not traces_dir.exists():
        print(f"Traces directory not found: {traces_dir}")
        return

    # Group traces by difficulty
    traces_by_difficulty = defaultdict(list)
    for trace_file in traces_dir.glob("*.json"):
        difficulty = get_trace_difficulty(trace_file)
        result = evaluate_trace(trace_file)
        traces_by_difficulty[difficulty].append(result)

    # Known difficulties keep their fixed order; any other label found in the
    # traces is appended so it is not silently dropped from the report.
    known_order = ['easy', 'medium', 'hard', 'unknown']
    extra_labels = sorted(
        (label for label in traces_by_difficulty if label not in known_order),
        key=str,
    )
    difficulty_order = known_order + extra_labels

    print("=" * 80)
    print("CRAFTER EVALUATION BY DIFFICULTY")
    print("=" * 80)

    for difficulty in difficulty_order:
        # .get() avoids inserting empty groups into the defaultdict.
        traces = traces_by_difficulty.get(difficulty)
        if not traces:
            continue

        print(f"\n{str(difficulty).upper()} ({len(traces)} traces)")
        print("-" * 40)

        # Calculate statistics (traces is non-empty here, so is scores)
        scores = [t['total_score'] for t in traces]
        avg_score = sum(scores) / len(scores)
        max_score = max(scores)
        min_score = min(scores)

        # Count achievements and invalid actions
        total_easy = sum(t['counts'].get('easy_achievement', 0) for t in traces)
        total_medium = sum(t['counts'].get('medium_achievement', 0) for t in traces)
        total_hard = sum(t['counts'].get('hard_achievement', 0) for t in traces)
        total_invalid = sum(t['counts'].get('invalid_action', 0) for t in traces)

        print(f"Average Score: {avg_score:.2f}")
        print(f"Score Range: {min_score:.2f} to {max_score:.2f}")
        print("\nAchievements per trace:")
        print(f" Easy: {total_easy / len(traces):.2f}")
        print(f" Medium: {total_medium / len(traces):.2f}")
        print(f" Hard: {total_hard / len(traces):.2f}")
        print(f"\nInvalid actions per trace: {total_invalid / len(traces):.2f}")

        # Show score distribution
        positive_scores = [s for s in scores if s > 0]
        zero_scores = [s for s in scores if s == 0]
        negative_scores = [s for s in scores if s < 0]

        print("\nScore distribution:")
        print(f" Positive: {len(positive_scores)} ({len(positive_scores)/len(scores)*100:.1f}%)")
        print(f" Zero: {len(zero_scores)} ({len(zero_scores)/len(scores)*100:.1f}%)")
        print(f" Negative: {len(negative_scores)} ({len(negative_scores)/len(scores)*100:.1f}%)")

        # Show top 3 traces
        traces_sorted = sorted(traces, key=lambda x: x['total_score'], reverse=True)
        print("\nTop 3 traces:")
        for i, trace in enumerate(traces_sorted[:3], 1):
            print(f" {i}. Score: {trace['total_score']:.2f}, Trajectory: {trace['trajectory'][:50]}")

    # Overall summary
    print("\n" + "=" * 80)
    print("OVERALL SUMMARY")
    print("=" * 80)

    all_traces = [t for traces in traces_by_difficulty.values() for t in traces]

    if all_traces:
        all_scores = [t['total_score'] for t in all_traces]
        print(f"Total traces evaluated: {len(all_traces)}")
        print(f"Overall average score: {sum(all_scores) / len(all_scores):.2f}")

        # Achievement type distribution
        total_achievements = defaultdict(int)
        for trace in all_traces:
            for achievement_type in ['easy_achievement', 'medium_achievement', 'hard_achievement']:
                total_achievements[achievement_type] += trace['counts'].get(achievement_type, 0)

        print("\nTotal achievements unlocked:")
        print(f" Easy: {total_achievements['easy_achievement']} (worth {total_achievements['easy_achievement'] * WEIGHTS['easy_achievement']:.1f} points)")
        print(f" Medium: {total_achievements['medium_achievement']} (worth {total_achievements['medium_achievement'] * WEIGHTS['medium_achievement']:.1f} points)")
        print(f" Hard: {total_achievements['hard_achievement']} (worth {total_achievements['hard_achievement'] * WEIGHTS['hard_achievement']:.1f} points)")

        total_invalid = sum(t['counts'].get('invalid_action', 0) for t in all_traces)
        print(f"\nTotal invalid actions: {total_invalid} (penalty: {total_invalid * WEIGHTS['invalid_action']:.1f} points)")
124
+
125
+ if __name__ == "__main__":
126
+ main()
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Example of using the trace evaluation system programmatically.
4
+ """
5
+
6
+ from pathlib import Path
7
+ from trace_eval import evaluate_trace, evaluate_all_traces, print_trace_evaluation, print_evaluation_summary
8
+
9
def main():
    """Run three printed examples of the trace evaluation API.

    1. Evaluate one specific trace file, if it exists.
    2. Evaluate all traces under ``traces/`` and print the first five
       results returned by ``evaluate_all_traces`` with a per-type breakdown.
    3. Print a histogram of scores by range plus mean, median and
       (population) standard deviation.

    Examples 2 and 3 are skipped when the ``traces`` directory is missing.
    Returns ``None``.
    """
    # Example 1: Evaluate a single trace
    print("=" * 60)
    print("Example 1: Evaluating a single trace")
    print("=" * 60)

    # Pick a high-scoring trace
    trace_path = Path("traces/session_crafter_episode_1_f2cea96d-34b6-46a3-9991-fe74ef263462_20250724_162140.json")
    if trace_path.exists():
        result = evaluate_trace(trace_path)
        print_trace_evaluation(result)
    else:
        print(f"Trace file not found: {trace_path}")

    # Example 2: Evaluate all traces and show top 5
    print("\n" + "=" * 60)
    print("Example 2: Top 5 traces by score")
    print("=" * 60)

    # Per-event weights used only to display the breakdown below.
    weights = {
        'easy_achievement': 1.0,
        'medium_achievement': 2.5,
        'hard_achievement': 5.0,
        'invalid_action': -0.05,
    }

    traces_dir = Path("traces")
    if traces_dir.exists():
        all_results = evaluate_all_traces(traces_dir)

        # Show only top 5
        print(f"\nFound {len(all_results)} traces. Showing top 5:\n")
        for i, result in enumerate(all_results[:5], 1):
            print(f"{i}. {result['trace_file']}")
            print(f" Score: {result['total_score']:.2f}")
            print(f" Trajectory: {result['trajectory']}")
            if result['counts']:
                print(" Breakdown:")
                for score_type, count in result['counts'].items():
                    weight = weights.get(score_type)
                    if weight is None:
                        # Unknown score types are listed without a weighted
                        # value instead of aborting with a KeyError.
                        print(f" {score_type}: {count}")
                    else:
                        print(f" {score_type}: {count} × {weight} = {count * weight:.2f}")
            print()

    # Example 3: Score distribution analysis
    print("=" * 60)
    print("Example 3: Score distribution analysis")
    print("=" * 60)

    if traces_dir.exists():
        all_results = evaluate_all_traces(traces_dir)
        scores = [r['total_score'] for r in all_results]

        # Group by score ranges
        score_ranges = {
            "Negative (<0)": 0,
            "Low (0-0.5)": 0,
            "Medium (0.5-1.5)": 0,
            "High (1.5-2.5)": 0,
            "Very High (>2.5)": 0,
        }

        for score in scores:
            if score < 0:
                score_ranges["Negative (<0)"] += 1
            elif score <= 0.5:
                score_ranges["Low (0-0.5)"] += 1
            elif score <= 1.5:
                score_ranges["Medium (0.5-1.5)"] += 1
            elif score <= 2.5:
                score_ranges["High (1.5-2.5)"] += 1
            else:
                score_ranges["Very High (>2.5)"] += 1

        print(f"\nScore distribution across {len(scores)} traces:")
        for range_name, count in score_ranges.items():
            percentage = (count / len(scores) * 100) if scores else 0
            bar = "█" * int(percentage / 2)  # Scale to 50 chars max
            print(f" {range_name:<20} {count:3d} ({percentage:5.1f}%) {bar}")

        # Additional statistics
        if scores:
            # Compute the mean once; recomputing it per element made the
            # deviation quadratic in the number of traces.
            mean = sum(scores) / len(scores)
            ordered = sorted(scores)
            mid = len(ordered) // 2
            # True median: average the two middle values for even counts.
            if len(ordered) % 2:
                median = ordered[mid]
            else:
                median = (ordered[mid - 1] + ordered[mid]) / 2
            std_dev = (sum((x - mean) ** 2 for x in scores) / len(scores)) ** 0.5

            print("\nStatistics:")
            print(f" Mean score: {mean:.2f}")
            print(f" Median score: {median:.2f}")
            print(f" Std deviation: {std_dev:.2f}")
92
+
93
+ if __name__ == "__main__":
94
+ main()
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Explore and visualize saved Crafter states
4
+ ==========================================
5
+ """
6
+
7
+ import gzip
8
+ import pickle
9
+ import json
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+ import asyncio
13
+ from uuid import uuid4
14
+
15
+ from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
16
+ from synth_ai.environments.examples.crafter_classic.taskset import CrafterTaskInstance, CrafterTaskInstanceMetadata
17
+ from synth_ai.environments.tasks.core import Impetus, Intent
18
+
19
+ STATES_DIR = Path("synth_ai/environments/examples/crafter_classic/env_states")
20
+
21
+
22
def list_sessions():
    """List all saved MCTS sessions found under ``STATES_DIR``.

    A session is a sub-directory whose name starts with ``mcts_`` and that
    contains a ``session_metadata.json`` file. Sessions whose metadata file
    cannot be read or parsed are skipped with a printed notice so that one
    corrupt session does not abort the whole listing.

    Returns:
        A list of ``(session_dir, metadata)`` tuples ordered by sorted
        directory path; empty when ``STATES_DIR`` does not exist.
    """
    if not STATES_DIR.exists():
        print("No env_states directory found!")
        return []

    sessions = []
    for session_dir in sorted(STATES_DIR.iterdir()):
        if not (session_dir.is_dir() and session_dir.name.startswith("mcts_")):
            continue

        metadata_file = session_dir / "session_metadata.json"
        if not metadata_file.exists():
            continue

        try:
            metadata = json.loads(metadata_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSON syntax errors and decode errors.
            print(f"Skipping {session_dir.name}: unreadable session metadata ({exc})")
            continue

        sessions.append((session_dir, metadata))

    return sessions
37
+
38
+
39
async def explore_state_file(state_file: Path):
    """Load one saved state snapshot and print a short description of it.

    Prints the file name and size, then decompresses and unpickles the
    snapshot, rebuilds an environment from it with a placeholder task
    instance, and prints position, health, step count, unlocked achievements
    and non-vital inventory items. Any failure while loading is reported on
    stdout instead of being raised.

    Args:
        state_file: Path to a gzip-compressed, pickled snapshot file.
    """
    print(f"\n📄 State file: {state_file.name}")
    print(f" Size: {state_file.stat().st_size / 1024:.1f} KB")

    # Load the state
    try:
        env_blob = gzip.decompress(state_file.read_bytes())
        # SECURITY: pickle.loads can execute arbitrary code. Only open
        # snapshot files that come from a trusted source.
        env_snapshot = pickle.loads(env_blob)

        # Create dummy task for deserialization
        task = CrafterTaskInstance(
            id=uuid4(),
            impetus=Impetus(instructions="Analysis"),
            intent=Intent(rubric={"goal": "Analysis"}, gold_trajectories=None, gold_state_diff={}),
            metadata=CrafterTaskInstanceMetadata(
                difficulty="easy", seed=0, num_trees_radius=0,
                num_cows_radius=0, num_hostiles_radius=0
            ),
            is_reproducible=True,
            initial_engine_snapshot=None
        )

        env = await CrafterClassicEnvironment._deserialize_engine(env_snapshot, task)
        pub = env.engine._get_public_state_from_env()
        priv = env.engine._get_private_state_from_env(0, False, False)

        print(f" Position: {pub.player_position}")
        print(f" Health: {priv.player_internal_stats.get('health', 0)}")
        print(f" Steps: {pub.num_steps_taken}")

        # Show achievements (first three, with an ellipsis when truncated)
        achievements = [k for k, v in pub.achievements_status.items() if v]
        if achievements:
            shown = ', '.join(achievements[:3])
            suffix = '...' if len(achievements) > 3 else ''
            print(f" Achievements ({len(achievements)}): {shown}{suffix}")

        # Show inventory, leaving out the vital stats stored alongside items
        vital_stats = ['health', 'food', 'drink', 'energy']
        inventory = {
            k: v for k, v in pub.inventory.items()
            if v > 0 and k not in vital_stats
        }
        if inventory:
            print(f" Inventory: {inventory}")

    except Exception as e:
        # Best-effort viewer: report the problem and carry on with other files.
        print(f" ❌ Error loading state: {e}")
83
+
84
+
85
async def main():
    """List saved sessions and explore the most recent one.

    Prints seed, timestamp and snapshot count for every session returned by
    ``list_sessions``. For the last session in that list it shows the root
    state named by the ``root_node_id`` metadata entry, up to three randomly
    chosen other states, and the contents of ``summary.txt`` when present.
    """
    print("🔍 Exploring Saved Crafter States")
    print("=" * 60)

    sessions = list_sessions()

    if not sessions:
        print("No saved sessions found!")
        return

    print(f"Found {len(sessions)} saved sessions:\n")

    for i, (session_dir, metadata) in enumerate(sessions):
        print(f"{i+1}. {session_dir.name}")
        print(f" Seed: {metadata['seed']}")
        print(f" Time: {metadata['timestamp']}")
        print(f" States: {len(list(session_dir.glob('*.snapshot.gz')))}")

    # Explore the most recent session (sessions is non-empty at this point)
    latest_session, latest_metadata = sessions[-1]
    print(f"\n📂 Exploring latest session: {latest_session.name}")
    print("-" * 60)

    # Get all state files
    state_files = sorted(latest_session.glob("*.snapshot.gz"))
    print(f"Total states saved: {len(state_files)}")

    # Show the root state
    root_id = latest_metadata['root_node_id']
    root_file = latest_session / f"{root_id}.snapshot.gz"
    if root_file.exists():
        print("\n🌱 Root state:")
        await explore_state_file(root_file)

    # Show a few random states. The root is excluded by path rather than by
    # assuming it sorts first, so it is never shown twice and no other state
    # is wrongly left out of the sample.
    import random
    other_files = [f for f in state_files if f != root_file]
    sample_size = min(3, len(other_files))
    if sample_size > 0:
        print("\n🎲 Random sample states:")
        for state_file in random.sample(other_files, sample_size):
            await explore_state_file(state_file)

    # Show summary if it exists
    summary_file = latest_session / "summary.txt"
    if summary_file.exists():
        print("\n📊 Session Summary:")
        print("-" * 40)
        print(summary_file.read_text())

    print("\n✅ Done exploring saved states!")
    print(f"\n💡 Tip: States are saved in: {STATES_DIR}")
    print(" Each .snapshot.gz file contains a complete game state")
    print(" You can analyze these to understand MCTS exploration patterns")
139
+
140
+
141
+ if __name__ == "__main__":
142
+ asyncio.run(main())
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ DEPRECATED: This script reads individual JSON trace files which are no longer generated.
4
+ Please use filter_traces_sft_duckdb.py instead, which reads from DuckDB.
5
+
6
+ Original functionality:
7
+ - Filter traces to create OpenAI SFT-ready .jsonl files
8
+ - Supports trajectory-level and window-based filtering
9
+
10
+ Migration guide:
11
+ 1. Run your agent with DuckDB enabled (this happens automatically now)
12
+ 2. Use filter_traces_sft_duckdb.py to filter and extract training data
13
+ 3. Example: python filter_traces_sft_duckdb.py -d crafter_traces.duckdb -o training.jsonl
14
+
15
+ This file is kept for reference only and will be removed in a future version.
16
+ """
17
+
18
+ import sys
19
# Print the deprecation notice one line at a time, then exit with a failure
# status so that callers do not mistake this stub for a successful run.
for notice_line in (
    "=" * 80,
    "DEPRECATED: This script is no longer supported.",
    "Please use filter_traces_sft_duckdb.py instead.",
    "=" * 80,
    "\nExample usage:",
    " python filter_traces_sft_duckdb.py -d crafter_traces.duckdb -o training.jsonl",
    "\nSee DUCKDB_FILTERING_GUIDE.md for more information.",
):
    print(notice_line)

sys.exit(1)