synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (154)
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/balance.py +3 -15
  4. synth_ai/cli/demo.py +68 -9
  5. synth_ai/cli/rl_demo.py +137 -0
  6. synth_ai/cli/root.py +65 -0
  7. synth_ai/config/base_url.py +47 -0
  8. synth_ai/demos/core/__init__.py +1 -0
  9. synth_ai/demos/core/cli.py +621 -0
  10. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/core.py +374 -0
  12. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  13. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  14. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  15. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  16. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  17. synth_ai/environments/examples/bandit/__init__.py +33 -0
  18. synth_ai/environments/examples/bandit/engine.py +294 -0
  19. synth_ai/environments/examples/bandit/environment.py +194 -0
  20. synth_ai/environments/examples/bandit/taskset.py +200 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  83. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  84. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  85. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  86. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  87. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  88. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  89. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  90. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  91. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  92. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  93. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  94. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  97. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  98. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  100. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  101. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  102. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  103. synth_ai/environments/examples/red/units/__init__.py +1 -0
  104. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  105. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  106. synth_ai/environments/service/app.py +8 -0
  107. synth_ai/http.py +102 -0
  108. synth_ai/inference/__init__.py +7 -0
  109. synth_ai/inference/client.py +20 -0
  110. synth_ai/install_sqld.sh +40 -0
  111. synth_ai/jobs/client.py +246 -0
  112. synth_ai/learning/__init__.py +24 -0
  113. synth_ai/learning/client.py +149 -0
  114. synth_ai/learning/config.py +43 -0
  115. synth_ai/learning/constants.py +29 -0
  116. synth_ai/learning/ft_client.py +59 -0
  117. synth_ai/learning/health.py +43 -0
  118. synth_ai/learning/jobs.py +205 -0
  119. synth_ai/learning/rl_client.py +256 -0
  120. synth_ai/learning/sse.py +58 -0
  121. synth_ai/learning/validators.py +48 -0
  122. synth_ai/lm/core/main_v3.py +13 -0
  123. synth_ai/lm/core/synth_models.py +48 -0
  124. synth_ai/lm/core/vendor_clients.py +9 -6
  125. synth_ai/lm/vendors/core/openai_api.py +31 -3
  126. synth_ai/lm/vendors/openai_standard.py +45 -14
  127. synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
  128. synth_ai/lm/vendors/synth_client.py +372 -28
  129. synth_ai/rl/__init__.py +30 -0
  130. synth_ai/rl/contracts.py +32 -0
  131. synth_ai/rl/env_keys.py +137 -0
  132. synth_ai/rl/secrets.py +19 -0
  133. synth_ai/scripts/verify_rewards.py +100 -0
  134. synth_ai/task/__init__.py +10 -0
  135. synth_ai/task/contracts.py +120 -0
  136. synth_ai/task/health.py +28 -0
  137. synth_ai/task/validators.py +12 -0
  138. synth_ai/tracing_v3/hooks.py +3 -1
  139. synth_ai/tracing_v3/session_tracer.py +123 -2
  140. synth_ai/tracing_v3/turso/manager.py +218 -0
  141. synth_ai/tracing_v3/turso/models.py +53 -0
  142. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  143. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +147 -30
  144. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  145. synth_ai/tui/__init__.py +0 -1
  146. synth_ai/tui/__main__.py +0 -13
  147. synth_ai/tui/cli/__init__.py +0 -1
  148. synth_ai/tui/cli/query_experiments.py +0 -164
  149. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  150. synth_ai/tui/dashboard.py +0 -340
  151. synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
  152. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  154. {synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,159 @@
1
+ """
2
+ Detailed analysis of why diamonds aren't spawning despite high probability.
3
+ """
4
+
5
+ import numpy as np
6
+ import opensimplex
7
+ from crafter.config import WorldGenConfig
8
+ from crafter.worldgen import _simplex
9
+
10
def trace_single_position(x, y, config, simplex):
    """Replay the mountain-area placement checks for one tile, narrating each step.

    Args:
        x, y: Tile coordinates, forwarded to ``_simplex``.
        config: Object exposing ``mountain_threshold``, ``cave_threshold``,
            ``coal_threshold``/``coal_probability``,
            ``iron_threshold``/``iron_probability`` and
            ``diamond_threshold``/``diamond_probability``.
        simplex: Noise generator handed to ``_simplex``.

    Returns:
        True when no earlier check claims the tile and the diamond roll
        succeeds; False in every other case.

    The coal, iron and diamond rolls use ``np.random.uniform()``, so the
    outcome depends on the global NumPy random state.
    """
    print(f"\nTracing position ({x}, {y}):")

    # Terrain generation
    water = _simplex(simplex, x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
    mountain = _simplex(simplex, x, y, 0, {15: 1, 5: 0.3}) - 0.3 * water

    print(f" Water value: {water:.3f}")
    print(f" Mountain value: {mountain:.3f}")
    print(f" Mountain threshold: {config.mountain_threshold}")

    if mountain <= config.mountain_threshold:
        print(f" ❌ Not in mountain area (mountain={mountain:.3f} <= threshold={config.mountain_threshold})")
        return False

    print(" ✓ In mountain area")

    # Caves are checked first; either cave condition claims the tile.
    cave1 = _simplex(simplex, x, y, 6, 7)
    cave2 = _simplex(simplex, x, y, 6, 5)
    wide_cave = cave1 > 0.15 and mountain > 0.3
    narrow_cave = cave2 > config.cave_threshold
    print(f" Cave check 1: {cave1:.3f} > 0.15 and mountain > 0.3? {wide_cave}")
    print(f" Cave check 2: {cave2:.3f} > {config.cave_threshold}? {narrow_cave}")
    if wide_cave or narrow_cave:
        print(" ❌ Blocked by cave")
        return False

    # Tunnels: horizontal and vertical noise bands.
    h_tunnel = _simplex(simplex, 2 * x, y / 5, 7, 3)
    v_tunnel = _simplex(simplex, x / 5, 2 * y, 7, 3)
    print(f" Horizontal tunnel: {h_tunnel:.3f} > 0.4? {h_tunnel > 0.4}")
    print(f" Vertical tunnel: {v_tunnel:.3f} > 0.4? {v_tunnel > 0.4}")
    if h_tunnel > 0.4 or v_tunnel > 0.4:
        print(" ❌ Blocked by tunnel")
        return False

    # Coal: needs both the noise threshold and a successful random roll.
    coal_noise = _simplex(simplex, x, y, 1, 8)
    coal_random = np.random.uniform()
    coal_cutoff = 1 - config.coal_probability
    print(f" Coal noise: {coal_noise:.3f} > {config.coal_threshold}? {coal_noise > config.coal_threshold}")
    print(f" Coal random: {coal_random:.3f} > {coal_cutoff:.3f}? {coal_random > coal_cutoff}")
    if coal_noise > config.coal_threshold and coal_random > coal_cutoff:
        print(" ❌ Blocked by coal")
        return False

    # Iron: same shape as the coal check.
    iron_noise = _simplex(simplex, x, y, 2, 6)
    iron_random = np.random.uniform()
    iron_cutoff = 1 - config.iron_probability
    print(f" Iron noise: {iron_noise:.3f} > {config.iron_threshold}? {iron_noise > config.iron_threshold}")
    print(f" Iron random: {iron_random:.3f} > {iron_cutoff:.3f}? {iron_random > iron_cutoff}")
    if iron_noise > config.iron_threshold and iron_random > iron_cutoff:
        print(" ❌ Blocked by iron")
        return False

    # Diamond: mountain value must clear the threshold, then the roll must pass.
    threshold_met = mountain > config.diamond_threshold
    print(f" Diamond threshold check: {mountain:.3f} > {config.diamond_threshold}? {threshold_met}")
    if not threshold_met:
        print(" ❌ Mountain value too low for diamonds")
        return False

    # The diamond roll is only drawn once the threshold is met.
    diamond_random = np.random.uniform()
    diamond_cutoff = 1 - config.diamond_probability
    print(" ✓ Diamond threshold met!")
    print(f" Diamond random: {diamond_random:.3f} > {diamond_cutoff:.3f}? {diamond_random > diamond_cutoff}")
    if diamond_random > diamond_cutoff:
        print(" ✅ DIAMOND SPAWNED!")
        return True

    print(" ❌ Diamond probability check failed")
    return False
87
+
88
def find_diamond_candidates(config, num_positions=1000, seed=42):
    """Sample random tiles and keep those that clear the diamond threshold unblocked.

    Args:
        config: Object exposing ``diamond_threshold`` and ``cave_threshold``.
        num_positions: Number of random (x, y) samples to draw.
        seed: Seeds both the global NumPy RNG and the OpenSimplex generator.

    Returns:
        A list of ``(x, y, mountain)`` tuples for sampled tiles whose mountain
        value exceeds ``config.diamond_threshold`` and that are not claimed by
        a cave or tunnel check. Coal and iron checks are not applied here.
    """
    np.random.seed(seed)
    noise = opensimplex.OpenSimplex(seed=seed)

    found = []
    for _ in range(num_positions):
        # x is drawn before y so the random sequence matches a two-statement draw.
        x, y = np.random.randint(10, 200), np.random.randint(10, 200)

        water = _simplex(noise, x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
        mountain = _simplex(noise, x, y, 0, {15: 1, 5: 0.3}) - 0.3 * water

        if not mountain > config.diamond_threshold:
            continue

        # Any one of these terrain features would claim the tile first.
        blockers = (
            _simplex(noise, x, y, 6, 7) > 0.15 and mountain > 0.3,
            _simplex(noise, x, y, 6, 5) > config.cave_threshold,
            _simplex(noise, 2 * x, y / 5, 7, 3) > 0.4,
            _simplex(noise, x / 5, 2 * y, 7, 3) > 0.4,
        )
        if not any(blockers):
            # This position could potentially spawn a diamond
            found.append((x, y, mountain))

    return found
115
+
116
# The key issue: the probability condition.
# NOTE(review): these statements are not wrapped in a __main__ guard, so they
# run whenever this module is imported.
print("\n".join([
    "THE KEY ISSUE WITH DIAMOND SPAWNING:",
    "=" * 50,
    "\nIn the worldgen.py code, line 55:",
    " elif mountain > config.diamond_threshold and uniform() > (1 - config.diamond_probability):",
    "",
    "With default diamond_probability = 0.006:",
    " The condition uniform() > (1 - 0.006) means uniform() > 0.994",
    " This means we need to roll > 0.994 to spawn a diamond",
    " That's only a 0.6% chance!",
    "",
    "The comment says 'high probability' but 0.006 is actually VERY LOW!",
    "",
    "To have 'high probability', diamond_probability should be something like:",
    " - 0.5 for 50% chance",
    " - 0.8 for 80% chance",
    " - 0.95 for 95% chance",
    "",
]))

# Demonstrate with actual positions
print("\nDemonstration with a few random positions:")
print("=" * 50)

config = WorldGenConfig(diamond_probability=0.006)  # Default low probability
np.random.seed(42)
simplex = opensimplex.OpenSimplex(seed=42)

# Find some candidate positions (this call reseeds the NumPy RNG itself)
candidates = find_diamond_candidates(config, num_positions=5000)
print(f"\nFound {len(candidates)} positions that could spawn diamonds")

# Trace at most the first three candidates; an empty list simply skips the loop.
for x, y, mountain in candidates[:3]:
    trace_single_position(x, y, config, simplex)

print("\n" + "=" * 50)
print("\nNow testing with HIGH probability (0.95):")
config_high = WorldGenConfig(diamond_probability=0.95)

if candidates:
    x, y, mountain = candidates[0]
    trace_single_position(x, y, config_high, simplex)
@@ -0,0 +1,158 @@
1
+ """
2
+ Analyze diamond spawning in Crafter world generation.
3
+ This script helps understand why diamonds aren't appearing with high probability.
4
+ """
5
+
6
+ import numpy as np
7
+ import opensimplex
8
+ from crafter.config import WorldGenConfig
9
+ from crafter.worldgen import _simplex
10
+
11
def analyze_diamond_conditions(config=None, num_samples=10000, seed=42):
    """Count how often each placement check fires across randomly sampled tiles.

    Args:
        config: Object with the world-generation thresholds and probabilities;
            a default ``WorldGenConfig()`` is built when None.
        num_samples: Number of random (x, y) tiles to sample.
        seed: Seeds both the global NumPy RNG and the OpenSimplex generator.

    Returns:
        A dict of counters (``mountain_positions``, ``blocked_by_*``,
        ``diamond_threshold_met``, ``diamonds_spawned``), the sample count under
        ``total_positions``, every sampled mountain value under
        ``mountain_values``, and the ``(x, y, mountain)`` tuples of spawned
        diamonds under ``positions_checked``.
    """
    config = WorldGenConfig() if config is None else config

    np.random.seed(seed)
    noise = opensimplex.OpenSimplex(seed=seed)

    # Track statistics (key order matches the documented result layout)
    stats = {'total_positions': num_samples}
    stats.update(dict.fromkeys(
        (
            'mountain_positions',
            'diamond_threshold_met',
            'diamonds_spawned',
            'blocked_by_coal',
            'blocked_by_iron',
            'blocked_by_caves',
            'blocked_by_tunnels',
            'blocked_by_lava',
        ),
        0,
    ))
    stats['mountain_values'] = []
    stats['positions_checked'] = []

    # Simulate world generation at random positions
    for _ in range(num_samples):
        x = np.random.randint(10, 200)  # Avoid spawn area
        y = np.random.randint(10, 200)

        # Simplified terrain: no player-spawn adjustment is applied.
        water = _simplex(noise, x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
        mountain = _simplex(noise, x, y, 0, {15: 1, 5: 0.3}) - 0.3 * water
        stats['mountain_values'].append(mountain)

        # Only mountain tiles go through the material checks.
        if not mountain > config.mountain_threshold:
            continue
        stats['mountain_positions'] += 1

        # Caves
        in_wide_cave = _simplex(noise, x, y, 6, 7) > 0.15 and mountain > 0.3
        in_narrow_cave = _simplex(noise, x, y, 6, 5) > config.cave_threshold
        if in_wide_cave or in_narrow_cave:
            stats['blocked_by_caves'] += 1
            continue

        # Tunnels
        in_h_tunnel = _simplex(noise, 2 * x, y / 5, 7, 3) > 0.4
        in_v_tunnel = _simplex(noise, x / 5, 2 * y, 7, 3) > 0.4
        if in_h_tunnel or in_v_tunnel:
            stats['blocked_by_tunnels'] += 1
            continue

        # Coal: the random roll is drawn even when the noise check fails.
        coal_noise_ok = _simplex(noise, x, y, 1, 8) > config.coal_threshold
        coal_roll_ok = np.random.uniform() > (1 - config.coal_probability)
        if coal_noise_ok and coal_roll_ok:
            stats['blocked_by_coal'] += 1
            continue

        # Iron: same pattern as coal.
        iron_noise_ok = _simplex(noise, x, y, 2, 6) > config.iron_threshold
        iron_roll_ok = np.random.uniform() > (1 - config.iron_probability)
        if iron_noise_ok and iron_roll_ok:
            stats['blocked_by_iron'] += 1
            continue

        # Diamond threshold
        if not mountain > config.diamond_threshold:
            continue
        stats['diamond_threshold_met'] += 1

        # Diamond probability roll
        if not np.random.uniform() > (1 - config.diamond_probability):
            continue

        # Check lava condition (which comes after diamond)
        # NOTE(review): if the real generator evaluates diamond before lava in
        # one elif chain, lava could not block a diamond -- confirm against
        # worldgen before trusting 'blocked_by_lava'.
        lava_hit = mountain > 0.3 and _simplex(noise, x, y, 6, 5) > config.lava_threshold
        if lava_hit:
            stats['blocked_by_lava'] += 1
        else:
            stats['diamonds_spawned'] += 1
            stats['positions_checked'].append((x, y, mountain))

    return stats
93
+
94
def print_analysis(stats, config):
    """Print a human-readable report for the result of ``analyze_diamond_conditions``.

    Args:
        stats: Dict with the counters ``total_positions``,
            ``mountain_positions``, ``blocked_by_caves``,
            ``blocked_by_tunnels``, ``blocked_by_coal``, ``blocked_by_iron``,
            ``blocked_by_lava``, ``diamond_threshold_met``,
            ``diamonds_spawned`` and the list ``mountain_values``.
        config: Object exposing the mountain/diamond/coal/iron thresholds and
            probabilities that are echoed in the report header.

    An empty run (``total_positions == 0`` or no ``mountain_values``) is
    reported as 0% and "(no samples)" instead of raising ZeroDivisionError or
    ValueError.
    """
    def pct(part, whole):
        # Percentage of ``part`` in ``whole``; 0.0 when the denominator is zero.
        return 100 * part / whole if whole else 0.0

    total = stats['total_positions']
    in_mountain = stats['mountain_positions']
    threshold_met = stats['diamond_threshold_met']

    print("Diamond Spawning Analysis")
    print("=" * 50)
    print("Configuration:")
    print(f" Mountain threshold: {config.mountain_threshold}")
    print(f" Diamond threshold: {config.diamond_threshold}")
    print(f" Diamond probability: {config.diamond_probability}")
    print(f" Coal threshold: {config.coal_threshold}")
    print(f" Coal probability: {config.coal_probability}")
    print(f" Iron threshold: {config.iron_threshold}")
    print(f" Iron probability: {config.iron_probability}")
    print()

    print(f"Results from {total} positions:")
    print(f" Mountain positions: {in_mountain} ({pct(in_mountain, total):.2f}%)")

    if in_mountain > 0:
        print(" Within mountain areas:")
        print(f" Blocked by caves: {stats['blocked_by_caves']} ({pct(stats['blocked_by_caves'], in_mountain):.2f}%)")
        print(f" Blocked by tunnels: {stats['blocked_by_tunnels']} ({pct(stats['blocked_by_tunnels'], in_mountain):.2f}%)")
        print(f" Blocked by coal: {stats['blocked_by_coal']} ({pct(stats['blocked_by_coal'], in_mountain):.2f}%)")
        print(f" Blocked by iron: {stats['blocked_by_iron']} ({pct(stats['blocked_by_iron'], in_mountain):.2f}%)")
        print(f" Diamond threshold met: {threshold_met} ({pct(threshold_met, in_mountain):.2f}%)")

    if threshold_met > 0:
        print(" Of positions meeting diamond threshold:")
        print(f" Diamonds spawned: {stats['diamonds_spawned']} ({pct(stats['diamonds_spawned'], threshold_met):.2f}%)")
        print(f" Blocked by lava: {stats['blocked_by_lava']} ({pct(stats['blocked_by_lava'], threshold_met):.2f}%)")

    print(f"\n Overall diamond spawn rate: {stats['diamonds_spawned']} ({pct(stats['diamonds_spawned'], total):.4f}%)")

    # Mountain value statistics
    mountain_values = np.array(stats['mountain_values'])
    print("\nMountain value statistics:")
    if mountain_values.size == 0:
        # min()/max()/percentile() all fail on an empty array.
        print(" (no samples)")
        return
    print(f" Min: {mountain_values.min():.3f}")
    print(f" Max: {mountain_values.max():.3f}")
    print(f" Mean: {mountain_values.mean():.3f}")
    print(f" Std: {mountain_values.std():.3f}")
    print(" Percentiles:")
    for p in [50, 75, 90, 95, 99]:
        print(f" {p}th: {np.percentile(mountain_values, p):.3f}")
136
+
137
if __name__ == "__main__":
    # Each scenario is (headline, keyword overrides for WorldGenConfig).
    scenarios = [
        ("Testing with default configuration:", {}),
        ("Testing with increased diamond probability (0.5):", {"diamond_probability": 0.5}),
        ("Testing with lower diamond threshold (0.16):", {"diamond_threshold": 0.16}),
    ]

    for position, (headline, overrides) in enumerate(scenarios):
        # A separator goes between reports, not before the first one.
        if position:
            print("\n" + "=" * 50 + "\n")

        print(headline)
        scenario_config = WorldGenConfig(**overrides)
        scenario_stats = analyze_diamond_conditions(scenario_config)
        print_analysis(scenario_stats, scenario_config)
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Compare agent performance across different world configurations.
4
+ """
5
+
6
+ import subprocess
7
+ import json
8
+ from pathlib import Path
9
+ import sys
10
+
11
def _parse_metrics(stdout):
    """Extract the summary metrics from the agent script's captured stdout.

    The last whitespace-separated token of a matching line is taken as the
    value. Lines whose last token is not a number are skipped rather than
    aborting the whole comparison with a ValueError.
    """
    metrics = {}
    for line in stdout.split('\n'):
        if "Mean Score" in line:
            key = "mean_score"
        elif "Avg Achievements/Episode" in line:
            key = "avg_achievements"
        elif "Average Score:" in line and "📊" not in line:
            key = "trace_score"
        else:
            continue
        try:
            metrics[key] = float(line.split()[-1])
        except ValueError:
            # e.g. a trailing unit or label; keep any earlier value for this key.
            continue
    return metrics


def run_evaluation(world_config, episodes=2, max_turns=20):
    """Run the custom-Crafter agent demo for one world configuration.

    Args:
        world_config: Name of the world configuration passed as ``--world-config``.
        episodes: Number of episodes, passed as ``--episodes``.
        max_turns: Turn limit per episode, passed as ``--max-turns``.

    Returns:
        A dict that may contain ``mean_score``, ``avg_achievements`` and
        ``trace_score`` (floats), depending on which summary lines the child
        process printed. It is empty when none were found.

    A non-zero exit status of the child process is reported on stderr so that
    an empty result is not mistaken for a genuine score of zero.
    """
    print(f"\n{'='*60}")
    print(f"Testing {world_config.upper()} world configuration")
    print(f"{'='*60}")

    cmd = [
        sys.executable, "-m",
        "synth_ai.environments.examples.crafter_custom.agent_demos.test_crafter_custom_agent",
        "--model", "gpt-4.1-nano",
        "--world-config", world_config,
        "--episodes", str(episodes),
        "--max-turns", str(max_turns),
        "--evaluate-traces",
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # Show only the tail of stderr; the full traceback can be long.
        tail = "\n".join((result.stderr or "").strip().splitlines()[-5:])
        print(
            f"Warning: evaluation for {world_config!r} exited with status "
            f"{result.returncode}\n{tail}",
            file=sys.stderr,
        )

    # Extract key metrics from output
    return _parse_metrics(result.stdout or "")
42
+
43
def main():
    """Evaluate every world configuration and print a side-by-side summary table."""
    rule = "=" * 60

    print("🎮 Crafter World Configuration Comparison")
    print(rule)

    configs = ["peaceful", "easy", "normal", "hard"]
    results = {}

    for name in configs:
        outcome = run_evaluation(name, episodes=2, max_turns=15)
        results[name] = outcome
        print(f"\nResults for {name}: {outcome}")

    # Summary comparison
    print("\n" + rule)
    print("📊 COMPARISON SUMMARY")
    print(rule)
    print(f"{'Config':<10} {'Mean Score':<12} {'Avg Achievements':<18} {'Trace Score':<12}")
    print("-" * 60)

    for name in configs:
        row = results[name]
        # Metrics the run did not report are shown as 0.
        mean_score = row.get('mean_score', 0)
        achievements = row.get('avg_achievements', 0)
        trace_score = row.get('trace_score', 0)
        print(f"{name:<10} {mean_score:<12.2f} {achievements:<18.2f} {trace_score:<12.2f}")

    print("\n💡 Analysis:")
    print("- Peaceful worlds should have highest scores (no enemies)")
    print("- Hard worlds should have lowest scores (many enemies, few resources)")
    print("- Resource availability directly impacts achievement unlocking")


if __name__ == "__main__":
    main()
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Display statistics about Crafter datasets
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+ from collections import Counter
9
+ import sys
10
+
11
+
12
def _read_json(path: Path):
    """Load a JSON document, always decoding it as UTF-8.

    Without an explicit encoding ``open`` falls back to the platform locale,
    which mis-decodes non-ASCII text on systems whose locale is not UTF-8.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def analyze_dataset(dataset_path: Path):
    """Print summary statistics for one dataset directory.

    Args:
        dataset_path: Directory containing ``metadata.json`` (with ``name``,
            ``description``, ``num_instances`` and ``split_info``) and
            ``instances.json`` (a list of instances, each with ``id``,
            ``metadata`` and ``impetus``).

    Raises:
        FileNotFoundError: If either JSON file is missing.
        KeyError: If a required key is absent from the loaded data.
    """
    metadata = _read_json(dataset_path / "metadata.json")
    instances = _read_json(dataset_path / "instances.json")

    split_info = metadata['split_info']
    val_count = len(split_info['val_instance_ids'])
    test_count = len(split_info['test_instance_ids'])
    # Train is whatever is not explicitly listed as validation or test.
    train_count = len(instances) - val_count - test_count

    print(f"\nDataset: {metadata['name']}")
    print(f"Description: {metadata['description']}")
    print(f"Total instances: {metadata['num_instances']}")
    print(f"Train/Val/Test split: {train_count}/{val_count}/{test_count}")

    # Analyze by difficulty
    difficulties = Counter(inst['metadata']['difficulty'] for inst in instances)
    print("\nInstances by difficulty:")
    for diff, count in sorted(difficulties.items()):
        print(f" {diff}: {count} ({count/len(instances)*100:.1f}%)")

    # Analyze by impetus type, achievement focus and speedrun target in one pass
    impetus_types = Counter()
    focus_counts = Counter()
    speedrun_targets = Counter()

    for inst in instances:
        impetus = inst['impetus']
        instructions = impetus['instructions']
        lowered = instructions.lower()

        if 'speedrun' in lowered:
            impetus_types['speedrun'] += 1
            # The target is the first word after the first colon, if any.
            if ':' in instructions:
                target = instructions.split(':')[1].strip().split(' ')[0]
                speedrun_targets[target] += 1
        elif 'focus on' in lowered:
            impetus_types['focused'] += 1
        else:
            impetus_types['general'] += 1

        if impetus.get('achievement_focus'):
            for ach in impetus['achievement_focus']:
                focus_counts[ach] += 1

    print("\nInstances by type:")
    for type_name, count in sorted(impetus_types.items()):
        print(f" {type_name}: {count} ({count/len(instances)*100:.1f}%)")

    if focus_counts:
        print("\nTop achievement focuses:")
        for ach, count in focus_counts.most_common(10):
            print(f" {ach}: {count}")

    if speedrun_targets:
        print("\nSpeedrun targets:")
        for target, count in speedrun_targets.most_common():
            print(f" {target}: {count}")

    # Sample some instances
    print("\nSample instances:")
    for i, inst in enumerate(instances[:3]):
        print(f"\n Instance {i+1}:")
        print(f" ID: {inst['id']}")
        print(f" Difficulty: {inst['metadata']['difficulty']}")
        print(f" Seed: {inst['metadata']['world_seed']}")
        print(f" Instructions: {inst['impetus']['instructions'][:80]}...")
82
+
83
+
84
def main():
    """Print statistics for every dataset found under ``dataset/``.

    A dataset is any sub-directory of ``dataset/`` (relative to the current
    working directory) that contains a ``metadata.json`` file. If the
    directory does not exist or holds no datasets, a short notice is printed
    instead.
    """
    dataset_dir = Path("dataset")

    print("Crafter Dataset Statistics")
    print("=" * 60)

    # iterdir() raises FileNotFoundError on a missing directory, which would
    # bypass the "No datasets found" notice below, so check for it first.
    if dataset_dir.is_dir():
        datasets = sorted(
            d for d in dataset_dir.iterdir()
            if d.is_dir() and (d / "metadata.json").exists()
        )
    else:
        datasets = []

    if not datasets:
        print("No datasets found in dataset/")
        return

    print(f"Found {len(datasets)} dataset(s)")

    for dataset_path in datasets:
        analyze_dataset(dataset_path)
        print("\n" + "-" * 60)


if __name__ == "__main__":
    main()
@@ -0,0 +1,119 @@
1
+ """
2
+ Summary of the diamond spawning issue in Crafter.
3
+ """
4
+
5
def _describe_probability(prob):
    """Return the qualitative label printed for spawn probability ``prob``."""
    if prob < 0.1:
        return 'VERY LOW'
    if prob < 0.3:
        return 'LOW'
    if prob < 0.7:
        return 'MODERATE'
    if prob < 0.9:
        return 'HIGH'
    return 'VERY HIGH'


def explain_probability_condition(probabilities=None):
    """Explain how the probability condition works.

    Prints the spawn condition ``uniform() > (1 - config.diamond_probability)``
    and then, for each example probability, the resulting threshold, the
    effective spawn chance, and a qualitative label.

    Args:
        probabilities: Optional iterable of example ``diamond_probability``
            values to walk through. Defaults to
            ``[0.006, 0.1, 0.5, 0.8, 0.95]``.
    """
    if probabilities is None:
        probabilities = [0.006, 0.1, 0.5, 0.8, 0.95]

    print("DIAMOND SPAWNING PROBABILITY EXPLANATION")
    print("=" * 60)
    print()
    print("The code uses this condition to spawn diamonds:")
    print(" uniform() > (1 - config.diamond_probability)")
    print()
    print("This means:")
    print(" - uniform() generates a random number between 0 and 1")
    print(" - We spawn a diamond if this random number is GREATER than (1 - diamond_probability)")
    print()
    print("Examples:")
    print("-" * 60)

    for prob in probabilities:
        threshold = 1 - prob
        # Derived back from the threshold on purpose: it mirrors the chance
        # that uniform() lands above the threshold in the condition shown.
        actual_chance = 1 - threshold
        print(f"\nWith diamond_probability = {prob}:")
        print(f" Condition: uniform() > {threshold:.3f}")
        print(f" Actual spawn chance: {actual_chance:.1%}")
        print(f" Description: {_describe_probability(prob)}")
29
+
30
def show_cascading_conditions():
    """Print the ordered list of material checks applied in mountain areas.

    The report lists each check in evaluation order together with the
    material it places, and closes with a note that the first matching check
    stops evaluation.
    """
    # Each entry is one output line; empty strings are the blank separators.
    report_lines = (
        "\n\nCASCADING CONDITIONS IN WORLD GENERATION",
        "=" * 60,
        "",
        "For a position in a mountain area, materials are checked in this order:",
        "",
        "1. Cave check (two conditions)",
        " └─ If true → place 'path'",
        "",
        "2. Horizontal tunnel check",
        " └─ If true → place 'path'",
        "",
        "3. Vertical tunnel check",
        " └─ If true → place 'path'",
        "",
        "4. Coal check (noise threshold AND probability)",
        " └─ If true → place 'coal'",
        "",
        "5. Iron check (noise threshold AND probability)",
        " └─ If true → place 'iron'",
        "",
        "6. Diamond check (mountain threshold AND probability)",
        " └─ If true → place 'diamond'",
        "",
        "7. Lava check",
        " └─ If true → place 'lava'",
        "",
        "8. Default: place 'stone'",
        "",
        "IMPORTANT: Once any condition is met, no further checks are done!",
        "This means diamonds can be blocked by caves, tunnels, coal, or iron.",
    )
    for text in report_lines:
        print(text)
62
+
63
def calculate_effective_spawn_rate(
    *,
    total_positions=10000,
    mountain_positions=2402,
    blocked_caves=431,
    blocked_tunnels=445,
    blocked_coal=104,
    blocked_iron=41,
    diamond_threshold_met=1185,
    diamond_probability=0.006,
):
    """Calculate and print the effective diamond spawn rate.

    The expected diamond count is ``diamond_threshold_met *
    diamond_probability`` and the overall rate is that count divided by
    ``total_positions``. The defaults are the figures recorded from an
    earlier analysis run; pass other values to report on a different sample.

    Args:
        total_positions: Number of positions sampled. Must be positive.
        mountain_positions: Positions classified as mountain.
        blocked_caves: Count reported as blocked by caves.
        blocked_tunnels: Count reported as blocked by tunnels.
        blocked_coal: Count reported as blocked by coal.
        blocked_iron: Count reported as blocked by iron.
        diamond_threshold_met: Positions that reached the diamond check.
        diamond_probability: Per-position diamond spawn probability.

    Raises:
        ValueError: If ``total_positions`` is not positive, since both
            percentages divide by it.
    """
    if total_positions <= 0:
        raise ValueError("total_positions must be positive")

    print("\n\nEFFECTIVE DIAMOND SPAWN RATE")
    print("=" * 60)
    print()
    # The heading is derived from total_positions so the two cannot drift.
    print(f"Based on the analysis of {total_positions:,} positions:")
    print()

    print(f"Total positions: {total_positions}")
    print(f"Mountain positions: {mountain_positions} ({100*mountain_positions/total_positions:.1f}%)")
    print(f" Blocked by caves: {blocked_caves}")
    print(f" Blocked by tunnels: {blocked_tunnels}")
    print(f" Blocked by coal: {blocked_coal}")
    print(f" Blocked by iron: {blocked_iron}")
    print(f" Reached diamond check: {diamond_threshold_met}")
    print()
    print(f"Diamond spawn probability: {diamond_probability} ({diamond_probability*100:.1f}%)")
    print()

    expected_diamonds = diamond_threshold_met * diamond_probability
    overall_rate = expected_diamonds / total_positions

    print(f"Expected diamonds: {expected_diamonds:.1f}")
    print(f"Overall spawn rate: {overall_rate:.4%}")
    print()
    print("This explains why diamonds are so rare!")
99
+
100
def show_recommendation():
    """Print the recommended configuration changes for more frequent diamonds.

    Kept as its own function, like the other report sections, so it can be
    imported and called without running the module as a script.
    """
    print("\n\nRECOMMENDATION")
    print("=" * 60)
    print()
    print("To make diamonds appear with 'high probability', you should:")
    print()
    print("1. Increase diamond_probability to at least 0.5 (50% chance)")
    print(" or even 0.8-0.9 for truly 'high' probability")
    print()
    print("2. Consider adjusting other thresholds to reduce blocking:")
    print(" - Increase coal_threshold (currently 0.0)")
    print(" - Increase iron_threshold (currently 0.4)")
    print(" - Decrease coal_probability and iron_probability")
    print()
    print("3. Optionally lower diamond_threshold slightly (currently 0.18)")
    print(" to increase the number of positions that can spawn diamonds")


if __name__ == "__main__":
    # Run every report section in order when executed as a script.
    explain_probability_condition()
    show_cascading_conditions()
    calculate_effective_spawn_rate()
    show_recommendation()
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Example usage of Crafter datasets
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
9
+
10
+ from run_dataset import CrafterDatasetRunner
11
+
12
+
13
def main():
    """Run the four example batches against the Crafter datasets.

    Examples 1-3 each run five instances of ``crafter_balanced_v1`` with a
    500-step cap, selected by difficulty, split, and impetus type
    respectively. Example 4 runs three instances of
    ``crafter_progression_v1`` per difficulty with a 300-step cap.
    """
    runner = CrafterDatasetRunner()

    # (banner, selection keyword) for the three crafter_balanced_v1 examples.
    balanced_examples = (
        ("=== Example 1: Run 5 easy instances ===", {"difficulties": ["easy"]}),
        ("\n\n=== Example 2: Run validation set instances ===", {"split": "val"}),
        ("\n\n=== Example 3: Run speedrun challenges ===", {"impetus_types": ["speedrun"]}),
    )
    for banner, selection in balanced_examples:
        print(banner)
        runner.run_batch(
            dataset_name="crafter_balanced_v1",
            num_instances=5,
            **selection,
            max_steps=500
        )

    print("\n\n=== Example 4: Compare difficulties ===")
    for level in ("easy", "normal", "hard"):
        print(f"\n--- Testing {level} ---")
        runner.run_batch(
            dataset_name="crafter_progression_v1",
            num_instances=3,
            difficulties=[level],
            max_steps=300
        )


if __name__ == "__main__":
    main()