synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/cli/__init__.py +6 -0
- synth_ai/cli/demo.py +68 -9
- synth_ai/cli/rl_demo.py +137 -0
- synth_ai/cli/root.py +65 -0
- synth_ai/demos/core/__init__.py +1 -0
- synth_ai/demos/core/cli.py +621 -0
- synth_ai/demos/demo_task_apps/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/core.py +374 -0
- synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/math/app.py +37 -0
- synth_ai/demos/demo_task_apps/math/config.toml +44 -0
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
- synth_ai/environments/examples/bandit/__init__.py +33 -0
- synth_ai/environments/examples/bandit/engine.py +294 -0
- synth_ai/environments/examples/bandit/environment.py +194 -0
- synth_ai/environments/examples/bandit/taskset.py +200 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
- synth_ai/environments/examples/crafter_classic/environment.py +41 -2
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
- synth_ai/environments/service/app.py +8 -0
- synth_ai/install_sqld.sh +40 -0
- synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +110 -11
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
- synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Detailed analysis of why diamonds aren't spawning despite high probability.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import opensimplex
|
|
7
|
+
from crafter.config import WorldGenConfig
|
|
8
|
+
from crafter.worldgen import _simplex
|
|
9
|
+
|
|
10
|
+
def trace_single_position(x, y, config, simplex):
    """Print a step-by-step trace of the diamond-placement checks for one position.

    The checks run in a fixed order (mountain area, caves, tunnels, coal,
    iron, diamond) and the function returns as soon as one of them rules the
    position out. The coal and iron steps each draw one value from the global
    NumPy random state, and the diamond step draws one more when the diamond
    threshold is met, so the outcome depends on the current seed.

    Args:
        x: X coordinate handed to the noise function.
        y: Y coordinate handed to the noise function.
        config: Object exposing the threshold and probability attributes read
            below (mountain, cave, coal, iron and diamond settings).
        simplex: Noise generator passed through to ``_simplex``.

    Returns:
        True if the diamond roll succeeds, False in every other case.
    """

    def noise(nx, ny, z, sizes, *extra):
        # Thin wrapper so each check below only spells out what differs.
        return _simplex(simplex, nx, ny, z, sizes, *extra)

    print(f"\nTracing position ({x}, {y}):")

    # Terrain generation
    water = noise(x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
    elevation = noise(x, y, 0, {15: 1, 5: 0.3})
    elevation -= 0.3 * water

    print(f" Water value: {water:.3f}")
    print(f" Mountain value: {elevation:.3f}")
    print(f" Mountain threshold: {config.mountain_threshold}")

    if elevation <= config.mountain_threshold:
        print(f" ❌ Not in mountain area (mountain={elevation:.3f} <= threshold={config.mountain_threshold})")
        return False

    print(" ✓ In mountain area")

    # Caves: either of two noise conditions turns the cell into a cave.
    cave_a = noise(x, y, 6, 7)
    cave_b = noise(x, y, 6, 5)
    deep_cave = cave_a > 0.15 and elevation > 0.3
    print(f" Cave check 1: {cave_a:.3f} > 0.15 and mountain > 0.3? {deep_cave}")
    print(f" Cave check 2: {cave_b:.3f} > {config.cave_threshold}? {cave_b > config.cave_threshold}")

    if deep_cave or cave_b > config.cave_threshold:
        print(" ❌ Blocked by cave")
        return False

    # Tunnels: stretched noise along each axis.
    across = noise(2 * x, y / 5, 7, 3)
    down = noise(x / 5, 2 * y, 7, 3)
    print(f" Horizontal tunnel: {across:.3f} > 0.4? {across > 0.4}")
    print(f" Vertical tunnel: {down:.3f} > 0.4? {down > 0.4}")

    if across > 0.4 or down > 0.4:
        print(" ❌ Blocked by tunnel")
        return False

    # Coal: noise gate plus a random roll.
    coal_noise = noise(x, y, 1, 8)
    coal_roll = np.random.uniform()
    coal_cutoff = 1 - config.coal_probability
    print(f" Coal noise: {coal_noise:.3f} > {config.coal_threshold}? {coal_noise > config.coal_threshold}")
    print(f" Coal random: {coal_roll:.3f} > {coal_cutoff:.3f}? {coal_roll > coal_cutoff}")

    if coal_noise > config.coal_threshold and coal_roll > coal_cutoff:
        print(" ❌ Blocked by coal")
        return False

    # Iron: same shape as the coal check, with its own noise layer.
    iron_noise = noise(x, y, 2, 6)
    iron_roll = np.random.uniform()
    iron_cutoff = 1 - config.iron_probability
    print(f" Iron noise: {iron_noise:.3f} > {config.iron_threshold}? {iron_noise > config.iron_threshold}")
    print(f" Iron random: {iron_roll:.3f} > {iron_cutoff:.3f}? {iron_roll > iron_cutoff}")

    if iron_noise > config.iron_threshold and iron_roll > iron_cutoff:
        print(" ❌ Blocked by iron")
        return False

    # Diamond: elevation gate first, then the probability roll.
    print(f" Diamond threshold check: {elevation:.3f} > {config.diamond_threshold}? {elevation > config.diamond_threshold}")

    if not (elevation > config.diamond_threshold):
        print(" ❌ Mountain value too low for diamonds")
        return False

    diamond_roll = np.random.uniform()
    diamond_cutoff = 1 - config.diamond_probability
    print(" ✓ Diamond threshold met!")
    print(f" Diamond random: {diamond_roll:.3f} > {diamond_cutoff:.3f}? {diamond_roll > diamond_cutoff}")

    if diamond_roll > diamond_cutoff:
        print(" ✅ DIAMOND SPAWNED!")
        return True

    print(" ❌ Diamond probability check failed")
    return False
|
|
87
|
+
|
|
88
|
+
def find_diamond_candidates(config, num_positions=1000, seed=42):
    """Sample random positions and keep those where a diamond could be placed.

    A position qualifies when its mountain value exceeds
    ``config.diamond_threshold`` and none of the cave or tunnel conditions
    hold. Coal, iron and the diamond probability roll are not considered.

    Note that this reseeds the global NumPy random state with ``seed``.

    Args:
        config: Object exposing ``diamond_threshold`` and ``cave_threshold``.
        num_positions: How many random positions to sample.
        seed: Seed for both the NumPy random state and the noise generator.

    Returns:
        A list of ``(x, y, mountain)`` tuples, in sampling order.
    """
    np.random.seed(seed)
    noise_source = opensimplex.OpenSimplex(seed=seed)

    hits = []

    for _ in range(num_positions):
        x = np.random.randint(10, 200)
        y = np.random.randint(10, 200)

        water = _simplex(noise_source, x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
        elevation = _simplex(noise_source, x, y, 0, {15: 1, 5: 0.3})
        elevation -= 0.3 * water

        if not (elevation > config.diamond_threshold):
            continue

        # Anything that would carve the cell into a path rules it out.
        blockers = (
            _simplex(noise_source, x, y, 6, 7) > 0.15 and elevation > 0.3,
            _simplex(noise_source, x, y, 6, 5) > config.cave_threshold,
            _simplex(noise_source, 2 * x, y / 5, 7, 3) > 0.4,
            _simplex(noise_source, x / 5, 2 * y, 7, 3) > 0.4,
        )
        if not any(blockers):
            # This position could potentially spawn a diamond
            hits.append((x, y, elevation))

    return hits
|
|
115
|
+
|
|
116
|
+
# The key issue: the probability condition.
# The explanation is printed as one block of text; an empty entry is a blank line.
_RULE = "=" * 50

print(
    "\n".join(
        [
            "THE KEY ISSUE WITH DIAMOND SPAWNING:",
            _RULE,
            "\nIn the worldgen.py code, line 55:",
            " elif mountain > config.diamond_threshold and uniform() > (1 - config.diamond_probability):",
            "",
            "With default diamond_probability = 0.006:",
            " The condition uniform() > (1 - 0.006) means uniform() > 0.994",
            " This means we need to roll > 0.994 to spawn a diamond",
            " That's only a 0.6% chance!",
            "",
            "The comment says 'high probability' but 0.006 is actually VERY LOW!",
            "",
            "To have 'high probability', diamond_probability should be something like:",
            " - 0.5 for 50% chance",
            " - 0.8 for 80% chance",
            " - 0.95 for 95% chance",
            "",
        ]
    )
)

# Demonstrate with actual positions
print("\nDemonstration with a few random positions:")
print(_RULE)

config = WorldGenConfig(diamond_probability=0.006)  # Default low probability
np.random.seed(42)
simplex = opensimplex.OpenSimplex(seed=42)

# Find some candidate positions
candidates = find_diamond_candidates(config, num_positions=5000)
print(f"\nFound {len(candidates)} positions that could spawn diamonds")

# Trace the first few candidates (at most three) with the low probability.
for x, y, mountain in candidates[:3]:
    trace_single_position(x, y, config, simplex)

print("\n" + _RULE)
print("\nNow testing with HIGH probability (0.95):")
config_high = WorldGenConfig(diamond_probability=0.95)

# Re-trace the first candidate, this time with the high-probability config.
if candidates:
    x, y, mountain = candidates[0]
    trace_single_position(x, y, config_high, simplex)
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Analyze diamond spawning in Crafter world generation.
|
|
3
|
+
This script helps understand why diamonds aren't appearing with high probability.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import opensimplex
|
|
8
|
+
from crafter.config import WorldGenConfig
|
|
9
|
+
from crafter.worldgen import _simplex
|
|
10
|
+
|
|
11
|
+
def analyze_diamond_conditions(config=None, num_samples=10000, seed=42):
    """Sample random positions and tally which check decides each one.

    For every sampled position the mountain value is recorded. Positions
    inside a mountain area are then run through the cave, tunnel, coal, iron
    and diamond checks in that order; the first check that fires is counted
    and the position is dropped.

    Note that this reseeds the global NumPy random state with ``seed``.

    Args:
        config: Object exposing the threshold/probability attributes read
            below. A default ``WorldGenConfig()`` is used when None.
        num_samples: Number of random positions to sample.
        seed: Seed for both the NumPy random state and the noise generator.

    Returns:
        A dict of counters plus two lists: ``mountain_values`` (one entry per
        sample) and ``positions_checked`` (``(x, y, mountain)`` for each
        position counted as a spawned diamond).
    """
    config = WorldGenConfig() if config is None else config

    np.random.seed(seed)
    noise_source = opensimplex.OpenSimplex(seed=seed)

    def noise(nx, ny, z, sizes, *extra):
        return _simplex(noise_source, nx, ny, z, sizes, *extra)

    # Track statistics
    stats = {
        "total_positions": num_samples,
        "mountain_positions": 0,
        "diamond_threshold_met": 0,
        "diamonds_spawned": 0,
        "blocked_by_coal": 0,
        "blocked_by_iron": 0,
        "blocked_by_caves": 0,
        "blocked_by_tunnels": 0,
        "blocked_by_lava": 0,
        "mountain_values": [],
        "positions_checked": [],
    }

    # Simulate world generation at random positions
    for _ in range(num_samples):
        x = np.random.randint(10, 200)  # Avoid spawn area
        y = np.random.randint(10, 200)

        # Simplified terrain: no player-spawn adjustment is applied.
        water = noise(x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
        elevation = noise(x, y, 0, {15: 1, 5: 0.3})
        elevation -= 0.3 * water

        stats["mountain_values"].append(elevation)

        # Everything below only applies inside a mountain area.
        if not (elevation > config.mountain_threshold):
            continue
        stats["mountain_positions"] += 1

        # Caves
        deep_cave = noise(x, y, 6, 7) > 0.15 and elevation > 0.3
        open_cave = noise(x, y, 6, 5) > config.cave_threshold
        if deep_cave or open_cave:
            stats["blocked_by_caves"] += 1
            continue

        # Tunnels
        across = noise(2 * x, y / 5, 7, 3) > 0.4
        down = noise(x / 5, 2 * y, 7, 3) > 0.4
        if across or down:
            stats["blocked_by_tunnels"] += 1
            continue

        # Coal: the random roll is drawn even when the noise gate fails.
        coal_gate = noise(x, y, 1, 8) > config.coal_threshold
        coal_roll = np.random.uniform() > (1 - config.coal_probability)
        if coal_gate and coal_roll:
            stats["blocked_by_coal"] += 1
            continue

        # Iron: same pattern as coal.
        iron_gate = noise(x, y, 2, 6) > config.iron_threshold
        iron_roll = np.random.uniform() > (1 - config.iron_probability)
        if iron_gate and iron_roll:
            stats["blocked_by_iron"] += 1
            continue

        # Diamond threshold
        if not (elevation > config.diamond_threshold):
            continue
        stats["diamond_threshold_met"] += 1

        # Diamond probability roll; a failed roll is not counted anywhere.
        if np.random.uniform() > (1 - config.diamond_probability):
            # NOTE(review): lava is tested only after the diamond roll has
            # succeeded, and a hit vetoes the diamond. If the real generator
            # evaluates these as an if/elif chain, lava could never pre-empt
            # a diamond — confirm against worldgen.
            lava_here = elevation > 0.3 and noise(x, y, 6, 5) > config.lava_threshold
            if lava_here:
                stats["blocked_by_lava"] += 1
            else:
                stats["diamonds_spawned"] += 1
                stats["positions_checked"].append((x, y, elevation))

    return stats
|
|
93
|
+
|
|
94
|
+
def print_analysis(stats, config):
    """Print a human-readable report for one ``analyze_diamond_conditions`` run.

    Percentages whose denominator is zero are reported as 0 instead of
    raising ``ZeroDivisionError``, and the mountain-value summary is skipped
    with a short notice when no values were recorded. Both situations occur
    when the analysis was run with zero samples.

    Args:
        stats: Dict with the counter keys read below plus a
            ``mountain_values`` sequence of numbers.
        config: Object exposing the mountain, diamond, coal and iron
            threshold/probability attributes printed in the header.
    """

    def percent(count, total):
        # Guard the empty-run case: nothing sampled means nothing to divide by.
        return 100 * count / total if total else 0.0

    total = stats['total_positions']
    mountains = stats['mountain_positions']
    met = stats['diamond_threshold_met']

    print("Diamond Spawning Analysis")
    print("=" * 50)
    print("Configuration:")
    print(f" Mountain threshold: {config.mountain_threshold}")
    print(f" Diamond threshold: {config.diamond_threshold}")
    print(f" Diamond probability: {config.diamond_probability}")
    print(f" Coal threshold: {config.coal_threshold}")
    print(f" Coal probability: {config.coal_probability}")
    print(f" Iron threshold: {config.iron_threshold}")
    print(f" Iron probability: {config.iron_probability}")
    print()

    print(f"Results from {total} positions:")
    print(f" Mountain positions: {mountains} ({percent(mountains, total):.2f}%)")

    if mountains > 0:
        print(" Within mountain areas:")
        print(f" Blocked by caves: {stats['blocked_by_caves']} ({percent(stats['blocked_by_caves'], mountains):.2f}%)")
        print(f" Blocked by tunnels: {stats['blocked_by_tunnels']} ({percent(stats['blocked_by_tunnels'], mountains):.2f}%)")
        print(f" Blocked by coal: {stats['blocked_by_coal']} ({percent(stats['blocked_by_coal'], mountains):.2f}%)")
        print(f" Blocked by iron: {stats['blocked_by_iron']} ({percent(stats['blocked_by_iron'], mountains):.2f}%)")
        print(f" Diamond threshold met: {met} ({percent(met, mountains):.2f}%)")

        if met > 0:
            print(" Of positions meeting diamond threshold:")
            print(f" Diamonds spawned: {stats['diamonds_spawned']} ({percent(stats['diamonds_spawned'], met):.2f}%)")
            print(f" Blocked by lava: {stats['blocked_by_lava']} ({percent(stats['blocked_by_lava'], met):.2f}%)")

    print(f"\n Overall diamond spawn rate: {stats['diamonds_spawned']} ({percent(stats['diamonds_spawned'], total):.4f}%)")

    # Mountain value statistics
    mountain_values = np.array(stats['mountain_values'])
    print("\nMountain value statistics:")
    if mountain_values.size == 0:
        # min()/max()/percentile() all raise on an empty array.
        print(" No mountain values recorded")
        return

    print(f" Min: {mountain_values.min():.3f}")
    print(f" Max: {mountain_values.max():.3f}")
    print(f" Mean: {mountain_values.mean():.3f}")
    print(f" Std: {mountain_values.std():.3f}")
    print(" Percentiles:")
    for p in [50, 75, 90, 95, 99]:
        print(f" {p}th: {np.percentile(mountain_values, p):.3f}")
|
|
136
|
+
|
|
137
|
+
if __name__ == "__main__":
    # Each scenario is (heading, WorldGenConfig keyword overrides), in run order:
    # the defaults, a higher diamond probability, and a lower diamond threshold.
    scenarios = [
        ("Testing with default configuration:", {}),
        ("Testing with increased diamond probability (0.5):", {"diamond_probability": 0.5}),
        ("Testing with lower diamond threshold (0.16):", {"diamond_threshold": 0.16}),
    ]

    for position, (heading, overrides) in enumerate(scenarios):
        if position:
            # Separator goes between reports, not before the first one.
            print("\n" + "=" * 50 + "\n")

        print(heading)
        scenario_config = WorldGenConfig(**overrides)
        scenario_stats = analyze_diamond_conditions(scenario_config)
        print_analysis(scenario_stats, scenario_config)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Compare agent performance across different world configurations.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import subprocess
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
def run_evaluation(world_config, episodes=2, max_turns=20, model="gpt-4.1-nano"):
    """Run the custom-Crafter agent demo in a subprocess and scrape its metrics.

    The demo module is launched with the current interpreter and its stdout
    is scanned for the summary lines recognised by ``_parse_metrics``. A
    non-zero exit status is reported on stderr rather than ignored, so an
    empty result can be told apart from a run that printed no metrics.

    Args:
        world_config: Name of the world configuration, passed as ``--world-config``.
        episodes: Number of episodes, passed as ``--episodes``.
        max_turns: Turn limit per episode, passed as ``--max-turns``.
        model: Model name passed as ``--model``.

    Returns:
        A dict holding whichever of ``mean_score``, ``avg_achievements`` and
        ``trace_score`` could be parsed from the output. It may be empty.
    """
    print(f"\n{'='*60}")
    print(f"Testing {world_config.upper()} world configuration")
    print(f"{'='*60}")

    cmd = [
        sys.executable, "-m",
        "synth_ai.environments.examples.crafter_custom.agent_demos.test_crafter_custom_agent",
        "--model", model,
        "--world-config", world_config,
        "--episodes", str(episodes),
        "--max-turns", str(max_turns),
        "--evaluate-traces",
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # Surface the failure; otherwise the summary silently shows zeros.
        stderr_lines = result.stderr.strip().splitlines()
        detail = f": {stderr_lines[-1]}" if stderr_lines else ""
        print(
            f"Warning: evaluation for {world_config!r} exited with code "
            f"{result.returncode}{detail}",
            file=sys.stderr,
        )

    return _parse_metrics(result.stdout)


def _parse_metrics(stdout):
    """Extract the known metric values from the demo's stdout text."""
    metrics = {}

    for line in stdout.split('\n'):
        if "Mean Score" in line:
            key = "mean_score"
        elif "Avg Achievements/Episode" in line:
            key = "avg_achievements"
        elif "Average Score:" in line and "📊" not in line:
            key = "trace_score"
        else:
            continue

        # The value is expected to be the last whitespace-separated token.
        # A line that matches but does not end in a number is skipped
        # instead of aborting the whole comparison.
        try:
            metrics[key] = float(line.split()[-1])
        except ValueError:
            continue

    return metrics
|
|
42
|
+
|
|
43
|
+
def main():
    """Evaluate each world configuration in turn and print a comparison table."""
    rule = "=" * 60

    print("🎮 Crafter World Configuration Comparison")
    print(rule)

    configs = ["peaceful", "easy", "normal", "hard"]
    results = {}

    # Run the configurations one after another, echoing each raw result.
    for name in configs:
        outcome = run_evaluation(name, episodes=2, max_turns=15)
        results[name] = outcome
        print(f"\nResults for {name}: {outcome}")

    # Summary comparison
    print("\n" + rule)
    print("📊 COMPARISON SUMMARY")
    print(rule)
    print(f"{'Config':<10} {'Mean Score':<12} {'Avg Achievements':<18} {'Trace Score':<12}")
    print("-" * 60)

    # A metric that could not be parsed is shown as 0.
    for name, row in results.items():
        mean_score = row.get('mean_score', 0)
        achievements = row.get('avg_achievements', 0)
        trace_score = row.get('trace_score', 0)
        print(f"{name:<10} {mean_score:<12.2f} {achievements:<18.2f} {trace_score:<12.2f}")

    print("\n💡 Analysis:")
    for remark in (
        "- Peaceful worlds should have highest scores (no enemies)",
        "- Hard worlds should have lowest scores (many enemies, few resources)",
        "- Resource availability directly impacts achievement unlocking",
    ):
        print(remark)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Display statistics about Crafter datasets
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from collections import Counter
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def analyze_dataset(dataset_path: Path):
    """Print summary statistics for one dataset directory.

    Reads ``metadata.json`` and ``instances.json`` from ``dataset_path`` and
    prints the split sizes, the distribution of instances by difficulty and
    by instruction type, the most common achievement focuses, the speedrun
    targets, and the first three instances.

    Both files are read as UTF-8 so the result does not depend on the
    platform's default text encoding.

    Args:
        dataset_path: Directory containing the two JSON files.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
        KeyError: If an expected key is missing from the loaded data.
    """
    # Load metadata
    with open(dataset_path / "metadata.json", "r", encoding="utf-8") as f:
        metadata = json.load(f)

    # Load instances
    with open(dataset_path / "instances.json", "r", encoding="utf-8") as f:
        instances = json.load(f)

    total = len(instances)
    val_count = len(metadata['split_info']['val_instance_ids'])
    test_count = len(metadata['split_info']['test_instance_ids'])

    print(f"\nDataset: {metadata['name']}")
    print(f"Description: {metadata['description']}")
    print(f"Total instances: {metadata['num_instances']}")
    # The train size is whatever is left after the validation and test ids.
    print(f"Train/Val/Test split: {total - val_count - test_count}/{val_count}/{test_count}")

    # Analyze by difficulty
    difficulties = Counter(inst['metadata']['difficulty'] for inst in instances)
    print("\nInstances by difficulty:")
    for diff, count in sorted(difficulties.items()):
        print(f" {diff}: {count} ({count/total*100:.1f}%)")

    # One pass over the instances collects every impetus-based tally.
    impetus_types = Counter()
    focus_counts = Counter()
    speedrun_targets = Counter()

    for inst in instances:
        impetus = inst['impetus']
        instructions = impetus['instructions']
        lowered = instructions.lower()
        is_speedrun = 'speedrun' in lowered

        if is_speedrun:
            impetus_types['speedrun'] += 1
        elif 'focus on' in lowered:
            impetus_types['focused'] += 1
        else:
            impetus_types['general'] += 1

        focus_counts.update(impetus.get('achievement_focus') or ())

        if is_speedrun and ':' in instructions:
            # The target is the first word after the first colon.
            target = instructions.split(':')[1].strip().split(' ')[0]
            speedrun_targets[target] += 1

    print("\nInstances by type:")
    for type_name, count in sorted(impetus_types.items()):
        print(f" {type_name}: {count} ({count/total*100:.1f}%)")

    if focus_counts:
        print("\nTop achievement focuses:")
        for ach, count in focus_counts.most_common(10):
            print(f" {ach}: {count}")

    if speedrun_targets:
        print("\nSpeedrun targets:")
        for target, count in speedrun_targets.most_common():
            print(f" {target}: {count}")

    # Sample some instances
    print("\nSample instances:")
    for number, inst in enumerate(instances[:3], start=1):
        print(f"\n Instance {number}:")
        print(f" ID: {inst['id']}")
        print(f" Difficulty: {inst['metadata']['difficulty']}")
        print(f" Seed: {inst['metadata']['world_seed']}")
        print(f" Instructions: {inst['impetus']['instructions'][:80]}...")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def main():
    """Print statistics for every dataset found under ``dataset/``.

    A dataset is any sub-directory of ``dataset`` (relative to the current
    working directory) that contains a ``metadata.json`` file. If the
    ``dataset`` directory is missing, or holds no such sub-directory, a
    notice is printed and nothing else happens.
    """
    dataset_dir = Path("dataset")

    print("Crafter Dataset Statistics")
    print("=" * 60)

    # iterdir() raises on a missing directory; treat that the same as
    # "nothing found" so the friendly message below is what the user sees.
    if dataset_dir.is_dir():
        datasets = sorted(
            d for d in dataset_dir.iterdir()
            if d.is_dir() and (d / "metadata.json").exists()
        )
    else:
        datasets = []

    if not datasets:
        print("No datasets found in dataset/")
        return

    print(f"Found {len(datasets)} dataset(s)")

    for dataset_path in datasets:
        analyze_dataset(dataset_path)
        print("\n" + "-" * 60)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Summary of the diamond spawning issue in Crafter.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
def explain_probability_condition():
    """Print a walkthrough of the ``uniform() > 1 - p`` diamond spawn test.

    Writes a fixed introduction to stdout, then for a handful of sample
    ``diamond_probability`` values prints the resulting comparison
    threshold, the spawn chance it implies, and a coarse verbal rating.
    Returns ``None``.
    """
    intro_lines = (
        "DIAMOND SPAWNING PROBABILITY EXPLANATION",
        "=" * 60,
        "",
        "The code uses this condition to spawn diamonds:",
        " uniform() > (1 - config.diamond_probability)",
        "",
        "This means:",
        " - uniform() generates a random number between 0 and 1",
        " - We spawn a diamond if this random number is GREATER than (1 - diamond_probability)",
        "",
        "Examples:",
        "-" * 60,
    )
    for text in intro_lines:
        print(text)

    # (exclusive upper bound, rating); anything at or above the last bound
    # falls through to "VERY HIGH".
    rating_bands = (
        (0.1, "VERY LOW"),
        (0.3, "LOW"),
        (0.7, "MODERATE"),
        (0.9, "HIGH"),
    )

    for p in (0.006, 0.1, 0.5, 0.8, 0.95):
        cutoff = 1 - p
        # Derived back from the cutoff (rather than using p directly) so the
        # printed chance reflects the comparison actually being described.
        chance = 1 - cutoff
        rating = next(
            (label for bound, label in rating_bands if p < bound),
            "VERY HIGH",
        )
        print(f"\nWith diamond_probability = {p}:")
        print(f" Condition: uniform() > {cutoff:.3f}")
        print(f" Actual spawn chance: {chance:.1%}")
        print(f" Description: {rating}")
|
|
29
|
+
|
|
30
|
+
def show_cascading_conditions():
    """Print the ordered list of material checks for a mountain position.

    Writes a numbered list of checks (each with the material placed when
    the check succeeds), the default material, and a closing note that the
    first matching check ends the cascade.  Returns ``None``.
    """
    # (description of the check, material placed when it succeeds), in the
    # order the checks are listed.
    checks = (
        ("Cave check (two conditions)", "path"),
        ("Horizontal tunnel check", "path"),
        ("Vertical tunnel check", "path"),
        ("Coal check (noise threshold AND probability)", "coal"),
        ("Iron check (noise threshold AND probability)", "iron"),
        ("Diamond check (mountain threshold AND probability)", "diamond"),
        ("Lava check", "lava"),
    )

    print("\n\nCASCADING CONDITIONS IN WORLD GENERATION")
    print("=" * 60)
    print()
    print("For a position in a mountain area, materials are checked in this order:")
    print()

    for number, (check, material) in enumerate(checks, start=1):
        print(f"{number}. {check}")
        print(f" └─ If true → place '{material}'")
        print()

    # The fallback entry is numbered right after the last real check.
    print(f"{len(checks) + 1}. Default: place 'stone'")
    print()
    print("IMPORTANT: Once any condition is met, no further checks are done!")
    print("This means diamonds can be blocked by caves, tunnels, coal, or iron.")
|
|
62
|
+
|
|
63
|
+
def calculate_effective_spawn_rate():
    """Print the effective diamond spawn rate from hard-coded sample counts.

    The counts below are fixed figures embedded in this function (labelled
    in the output as coming from an analysis of 10,000 positions).  The
    expected diamond count is the number of positions that reached the
    diamond check multiplied by the spawn probability; the overall rate is
    that count divided by the total number of positions.  Returns ``None``.
    """
    # Data from our analysis
    sampled = 10000
    in_mountain = 2402
    blocked_by = {
        "caves": 431,
        "tunnels": 445,
        "coal": 104,
        "iron": 41,
    }
    reached_check = 1185
    spawn_probability = 0.006

    print("\n\nEFFECTIVE DIAMOND SPAWN RATE")
    print("=" * 60)
    print()
    print("Based on the analysis of 10,000 positions:")
    print()

    mountain_pct = 100 * in_mountain / sampled
    print(f"Total positions: {sampled}")
    print(f"Mountain positions: {in_mountain} ({mountain_pct:.1f}%)")
    # Dict insertion order fixes the order of the "Blocked by" rows.
    for cause, count in blocked_by.items():
        print(f" Blocked by {cause}: {count}")
    print(f" Reached diamond check: {reached_check}")
    print()
    print(f"Diamond spawn probability: {spawn_probability} ({spawn_probability * 100:.1f}%)")
    print()

    expected = reached_check * spawn_probability
    rate = expected / sampled

    print(f"Expected diamonds: {expected:.1f}")
    print(f"Overall spawn rate: {rate:.4%}")
    print()
    print("This explains why diamonds are so rare!")
|
|
99
|
+
|
|
100
|
+
if __name__ == "__main__":
    # Script entry point: print the three explanatory sections in order,
    # then the closing recommendation text.
    explain_probability_condition()
    show_cascading_conditions()
    calculate_effective_spawn_rate()

    # One entry per output line; "" produces a blank line.
    recommendation_lines = (
        "\n\nRECOMMENDATION",
        "=" * 60,
        "",
        "To make diamonds appear with 'high probability', you should:",
        "",
        "1. Increase diamond_probability to at least 0.5 (50% chance)",
        " or even 0.8-0.9 for truly 'high' probability",
        "",
        "2. Consider adjusting other thresholds to reduce blocking:",
        " - Increase coal_threshold (currently 0.0)",
        " - Increase iron_threshold (currently 0.4)",
        " - Decrease coal_probability and iron_probability",
        "",
        "3. Optionally lower diamond_threshold slightly (currently 0.18)",
        " to increase the number of positions that can spawn diamonds",
    )
    for text in recommendation_lines:
        print(text)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Example usage of Crafter datasets
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
|
|
9
|
+
|
|
10
|
+
from run_dataset import CrafterDatasetRunner
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main():
    """Run four example batches through ``CrafterDatasetRunner.run_batch``.

    Examples 1-3 each request 5 instances from ``crafter_balanced_v1`` with
    ``max_steps=500``, selected by difficulty, split, and impetus type
    respectively.  Example 4 requests 3 instances per difficulty from
    ``crafter_progression_v1`` with ``max_steps=300``.  A banner is printed
    before each batch.

    NOTE(review): the exact meaning of each ``run_batch`` keyword is defined
    by ``CrafterDatasetRunner`` (imported from ``run_dataset``) and is not
    visible here.
    """
    runner = CrafterDatasetRunner()

    # (banner, keyword arguments) for the three fixed examples.  Keyword
    # order matches the order used for each call.
    balanced_examples = (
        (
            "=== Example 1: Run 5 easy instances ===",
            {
                "dataset_name": "crafter_balanced_v1",
                "num_instances": 5,
                "difficulties": ["easy"],
                "max_steps": 500,
            },
        ),
        (
            "\n\n=== Example 2: Run validation set instances ===",
            {
                "dataset_name": "crafter_balanced_v1",
                "num_instances": 5,
                "split": "val",
                "max_steps": 500,
            },
        ),
        (
            "\n\n=== Example 3: Run speedrun challenges ===",
            {
                "dataset_name": "crafter_balanced_v1",
                "num_instances": 5,
                "impetus_types": ["speedrun"],
                "max_steps": 500,
            },
        ),
    )
    for banner, batch_kwargs in balanced_examples:
        print(banner)
        runner.run_batch(**batch_kwargs)

    print("\n\n=== Example 4: Compare difficulties ===")
    for level in ("easy", "normal", "hard"):
        print(f"\n--- Testing {level} ---")
        runner.run_batch(
            dataset_name="crafter_progression_v1",
            num_instances=3,
            difficulties=[level],
            max_steps=300,
        )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Script entry point: run the example batches only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|