synth-ai 0.2.4.dev3__py3-none-any.whl → 0.2.4.dev5__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- synth_ai/environments/examples/__init__.py +1 -0
- synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
- synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +575 -0
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
- synth_ai/environments/examples/crafter_classic/environment.py +364 -0
- synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
- synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
- synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
- synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
- synth_ai/environments/examples/crafter_custom/environment.py +312 -0
- synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
- synth_ai/environments/examples/enron/engine.py +291 -0
- synth_ai/environments/examples/enron/environment.py +165 -0
- synth_ai/environments/examples/enron/taskset.py +112 -0
- synth_ai/environments/examples/minigrid/__init__.py +48 -0
- synth_ai/environments/examples/minigrid/engine.py +589 -0
- synth_ai/environments/examples/minigrid/environment.py +274 -0
- synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
- synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
- synth_ai/environments/examples/minigrid/taskset.py +583 -0
- synth_ai/environments/examples/nethack/__init__.py +7 -0
- synth_ai/environments/examples/nethack/achievements.py +337 -0
- synth_ai/environments/examples/nethack/engine.py +738 -0
- synth_ai/environments/examples/nethack/environment.py +255 -0
- synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
- synth_ai/environments/examples/nethack/taskset.py +323 -0
- synth_ai/environments/examples/red/__init__.py +7 -0
- synth_ai/environments/examples/red/config_logging.py +110 -0
- synth_ai/environments/examples/red/engine.py +693 -0
- synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
- synth_ai/environments/examples/red/environment.py +235 -0
- synth_ai/environments/examples/red/taskset.py +77 -0
- synth_ai/environments/examples/sokoban/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine.py +675 -0
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
- synth_ai/environments/examples/sokoban/environment.py +228 -0
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
- synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
- synth_ai/environments/examples/sokoban/taskset.py +425 -0
- synth_ai/environments/examples/tictactoe/__init__.py +1 -0
- synth_ai/environments/examples/tictactoe/engine.py +368 -0
- synth_ai/environments/examples/tictactoe/environment.py +239 -0
- synth_ai/environments/examples/tictactoe/taskset.py +214 -0
- synth_ai/environments/examples/verilog/__init__.py +10 -0
- synth_ai/environments/examples/verilog/engine.py +328 -0
- synth_ai/environments/examples/verilog/environment.py +349 -0
- synth_ai/environments/examples/verilog/taskset.py +418 -0
- synth_ai/tracing_v3/examples/basic_usage.py +188 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/METADATA +1 -1
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/RECORD +105 -6
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev3.dist-info → synth_ai-0.2.4.dev5.dist-info}/top_level.txt +0 -0
synth_ai/environments/examples/verilog/taskset.py (new file)
@@ -0,0 +1,418 @@
from synth_ai.environments.tasks.core import (
    Task,
    TaskInstance,
    TaskInstanceMetadata,
    TaskInstanceSet,
    SplitInfo,
    Impetus,
    Intent,
)
from uuid import uuid4, UUID
from dataclasses import dataclass, asdict, fields
from typing import Optional
from pathlib import Path
import tempfile
import os
import shutil
import atexit
from datasets import load_dataset

# Global list to track temp directories for cleanup
_temp_dirs = []


def _cleanup_temp_dirs():
    """Clean up all temporary directories created during task instances."""
    for temp_dir in _temp_dirs:
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
        except Exception:
            pass  # Ignore cleanup errors
    _temp_dirs.clear()


# Register cleanup function to run at exit
atexit.register(_cleanup_temp_dirs)

verilog_task = Task(
    global_premises="Implement and verify Verilog hardware designs",
    global_constraints="Must pass testbench verification",
    global_objectives="Write correct Verilog code that passes all tests",
    shared_env_params={},
)


@dataclass
class VerilogTaskInstanceMetadata(TaskInstanceMetadata):
    problem_name: str
    difficulty: str
    description: str
    files_provided: list[str]


@dataclass
class VerilogTaskInstance(TaskInstance):
    pristine_dir: Optional[str] = None
    snapshot_dir: Optional[str] = None

    async def serialize(self) -> dict:
        data = asdict(self)
        if "id" in data and isinstance(data["id"], UUID):
            data["id"] = str(data["id"])
        if "intent" in data and data["intent"] is not None:
            if "deterministic_eval_functions" in data["intent"]:
                data["intent"]["deterministic_eval_functions"] = []
        return data

    @classmethod
    async def deserialize(cls, data: dict) -> "VerilogTaskInstance":
        """Gracefully accept non-UUID ids and rebuild required objects."""
        if "id" in data:
            try:
                data["id"] = UUID(str(data["id"]))
            except (ValueError, TypeError, AttributeError):
                pass  # keep original string

        if "impetus" in data and isinstance(data["impetus"], dict):
            impetus_data = data["impetus"]
            # Ensure instructions field exists with default if missing
            if "instructions" not in impetus_data:
                impetus_data["instructions"] = "Implement the Verilog module"
            data["impetus"] = Impetus(**impetus_data)

        if "intent" in data and isinstance(data["intent"], dict):
            intent_data = data["intent"]
            if "deterministic_eval_functions" not in intent_data:
                intent_data["deterministic_eval_functions"] = []
            # Provide default values for required fields if missing
            if "rubric" not in intent_data:
                intent_data["rubric"] = {"goal": "Pass all testbench tests"}
            if "gold_trajectories" not in intent_data:
                intent_data["gold_trajectories"] = None
            if "gold_state_diff" not in intent_data:
                intent_data["gold_state_diff"] = {}
            data["intent"] = Intent(**intent_data)

        if "metadata" in data and isinstance(data["metadata"], dict):
            metadata_data = data["metadata"]
            # Ensure required fields exist with defaults if missing
            if "problem_name" not in metadata_data:
                metadata_data["problem_name"] = "unknown"
            if "difficulty" not in metadata_data:
                metadata_data["difficulty"] = "medium"
            if "description" not in metadata_data:
                metadata_data["description"] = "Verilog implementation task"
            if "files_provided" not in metadata_data:
                metadata_data["files_provided"] = []
            data["metadata"] = VerilogTaskInstanceMetadata(**metadata_data)

        constructor_field_names = {f.name for f in fields(cls)}
        filtered_data = {k: v for k, v in data.items() if k in constructor_field_names}

        # Add default values for required fields if missing
        if "is_reproducible" not in filtered_data:
            filtered_data["is_reproducible"] = True
        if "initial_engine_snapshot" not in filtered_data:
            filtered_data["initial_engine_snapshot"] = None

        return cls(**filtered_data)


async def create_verilog_taskset(max_instances: int = 10) -> TaskInstanceSet:
    """Create a Verilog task set from HuggingFace VerilogEval v2 dataset."""
    # Load VerilogEval v2 dataset from HuggingFace
    ds = load_dataset("dakies/nvlabs-verilogeval-v2-spec-to-rtl", split="test")

    instances = []

    # Limit the number of instances for faster testing
    dataset_size = min(max_instances, len(ds))  # type: ignore[arg-type]

    # Convert each dataset item to VerilogTaskInstance
    for i in range(dataset_size):
        item = ds[i]
        instance = _create_hf_task_instance(item, i)
        instances.append(instance)

    # Create split info - use first 80% for validation, last 20% for test
    total_instances = len(instances)
    val_split = int(0.8 * total_instances)

    val_ids = {inst.id for inst in instances[:val_split]}
    test_ids = {inst.id for inst in instances[val_split:]}

    split_info = SplitInfo(
        val_instance_ids=val_ids,
        test_instance_ids=test_ids,
        _is_split_defined=True,
    )

    return TaskInstanceSet(
        name="VerilogEval v2 TaskSet",
        description="VerilogEval v2 spec-to-RTL tasks from HuggingFace",
        instances=instances,
        split_info=split_info,
    )


def _create_hf_task_instance(item, index: int) -> VerilogTaskInstance:
    """Create a VerilogTaskInstance from a HuggingFace dataset item."""
    instance_id = uuid4()

    # Create temporary directory for this task
    temp_dir = tempfile.mkdtemp(prefix=f"verilog_hf_{index}_{instance_id}_")
    _temp_dirs.append(temp_dir)  # Track for cleanup
    pristine_dir = Path(temp_dir)
    pristine_dir.mkdir(exist_ok=True)

    # Extract information from dataset item
    problem_id = item["problem_id"]
    prompt = item["prompt"]
    testbench = item["test"]
    ref_solution = item["ref"]

    # Create incomplete module template (TopModule is the expected name in tests)
    module_content = (
        """module TopModule();
    // TODO: Implement the module based on the specification below
    /*
    Specification:
    """
        + prompt.strip()
        + """
    */
endmodule"""
    )

    # Write files to pristine directory
    module_file = "TopModule.v"
    testbench_file = f"{problem_id}_tb.v"
    ref_file = "RefModule.v"

    (pristine_dir / module_file).write_text(module_content)
    (pristine_dir / testbench_file).write_text(testbench)
    (pristine_dir / ref_file).write_text(ref_solution)  # Include reference module

    files_provided = [module_file, testbench_file, ref_file]

    # Create task components
    impetus = Impetus(
        instructions=f"Problem: {problem_id}\n\n{prompt.strip()}\n\nImplement the TopModule according to the specification. The testbench will verify your implementation."
    )

    intent = Intent(
        rubric={
            "goal": f"Implement correct TopModule for {problem_id} that passes testbench verification"
        },
        gold_trajectories=None,
        gold_state_diff={},
    )

    metadata = VerilogTaskInstanceMetadata(
        problem_name=problem_id,
        difficulty="medium",  # VerilogEval doesn't specify difficulty levels
        description=prompt.strip(),  # Full description
        files_provided=files_provided,
    )

    # Create snapshot directory and track for cleanup
    snapshot_dir = tempfile.mkdtemp(prefix=f"verilog_snapshot_{instance_id}_")
    _temp_dirs.append(snapshot_dir)

    return VerilogTaskInstance(
        id=instance_id,
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
        pristine_dir=str(pristine_dir),
        snapshot_dir=snapshot_dir,
    )


def _create_adder_task() -> VerilogTaskInstance:
    """Create a simple 4-bit adder task."""
    instance_id = uuid4()

    # Create temporary directory for this task
    temp_dir = tempfile.mkdtemp(prefix=f"verilog_adder_{instance_id}_")
    _temp_dirs.append(temp_dir)  # Track for cleanup

    # Write adder testbench
    adder_tb_content = """`timescale 1ns/1ps
module adder4_tb;
    reg [3:0] a, b;
    wire [4:0] sum;

    adder4 dut(.a(a), .b(b), .sum(sum));

    initial begin
        a = 4'b0000; b = 4'b0000; #10;
        if (sum != 5'b00000) $fatal(1, "Test failed: 0 + 0 != 0");

        a = 4'b0001; b = 4'b0001; #10;
        if (sum != 5'b00010) $fatal(1, "Test failed: 1 + 1 != 2");

        a = 4'b1111; b = 4'b0001; #10;
        if (sum != 5'b10000) $fatal(1, "Test failed: 15 + 1 != 16");

        $display("ALL_TESTS_PASSED");
        $finish;
    end
endmodule"""

    # Write incomplete adder module (for student to complete)
    adder_content = """module adder4(
    input [3:0] a,
    input [3:0] b,
    output [4:0] sum
);
    // TODO: Implement 4-bit adder
    // assign sum = ?;
endmodule"""

    pristine_dir = Path(temp_dir)
    pristine_dir.mkdir(exist_ok=True)

    (pristine_dir / "adder4_tb.v").write_text(adder_tb_content)
    (pristine_dir / "adder4.v").write_text(adder_content)

    impetus = Impetus(
        instructions="Implement a 4-bit adder module that takes two 4-bit inputs 'a' and 'b' and produces a 5-bit output 'sum'."
    )

    intent = Intent(
        rubric="Implement correct 4-bit adder that passes testbench",
        gold_trajectories=None,
        gold_state_diff={},
    )

    metadata = VerilogTaskInstanceMetadata(
        problem_name="adder4",
        difficulty="easy",
        description="4-bit adder implementation",
        files_provided=["adder4.v", "adder4_tb.v"],
    )

    return VerilogTaskInstance(
        id=instance_id,
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
        pristine_dir=str(pristine_dir),
        snapshot_dir=(
            lambda: (
                _temp_dirs.append(d := tempfile.mkdtemp(prefix=f"verilog_snapshot_{instance_id}_")),
                d,
            )[1]
        )(),
    )


def _create_and_gate_task() -> VerilogTaskInstance:
    """Create a simple AND gate task."""
    instance_id = uuid4()

    # Create temporary directory for this task
    temp_dir = tempfile.mkdtemp(prefix=f"verilog_and_{instance_id}_")
    _temp_dirs.append(temp_dir)  # Track for cleanup

    # Write AND gate testbench
    and_tb_content = """`timescale 1ns/1ps
module and_gate_tb;
    reg a, b;
    wire y;

    and_gate dut(.a(a), .b(b), .y(y));

    initial begin
        a = 0; b = 0; #10;
        if (y != 0) $fatal(1, "Test failed: 0 AND 0 != 0");

        a = 0; b = 1; #10;
        if (y != 0) $fatal(1, "Test failed: 0 AND 1 != 0");

        a = 1; b = 0; #10;
        if (y != 0) $fatal(1, "Test failed: 1 AND 0 != 0");

        a = 1; b = 1; #10;
        if (y != 1) $fatal(1, "Test failed: 1 AND 1 != 1");

        $display("ALL_TESTS_PASSED");
        $finish;
    end
endmodule"""

    # Write incomplete AND gate module
    and_content = """module and_gate(
    input a,
    input b,
    output y
);
    // TODO: Implement AND gate
    // assign y = ?;
endmodule"""

    pristine_dir = Path(temp_dir)
    pristine_dir.mkdir(exist_ok=True)

    (pristine_dir / "and_gate_tb.v").write_text(and_tb_content)
    (pristine_dir / "and_gate.v").write_text(and_content)

    impetus = Impetus(
        instructions="Implement an AND gate module that takes two inputs 'a' and 'b' and produces output 'y'."
    )

    intent = Intent(
        rubric="Implement correct AND gate that passes testbench",
        gold_trajectories=None,
        gold_state_diff={},
    )

    metadata = VerilogTaskInstanceMetadata(
        problem_name="and_gate",
        difficulty="easy",
        description="Basic AND gate implementation",
        files_provided=["and_gate.v", "and_gate_tb.v"],
    )

    return VerilogTaskInstance(
        id=instance_id,
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
        pristine_dir=str(pristine_dir),
        snapshot_dir=(
            lambda: (
                _temp_dirs.append(d := tempfile.mkdtemp(prefix=f"verilog_snapshot_{instance_id}_")),
                d,
            )[1]
        )(),
    )


# Example usage
if __name__ == "__main__":
    import asyncio

    async def main():
        taskset = await create_verilog_taskset()

        serialized = await asyncio.gather(*(inst.serialize() for inst in taskset.instances))

        print(f"Created {len(serialized)} Verilog task instances")

        # Print summary
        for i, inst in enumerate(taskset.instances):
            print(f"Task {i + 1}: {inst.metadata.problem_name} ({inst.metadata.difficulty})")
            print(f" Description: {inst.metadata.description}")
            print(f" Files: {inst.metadata.files_provided}")
            print()

    asyncio.run(main())
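
For orientation, here is a minimal, hedged sketch of how the new taskset module could be exercised once the 0.2.4.dev5 wheel is installed. The function and field names are taken from the diff above; availability of the `datasets` dependency and network access to the HuggingFace dataset are assumed:

import asyncio

from synth_ai.environments.examples.verilog.taskset import (
    VerilogTaskInstance,
    create_verilog_taskset,
)


async def demo() -> None:
    # Build a small taskset (downloads the VerilogEval v2 split on first use).
    taskset = await create_verilog_taskset(max_instances=2)
    for inst in taskset.instances:
        print(inst.metadata.problem_name, inst.metadata.difficulty, inst.metadata.files_provided)

    # Round-trip one instance through the serialize/deserialize helpers shown above.
    payload = await taskset.instances[0].serialize()
    restored = await VerilogTaskInstance.deserialize(payload)
    print(restored.id, restored.pristine_dir)


asyncio.run(demo())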
synth_ai/tracing_v3/examples/basic_usage.py (new file)
@@ -0,0 +1,188 @@
"""Basic usage example for tracing v3."""

import asyncio
import time
from datetime import datetime

from synth_ai.tracing_v3 import SessionTracer
from synth_ai.tracing_v3.abstractions import LMCAISEvent, EnvironmentEvent, RuntimeEvent, TimeRecord
from synth_ai.tracing_v3.turso.daemon import SqldDaemon


async def simulate_llm_call(model: str, prompt: str) -> dict:
    """Simulate an LLM API call."""
    await asyncio.sleep(0.1)  # Simulate network latency

    # Simulate response
    tokens = len(prompt.split()) * 3
    return {
        "model": model,
        "response": f"Response to: {prompt[:50]}...",
        "usage": {
            "prompt_tokens": len(prompt.split()) * 2,
            "completion_tokens": tokens,
            "total_tokens": len(prompt.split()) * 2 + tokens,
        },
    }


async def main():
    """Demonstrate basic tracing v3 usage."""
    print("Starting tracing v3 example...")

    # Option 1: Start sqld daemon programmatically
    with SqldDaemon() as daemon:
        print("✓ Started sqld daemon")

        # Wait for daemon to be ready
        await asyncio.sleep(1)

        # Create tracer
        tracer = SessionTracer()
        await tracer.initialize()
        print("✓ Initialized tracer")

        # Example 1: Basic session with events
        print("\n--- Example 1: Basic Session ---")
        async with tracer.session(metadata={"example": "basic"}) as session_id:
            print(f"Started session: {session_id}")

            # Timestep 1: LLM interaction
            async with tracer.timestep("llm_step", turn_number=1):
                # Simulate LLM call
                result = await simulate_llm_call("gpt-4", "What is the capital of France?")

                # Record LLM event
                event = LMCAISEvent(
                    system_instance_id="llm_system",
                    time_record=TimeRecord(event_time=time.time()),
                    model_name=result["model"],
                    input_tokens=result["usage"]["prompt_tokens"],
                    output_tokens=result["usage"]["completion_tokens"],
                    total_tokens=result["usage"]["total_tokens"],
                    cost_usd=0.003,  # $0.003
                    latency_ms=100,
                    metadata={"prompt": "What is the capital of France?"},
                )
                await tracer.record_event(event)

                # Record messages
                await tracer.record_message(
                    content="What is the capital of France?", message_type="user"
                )
                await tracer.record_message(content=result["response"], message_type="assistant")
                print("✓ Recorded LLM interaction")

            # Timestep 2: Environment interaction
            async with tracer.timestep("env_step", turn_number=2):
                # Record environment event
                env_event = EnvironmentEvent(
                    system_instance_id="environment",
                    time_record=TimeRecord(event_time=time.time()),
                    reward=0.8,
                    terminated=False,
                    system_state_before={"position": [0, 0]},
                    system_state_after={"position": [1, 0]},
                )
                await tracer.record_event(env_event)
                print("✓ Recorded environment event")

            # Timestep 3: Runtime action
            async with tracer.timestep("runtime_step", turn_number=3):
                # Record runtime event
                runtime_event = RuntimeEvent(
                    system_instance_id="agent",
                    time_record=TimeRecord(event_time=time.time()),
                    actions=[1, 0, 0, 1],  # Example action vector
                    metadata={"action_type": "move_right"},
                )
                await tracer.record_event(runtime_event)
                print("✓ Recorded runtime event")

        print(f"✓ Session {session_id} saved\n")

        # Example 2: Concurrent sessions
        print("--- Example 2: Concurrent Sessions ---")

        async def run_concurrent_session(session_num: int):
            """Run a session concurrently."""
            async with tracer.session(
                metadata={"example": "concurrent", "session_num": session_num}
            ) as sid:
                for i in range(3):
                    async with tracer.timestep(f"step_{i}", turn_number=i):
                        # Simulate some work
                        await asyncio.sleep(0.05)

                        # Record event
                        event = RuntimeEvent(
                            system_instance_id=f"worker_{session_num}",
                            time_record=TimeRecord(event_time=time.time()),
                            actions=[i],
                            metadata={"iteration": i},
                        )
                        await tracer.record_event(event)

            return sid

        # Run 5 concurrent sessions
        tasks = [run_concurrent_session(i) for i in range(5)]
        session_ids = await asyncio.gather(*tasks)
        print(f"✓ Completed {len(session_ids)} concurrent sessions")

        # Example 3: Query stored data
        print("\n--- Example 3: Querying Data ---")

        # Get model usage statistics
        model_usage = await tracer.db.get_model_usage()
        print("\nModel Usage:")
        print(model_usage)

        # Query recent sessions
        recent_sessions = await tracer.get_session_history(limit=5)
        print(f"\nRecent Sessions: {len(recent_sessions)} found")
        for session in recent_sessions:
            print(
                f" - {session['session_id']}: "
                f"{session['num_events']} events, "
                f"{session['num_messages']} messages"
            )

        # Get specific session details
        if recent_sessions:
            session_detail = await tracer.db.get_session_trace(recent_sessions[0]["session_id"])
            print(f"\nSession Detail for {session_detail['session_id']}:")
            print(f" Created: {session_detail['created_at']}")
            print(f" Timesteps: {len(session_detail['timesteps'])}")

        # Example 4: Using hooks
        print("\n--- Example 4: Hooks ---")

        # Add a custom hook
        call_count = {"count": 0}

        async def count_events(event, **kwargs):
            call_count["count"] += 1
            print(f" Hook: Event #{call_count['count']} recorded")

        tracer.hooks.register("event_recorded", count_events, name="event_counter")

        async with tracer.session(metadata={"example": "hooks"}) as session_id:
            async with tracer.timestep("hook_test"):
                for i in range(3):
                    event = RuntimeEvent(
                        system_instance_id="hook_test",
                        time_record=TimeRecord(event_time=time.time()),
                        actions=[i],
                    )
                    await tracer.record_event(event)

        print(f"✓ Hook called {call_count['count']} times")

        # Cleanup
        await tracer.close()
        print("\n✓ Example completed successfully!")


if __name__ == "__main__":
    asyncio.run(main())
|