wafer-core 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer_core/auth.py CHANGED
@@ -41,6 +41,18 @@ PROVIDERS = {
41
41
  "display_name": "Modal",
42
42
  "key_url": "https://modal.com/settings",
43
43
  },
44
+ "anthropic": {
45
+ "env_var": "ANTHROPIC_API_KEY",
46
+ "alt_env_var": "WAFER_ANTHROPIC_API_KEY", # Check this first
47
+ "display_name": "Anthropic",
48
+ "key_url": "https://console.anthropic.com/settings/keys",
49
+ },
50
+ "openai": {
51
+ "env_var": "OPENAI_API_KEY",
52
+ "alt_env_var": "WAFER_OPENAI_KEY", # Check this first
53
+ "display_name": "OpenAI",
54
+ "key_url": "https://platform.openai.com/api-keys",
55
+ },
44
56
  }
45
57
 
46
58
 
@@ -78,11 +90,12 @@ def get_api_key(provider: str) -> str | None:
78
90
  """Get API key for a provider.
79
91
 
80
92
  Checks in order:
81
- 1. Environment variable (e.g., WAFER_RUNPOD_API_KEY)
82
- 2. ~/.wafer/auth.json
93
+ 1. Alt environment variable if defined (e.g., WAFER_ANTHROPIC_API_KEY)
94
+ 2. Primary environment variable (e.g., ANTHROPIC_API_KEY)
95
+ 3. ~/.wafer/auth.json
83
96
 
84
97
  Args:
85
- provider: Provider name (runpod, digitalocean, modal)
98
+ provider: Provider name (runpod, digitalocean, modal, anthropic, openai)
86
99
 
87
100
  Returns:
88
101
  API key string or None if not found
@@ -92,7 +105,13 @@ def get_api_key(provider: str) -> str | None:
92
105
 
93
106
  config = PROVIDERS[provider]
94
107
 
95
- # Check environment variable first
108
+ # Check alt environment variable first (e.g., WAFER_ANTHROPIC_API_KEY)
109
+ if "alt_env_var" in config:
110
+ alt_key = os.environ.get(config["alt_env_var"], "").strip()
111
+ if alt_key:
112
+ return alt_key
113
+
114
+ # Check primary environment variable
96
115
  env_key = os.environ.get(config["env_var"], "").strip()
97
116
  if env_key:
98
117
  return env_key
@@ -154,7 +173,7 @@ def get_auth_status(provider: str) -> AuthStatus:
154
173
  """Get authentication status for a provider.
155
174
 
156
175
  Args:
157
- provider: Provider name (runpod, digitalocean, modal)
176
+ provider: Provider name (runpod, digitalocean, modal, anthropic, openai)
158
177
 
159
178
  Returns:
160
179
  AuthStatus with details about the auth state
@@ -164,7 +183,20 @@ def get_auth_status(provider: str) -> AuthStatus:
164
183
 
165
184
  config = PROVIDERS[provider]
166
185
 
167
- # Check environment variable first
186
+ # Check alt environment variable first (e.g., WAFER_ANTHROPIC_API_KEY)
187
+ if "alt_env_var" in config:
188
+ alt_key = os.environ.get(config["alt_env_var"], "").strip()
189
+ if alt_key:
190
+ return AuthStatus(
191
+ provider=provider,
192
+ display_name=config["display_name"],
193
+ is_authenticated=True,
194
+ source="env",
195
+ key_preview=_format_key_preview(alt_key),
196
+ key_url=config["key_url"],
197
+ )
198
+
199
+ # Check primary environment variable
168
200
  env_key = os.environ.get(config["env_var"], "").strip()
169
201
  if env_key:
170
202
  return AuthStatus(
@@ -34,6 +34,8 @@ from wafer_core.tools import (
34
34
  GLOB_TOOL,
35
35
  GREP_TOOL,
36
36
  READ_TOOL,
37
+ SEARCH_DOCS_TOOL,
38
+ SKILL_TOOL,
37
39
  WRITE_TOOL,
38
40
  ApprovalCallback,
39
41
  exec_bash,
@@ -41,6 +43,8 @@ from wafer_core.tools import (
41
43
  exec_glob,
42
44
  exec_grep,
43
45
  exec_read,
46
+ exec_search_docs,
47
+ exec_skill,
44
48
  exec_write,
45
49
  )
46
50
 
@@ -61,6 +65,8 @@ ALL_TOOLS = {
61
65
  "glob": GLOB_TOOL,
62
66
  "grep": GREP_TOOL,
63
67
  "bash": BASH_TOOL,
68
+ "search_docs": SEARCH_DOCS_TOOL,
69
+ "skill": SKILL_TOOL,
64
70
  # TODO(wafer-tool): "wafer": WAFER_TOOL,
65
71
  }
66
72
 
@@ -208,6 +214,8 @@ class CodingEnvironment:
208
214
  self.bash_approval_callback,
209
215
  self._sandbox_policy,
210
216
  ),
217
+ "search_docs": lambda tc: exec_search_docs(tc),
218
+ "skill": lambda tc: exec_skill(tc),
211
219
  # TODO(wafer-tool): "wafer": lambda tc: exec_wafer(
212
220
  # tc, self.working_dir, self.enabled_tools, self.allow_spawn, cancel_scope
213
221
  # ),
@@ -1562,6 +1562,10 @@ class EvalConfig:
1562
1562
  resume_dir: Path | None = None
1563
1563
  report_batch_size: int = 1 # Write report after each sample for best recovery
1564
1564
 
1565
+ # Custom metadata (flows to report.json for dashboard filtering)
1566
+ # e.g., {"waferbench_category": "gemm", "github_runner": "elliot"}
1567
+ metadata: dict[str, Any] | None = None
1568
+
1565
1569
 
1566
1570
  # ── Session Types ──────────────────────────────────────────────────────────────
1567
1571
  # Types for persisting agent sessions (trajectories, config, environment state).
@@ -331,9 +331,9 @@ def generate_diff(old_content: str, new_content: str, context_lines: int = 3) ->
331
331
 
332
332
  # Tool preset configurations
333
333
  TOOL_PRESETS = {
334
- "full": ["read", "write", "edit", "bash", "web_fetch"],
334
+ "full": ["read", "write", "edit", "bash", "web_fetch", "skill"],
335
335
  "readonly": ["read"],
336
- "no-write": ["read", "edit", "bash", "web_fetch"],
336
+ "no-write": ["read", "edit", "bash", "web_fetch", "skill"],
337
337
  }
338
338
 
339
339
 
@@ -630,6 +630,24 @@ class LocalFilesystemEnvironment:
630
630
  required=["url", "prompt"],
631
631
  ),
632
632
  ),
633
+ # skill tool
634
+ Tool(
635
+ type="function",
636
+ function=ToolFunction(
637
+ name="skill",
638
+ description="Load a skill's full instructions. Skills provide domain-specific knowledge and workflows. Use this when you need detailed guidance for a task mentioned in your available skills.",
639
+ parameters=ToolFunctionParameter(
640
+ type="object",
641
+ properties={
642
+ "name": {
643
+ "type": "string",
644
+ "description": "Name of the skill to load (e.g., 'wafer-guide')",
645
+ },
646
+ },
647
+ ),
648
+ required=["name"],
649
+ ),
650
+ ),
633
651
  ]
634
652
 
635
653
  async def on_assistant_message(self, message: Message, state: AgentState) -> AgentState:
@@ -655,6 +673,8 @@ class LocalFilesystemEnvironment:
655
673
  return await self._exec_bash(tool_call, current_state.session_id, cancel_scope)
656
674
  elif tool_call.name == "web_fetch":
657
675
  return await self._exec_web_fetch(tool_call, current_state.session_id)
676
+ elif tool_call.name == "skill":
677
+ return await self._exec_skill(tool_call)
658
678
  else:
659
679
  return ToolResult(
660
680
  tool_call_id=tool_call.id,
@@ -1155,3 +1175,31 @@ class LocalFilesystemEnvironment:
1155
1175
  content=header + final_content,
1156
1176
  details={"output_file": output_file_path} if output_file_path else None,
1157
1177
  )
1178
+
1179
+ async def _exec_skill(self, tool_call: ToolCall) -> ToolResult:
1180
+ """Load a skill's full instructions."""
1181
+ from ..skills import load_skill
1182
+
1183
+ skill_name = tool_call.args["name"]
1184
+ skill = load_skill(skill_name)
1185
+
1186
+ if skill is None:
1187
+ # List available skills in error message
1188
+ from ..skills import discover_skills
1189
+
1190
+ available = discover_skills()
1191
+ available_names = [s.name for s in available]
1192
+ return ToolResult(
1193
+ tool_call_id=tool_call.id,
1194
+ is_error=True,
1195
+ content="",
1196
+ error=f"Skill not found: {skill_name}. Available skills: {', '.join(available_names) or 'none'}",
1197
+ )
1198
+
1199
+ # Return the full skill content
1200
+ header = f"# Skill: {skill.name}\n\n"
1201
+ return ToolResult(
1202
+ tool_call_id=tool_call.id,
1203
+ is_error=False,
1204
+ content=header + skill.content,
1205
+ )
@@ -642,6 +642,7 @@ class EvalReport:
642
642
  timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
643
643
  git_info: dict[str, Any] = field(default_factory=_get_git_info)
644
644
  config_path: str | None = None # Path to config file relative to repo root
645
+ metadata: dict[str, Any] | None = None # Custom metadata (waferbench_category, github_runner, etc.)
645
646
 
646
647
  async def save(self, output_dir: Path) -> None:
647
648
  """Save evaluation results to directory."""
@@ -675,6 +676,8 @@ class EvalReport:
675
676
  "config_path": self.config_path,
676
677
  "sample_ids": [s.id for s in self.sample_results],
677
678
  }
679
+ if self.metadata:
680
+ summary["metadata"] = self.metadata
678
681
  # Sanitize API keys in the summary before saving
679
682
  summary = sanitize_api_keys(summary)
680
683
  report_file = output_dir / "report.json"
@@ -761,6 +764,9 @@ def _write_partial_report(
761
764
  "config_path": config.config_path,
762
765
  }
763
766
 
767
+ if config.metadata:
768
+ report["metadata"] = config.metadata
769
+
764
770
  if resume_from:
765
771
  report["resume_from"] = resume_from
766
772
 
@@ -1279,6 +1285,7 @@ async def evaluate(
1279
1285
  summary_metrics={},
1280
1286
  sample_results=[],
1281
1287
  config={"resumed_from": str(config.resume_dir)},
1288
+ metadata=config.metadata,
1282
1289
  )
1283
1290
 
1284
1291
  if config.verbose:
@@ -1489,6 +1496,7 @@ async def evaluate(
1489
1496
  "evaluation_timestamp": datetime.now().isoformat(),
1490
1497
  },
1491
1498
  config_path=config.config_path,
1499
+ metadata=config.metadata,
1492
1500
  )
1493
1501
 
1494
1502
  # Save if output directory specified
@@ -1546,7 +1554,7 @@ def compute_summary_metrics(results: list[Sample]) -> dict[str, float]:
1546
1554
  for m in r.score.metrics:
1547
1555
  all_metric_names.add(m.name)
1548
1556
 
1549
- # Compute mean, min, max, std for each metric
1557
+ # Compute mean, median, min, max, std for each metric
1550
1558
  for metric_name in all_metric_names:
1551
1559
  values = []
1552
1560
  for r in results:
@@ -1557,7 +1565,15 @@ def compute_summary_metrics(results: list[Sample]) -> dict[str, float]:
1557
1565
  break
1558
1566
  if values:
1559
1567
  mean_val = sum(values) / len(values)
1568
+ sorted_values = sorted(values)
1569
+ n = len(sorted_values)
1570
+ if n % 2 == 0:
1571
+ median_val = (sorted_values[n // 2 - 1] + sorted_values[n // 2]) / 2
1572
+ else:
1573
+ median_val = sorted_values[n // 2]
1574
+
1560
1575
  summary[f"mean_{metric_name}"] = mean_val
1576
+ summary[f"median_{metric_name}"] = median_val
1561
1577
  summary[f"min_{metric_name}"] = min(values)
1562
1578
  summary[f"max_{metric_name}"] = max(values)
1563
1579
  summary[f"std_{metric_name}"] = (
@@ -232,7 +232,7 @@ Detailed docs: {docs}
232
232
  If asked about your capabilities, read these files."""
233
233
 
234
234
 
235
- def build_system_prompt(
235
+ def build_system_prompt( # noqa: PLR0913
236
236
  env_name: str,
237
237
  tools: list[Tool],
238
238
  cwd: Path | None = None,
@@ -240,6 +240,7 @@ def build_system_prompt(
240
240
  env_system_prompt: str | None = None,
241
241
  include_self_docs: bool = True,
242
242
  include_project_context: bool = True,
243
+ include_skills: bool = False,
243
244
  ) -> str:
244
245
  """Build complete system prompt with dynamic tool info.
245
246
 
@@ -251,6 +252,7 @@ def build_system_prompt(
251
252
  env_system_prompt: Environment-provided system prompt (from env.get_system_prompt())
252
253
  include_self_docs: Whether to include rollouts documentation paths
253
254
  include_project_context: Whether to load AGENTS.md/ROLLOUTS.md files
255
+ include_skills: Whether to discover and list available skills
254
256
  """
255
257
  # Assertions (Tiger Style: 2+ per function, split compound)
256
258
  assert env_name, "env_name required"
@@ -277,11 +279,19 @@ def build_system_prompt(
277
279
  if guidelines:
278
280
  sections.append("Guidelines:\n" + "\n".join(f"- {g}" for g in guidelines))
279
281
 
280
- # 5. Self-documentation
282
+ # 5. Available skills (metadata only - agent loads full content via skill tool)
283
+ if include_skills:
284
+ from .skills import discover_skills, format_skill_metadata_for_prompt
285
+
286
+ skill_metadata = discover_skills()
287
+ if skill_metadata:
288
+ sections.append(format_skill_metadata_for_prompt(skill_metadata))
289
+
290
+ # 6. Self-documentation
281
291
  if include_self_docs:
282
292
  sections.append(build_self_doc_section())
283
293
 
284
- # 6. Project context files (AGENTS.md, ROLLOUTS.md, etc.)
294
+ # 7. Project context files (AGENTS.md, ROLLOUTS.md, etc.)
285
295
  if include_project_context:
286
296
  context_files = load_project_context(working_dir)
287
297
  if context_files:
@@ -290,7 +300,7 @@ def build_system_prompt(
290
300
  ctx_section += f"\n## {path}\n\n{content}\n"
291
301
  sections.append(ctx_section)
292
302
 
293
- # 7. Runtime context
303
+ # 8. Runtime context
294
304
  now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
295
305
  sections.append(f"Current time: {now}\nWorking directory: {working_dir}")
296
306
 
@@ -0,0 +1,176 @@
1
+ """Skill discovery and loading.
2
+
3
+ Skills are documentation files that agents can load on demand.
4
+ Format follows agentskills.io spec: SKILL.md with YAML frontmatter.
5
+
6
+ Discovery order:
7
+ 1. ~/.wafer/skills/{name}/SKILL.md (user-installed)
8
+ 2. Bundled skills (wafer-cli package)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+
16
+ from .paths import get_config_dir
17
+
18
+
19
@dataclass(frozen=True)
class SkillMetadata:
    """Lightweight skill metadata for system prompt injection.

    Holds only the name/description pair needed to list a skill in the
    system prompt; the full markdown body is loaded on demand via
    load_skill.
    """

    # Skill name taken from the SKILL.md frontmatter.
    name: str
    # One-line description taken from the frontmatter.
    description: str
    # Path to the SKILL.md file this metadata was parsed from.
    path: Path
26
+
27
+
28
@dataclass(frozen=True)
class Skill:
    """Full skill with content.

    Returned by load_skill; unlike SkillMetadata it carries the complete
    markdown body of the SKILL.md file.
    """

    # Skill name taken from the SKILL.md frontmatter.
    name: str
    # One-line description taken from the frontmatter.
    description: str
    # Full markdown content with the frontmatter block stripped.
    content: str
    # Path to the source SKILL.md file.
    path: Path
36
+
37
+
38
+ def _parse_skill_file(path: Path) -> tuple[dict[str, str], str] | None:
39
+ """Parse SKILL.md file into (frontmatter, content).
40
+
41
+ Returns None if file doesn't exist or is malformed.
42
+ """
43
+ if not path.exists():
44
+ return None
45
+
46
+ try:
47
+ text = path.read_text()
48
+ except (OSError, PermissionError):
49
+ return None
50
+
51
+ # Parse YAML frontmatter (between --- markers)
52
+ if not text.startswith("---"):
53
+ return None
54
+
55
+ # Find closing ---
56
+ end_idx = text.find("---", 3)
57
+ if end_idx == -1:
58
+ return None
59
+
60
+ frontmatter_text = text[3:end_idx].strip()
61
+ content = text[end_idx + 3 :].strip()
62
+
63
+ # Parse YAML (simple key: value format, no dependencies)
64
+ frontmatter: dict[str, str] = {}
65
+ for raw_line in frontmatter_text.split("\n"):
66
+ stripped = raw_line.strip()
67
+ if not stripped or ":" not in stripped:
68
+ continue
69
+ key, _, value = stripped.partition(":")
70
+ frontmatter[key.strip()] = value.strip()
71
+
72
+ # Validate required fields
73
+ if "name" not in frontmatter or "description" not in frontmatter:
74
+ return None
75
+
76
+ return frontmatter, content
77
+
78
+
79
+ def _get_bundled_skills_dir() -> Path | None:
80
+ """Get path to bundled skills in wafer-cli package."""
81
+ # Try to find wafer-cli's skills directory
82
+ try:
83
+ import wafer
84
+
85
+ wafer_cli_path = Path(wafer.__file__).parent
86
+ skills_dir = wafer_cli_path / "skills"
87
+ if skills_dir.exists():
88
+ return skills_dir
89
+ except ImportError:
90
+ pass
91
+
92
+ return None
93
+
94
+
95
def discover_skills() -> list[SkillMetadata]:
    """Discover all available skills.

    Scans user-installed skills (~/.wafer/skills/) first, then skills
    bundled with the wafer-cli package; a user skill shadows a bundled
    skill with the same name.

    Returns:
        List of SkillMetadata (name + description only; full content is
        loaded lazily via load_skill).
    """
    skills: dict[str, SkillMetadata] = {}

    # 1. User-installed skills (~/.wafer/skills/) — highest precedence.
    for meta in _scan_skills_dir(get_config_dir() / "skills"):
        skills[meta.name] = meta

    # 2. Bundled skills (wafer-cli package) — only fill in names the user
    # has not overridden.
    bundled_dir = _get_bundled_skills_dir()
    if bundled_dir is not None:
        for meta in _scan_skills_dir(bundled_dir):
            skills.setdefault(meta.name, meta)

    return list(skills.values())


def _scan_skills_dir(directory: Path) -> list[SkillMetadata]:
    """Collect metadata for every valid skill under ``directory``.

    Each skill lives in its own subdirectory containing a SKILL.md file;
    missing or malformed files are silently skipped.
    """
    found: list[SkillMetadata] = []
    if not directory.exists():
        return found
    for skill_dir in directory.iterdir():
        if not skill_dir.is_dir():
            continue
        skill_file = skill_dir / "SKILL.md"
        parsed = _parse_skill_file(skill_file)
        if parsed is None:
            continue
        frontmatter, _ = parsed
        found.append(
            SkillMetadata(
                name=frontmatter["name"],
                description=frontmatter["description"],
                path=skill_file,
            )
        )
    return found
137
+
138
+
139
def load_skill(name: str) -> Skill | None:
    """Load a skill by name.

    Re-reads the skill's SKILL.md from disk so the returned Skill carries
    the full markdown content, not just metadata.

    Returns:
        The full Skill, or None when no skill with that name exists or its
        file can no longer be parsed.
    """
    matches = (m for m in discover_skills() if m.name == name)
    for metadata in matches:
        parsed = _parse_skill_file(metadata.path)
        if parsed is None:
            continue
        frontmatter, content = parsed
        return Skill(
            name=frontmatter["name"],
            description=frontmatter["description"],
            content=content,
            path=metadata.path,
        )
    return None
157
+
158
+
159
def format_skill_metadata_for_prompt(skills: list[SkillMetadata]) -> str:
    """Format skill metadata for system prompt injection.

    Returns a compact markdown section listing each skill's name and
    description, or the empty string when no skills are available.
    """
    if not skills:
        return ""

    header = [
        "## Available Skills",
        "",
        "You have access to the following skills. Use the `skill` tool to load full instructions when needed.",
        "",
    ]
    entries = [f"- **{skill.name}**: {skill.description}" for skill in skills]
    return "\n".join(header + entries)
@@ -222,6 +222,9 @@ class TemplateConfig:
222
222
  # Example: {"corpus": "./docs/", "format": "markdown"}
223
223
  defaults: dict[str, str] = field(default_factory=dict)
224
224
 
225
+ # Skill discovery - if True, discovers skills and adds skill tool
226
+ include_skills: bool = False
227
+
225
228
  def interpolate_prompt(self, args: dict[str, str] | None = None) -> str:
226
229
  """Interpolate template variables into the system prompt.
227
230