spice-mcp 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,4 +9,11 @@ def resolve_raw_sql_template_id() -> int:
9
9
  Tests stub HTTP boundaries and only require a consistent integer. This
10
10
  placeholder can be adjusted if upstream semantics change.
11
11
  """
12
- return int(os.getenv("SPICE_RAW_SQL_QUERY_ID", "4060379"))
12
+ env_value = os.getenv("SPICE_RAW_SQL_QUERY_ID")
13
+ if env_value:
14
+ try:
15
+ return int(env_value.strip())
16
+ except (ValueError, AttributeError):
17
+ # Invalid environment variable, fallback to default
18
+ pass
19
+ return 4060379
@@ -0,0 +1,6 @@
1
+ """Spellbook adapter for discovering dbt models from GitHub repository."""
2
+
3
+ from .explorer import SpellbookExplorer
4
+
5
+ __all__ = ["SpellbookExplorer"]
6
+
@@ -0,0 +1,313 @@
1
+ """
2
+ Spellbook Explorer - Parses dbt models from Spellbook GitHub repository.
3
+
4
+ This adapter clones or accesses the Spellbook GitHub repo and parses dbt models
5
+ to discover available tables, schemas, and column information.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ import subprocess
12
+ import tempfile
13
+ from collections.abc import Sequence
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from ...core.models import SchemaMatch, TableColumn, TableDescription, TableSummary
18
+ from ...core.ports import CatalogExplorer
19
+
20
+
21
class SpellbookExplorer(CatalogExplorer):
    """
    Explorer that parses Spellbook dbt models from GitHub repository.

    Spellbook repo: https://github.com/duneanalytics/spellbook
    """

    SPELLBOOK_REPO_URL = "https://github.com/duneanalytics/spellbook.git"
    DEFAULT_BRANCH = "main"

    def __init__(
        self,
        repo_path: Path | str | None = None,
        repo_url: str | None = None,
        branch: str | None = None,
    ):
        """
        Initialize Spellbook explorer.

        Args:
            repo_path: Local path to spellbook repo (if None, will clone to temp dir)
            repo_url: GitHub repo URL (defaults to official spellbook repo)
            branch: Git branch to use (defaults to 'main')
        """
        self.repo_url = repo_url or self.SPELLBOOK_REPO_URL
        self.branch = branch or self.DEFAULT_BRANCH
        self._repo_path: Path | None = None
        # Cache of parsed models, keyed by schema/subproject name.
        self._models_cache: dict[str, list[dict[str, Any]]] | None = None

        if repo_path:
            self._repo_path = Path(repo_path)
        else:
            # Use cache directory if available, otherwise temp
            cache_base = os.getenv("SPICE_SPELLBOOK_CACHE", tempfile.gettempdir())
            self._repo_path = Path(cache_base) / "spellbook_repo"

    def _ensure_repo(self) -> Path:
        """Ensure spellbook repo is cloned locally and return its path.

        Clones (shallow, single branch) when the directory is missing or is not
        a git checkout; otherwise attempts a best-effort `git pull`.

        Raises:
            RuntimeError: If no repository path was configured.
            subprocess.CalledProcessError: If the initial clone fails.
        """
        if self._repo_path is None:
            raise RuntimeError("Repository path not set")

        repo_path = self._repo_path

        # Clone if doesn't exist
        if not repo_path.exists() or not (repo_path / ".git").exists():
            repo_path.parent.mkdir(parents=True, exist_ok=True)
            subprocess.run(
                ["git", "clone", "--depth", "1", "--branch", self.branch, self.repo_url, str(repo_path)],
                check=True,
                capture_output=True,
            )
        else:
            # Update if exists; failures (offline, detached HEAD, timeout)
            # are ignored so a stale checkout still works.
            try:
                subprocess.run(
                    ["git", "-C", str(repo_path), "pull", "origin", self.branch],
                    check=False,
                    capture_output=True,
                    timeout=30,
                )
            except Exception:
                pass  # Ignore update failures

        return repo_path

    def _load_models(self) -> dict[str, list[dict[str, Any]]]:
        """Load all dbt models from spellbook repo, organized by schema/subproject.

        Returns a mapping of subproject name -> list of model records, where
        each record holds: name, file (Path to .sql), schema_yml (Path or
        None), and schema (subproject name). Results are memoized per
        instance.
        """
        if self._models_cache is not None:
            return self._models_cache

        repo_path = self._ensure_repo()
        models: dict[str, list[dict[str, Any]]] = {}

        # Spellbook uses subprojects - look in dbt_subprojects/ and models/ directories
        for base_dir in (repo_path / "dbt_subprojects", repo_path / "models"):
            if not base_dir.exists():
                continue

            # Each immediate subdirectory is treated as a schema/subproject.
            for subproject_dir in base_dir.iterdir():
                if not subproject_dir.is_dir() or subproject_dir.name.startswith("."):
                    continue  # skip files and hidden directories

                schema_name = subproject_dir.name
                models.setdefault(schema_name, [])

                # Model SQL normally lives under <subproject>/models/;
                # fall back to the subproject root when absent.
                models_dir = subproject_dir / "models"
                if not models_dir.exists():
                    models_dir = subproject_dir

                for sql_file in models_dir.rglob("*.sql"):
                    # Skip compiled artifacts and vendored packages.
                    if "target" in sql_file.parts or "node_modules" in sql_file.parts:
                        continue

                    # Model name is the file stem: models/schema/table.sql -> table
                    model_name = sql_file.stem

                    # Prefer a sibling schema.yml; fall back one level up.
                    schema_yml = sql_file.parent / "schema.yml"
                    if not schema_yml.exists():
                        schema_yml = sql_file.parent.parent / "schema.yml"

                    models[schema_name].append({
                        "name": model_name,
                        "file": sql_file,
                        "schema_yml": schema_yml if schema_yml.exists() else None,
                        "schema": schema_name,
                    })

        self._models_cache = models
        return models

    def find_schemas(self, keyword: str) -> Sequence[SchemaMatch]:
        """
        Find schemas (subprojects) matching keyword in Spellbook repo.

        This searches through dbt subproject names and model descriptions.
        A schema matches when the keyword (case-insensitive) appears either
        in the schema name or in any of its model names; each schema is
        reported at most once.
        """
        models = self._load_models()
        matches: list[SchemaMatch] = []
        keyword_lower = keyword.lower()

        for schema_name, model_list in models.items():
            # Direct match on the schema/subproject name.
            if keyword_lower in schema_name.lower():
                matches.append(SchemaMatch(schema=schema_name))
                continue

            # Otherwise match against model names within the schema.
            for model in model_list:
                if keyword_lower in model["name"].lower():
                    if not any(m.schema == schema_name for m in matches):
                        matches.append(SchemaMatch(schema=schema_name))
                    break

        return matches

    def list_tables(self, schema: str, limit: int | None = None) -> Sequence[TableSummary]:
        """
        List tables (dbt models) in a given schema/subproject.

        Returns model names from the spellbook repository; an unknown schema
        yields an empty sequence. `limit`, when given, truncates the result.
        """
        models = self._load_models()
        summaries = [
            TableSummary(schema=schema, table=model["name"])
            for model in models.get(schema, [])
        ]
        return summaries if limit is None else summaries[:limit]

    def describe_table(self, schema: str, table: str) -> TableDescription:
        """
        Describe table columns by parsing dbt model SQL and schema.yml.

        Attempts to extract column information from:
        1. schema.yml file (if exists)
        2. SQL SELECT statement columns
        3. Fallback to basic inference

        Raises:
            ValueError: If the model is not found in the loaded repo.
        """
        models = self._load_models()

        # Find matching model record for this table.
        model_info = next(
            (m for m in models.get(schema, []) if m["name"] == table),
            None,
        )
        if model_info is None:
            raise ValueError(f"Table {schema}.{table} not found in Spellbook")

        columns: list[TableColumn] = []

        # Try to parse schema.yml first (authoritative dbt metadata).
        if model_info["schema_yml"]:
            columns = self._parse_schema_yml(model_info["schema_yml"], table)

        # Fallback: parse SQL file for column hints
        if not columns:
            columns = self._parse_sql_columns(model_info["file"])

        # If still no columns, create a basic placeholder
        if not columns:
            columns = [
                TableColumn(name="column_1", dune_type="VARCHAR", polars_dtype="Utf8")
            ]

        return TableDescription(
            fully_qualified_name=f"{schema}.{table}",
            columns=columns,
        )

    def _parse_schema_yml(self, schema_yml_path: Path, table_name: str) -> list[TableColumn]:
        """Parse dbt schema.yml to extract column definitions.

        Best-effort: returns [] when PyYAML is unavailable, the file is
        malformed, or the model is not listed.
        """
        try:
            try:
                import yaml
            except ImportError:
                # PyYAML not available, skip schema.yml parsing
                return []

            with open(schema_yml_path, encoding="utf-8") as f:
                content = yaml.safe_load(f)

            if not isinstance(content, dict):
                return []

            # Find model in schema.yml
            for model in content.get("models", []):
                if model.get("name") == table_name:
                    return [
                        TableColumn(
                            name=col.get("name", ""),
                            dune_type=col.get("data_type", "VARCHAR"),
                            polars_dtype=col.get("data_type"),
                            comment=col.get("description"),
                        )
                        for col in model.get("columns", [])
                    ]
        except Exception:
            pass  # best-effort parser: any failure falls through to []

        return []

    def _parse_sql_columns(self, sql_file: Path) -> list[TableColumn]:
        """Parse SQL file to extract column names from SELECT statements.

        Best-effort heuristic: grabs the first SELECT ... FROM span, splits on
        commas, strips aliases and single-argument function wrappers. Types
        cannot be inferred from SQL, so VARCHAR/Utf8 defaults are used.
        Returns at most 20 columns; [] on any parse failure.
        """
        try:
            with open(sql_file, encoding="utf-8") as f:
                sql = f.read()

            # Look for SELECT ... FROM patterns
            # Match: SELECT col1, col2, col3 FROM ...
            select_match = re.search(
                r"SELECT\s+(.+?)\s+FROM",
                sql,
                re.IGNORECASE | re.DOTALL,
            )
            if not select_match:
                return []

            cols: list[TableColumn] = []
            for col in select_match.group(1).split(","):
                col = col.strip()
                # Strip aliases. NOTE: the previous implementation detected
                # "AS" case-insensitively but split case-sensitively, so
                # lowercase aliases ("foo as bar") were never stripped; a
                # case-insensitive regex split fixes that.
                alias = re.search(r"\s+AS\s+", col, flags=re.IGNORECASE)
                if alias:
                    col = col[: alias.start()].strip()
                elif " " in col and not col.startswith("("):
                    # Might be alias without AS ("foo bar") - keep the expr part.
                    col = col.split()[0].strip()

                # Clean up function calls: function(col) -> col
                col = re.sub(r"^\w+\((.+)\)", r"\1", col)
                col = col.strip().strip('"').strip("'")

                # Skip wildcards and the DISTINCT keyword (any case).
                if col and col.upper() not in ("*", "DISTINCT"):
                    cols.append(
                        TableColumn(
                            name=col,
                            dune_type="VARCHAR",  # Default, can't infer from SQL
                            polars_dtype="Utf8",
                        )
                    )

            return cols[:20]  # Limit to reasonable number
        except Exception:
            pass  # best-effort parser: any failure falls through to []

        return []
spice_mcp/config.py CHANGED
@@ -41,7 +41,7 @@ class Config:
41
41
  cache: CacheConfig = field(default_factory=CacheConfig)
42
42
  logging: LoggingConfig = field(default_factory=LoggingConfig)
43
43
  http: HttpClientConfig = field(default_factory=HttpClientConfig)
44
- max_concurrent_queries: int = 5
44
+ max_concurrent_queries: int = 5 # Note: Not currently enforced (kept for future use)
45
45
  default_timeout_seconds: int = 30
46
46
 
47
47
  @classmethod
spice_mcp/core/models.py CHANGED
@@ -78,11 +78,3 @@ class TableDescription:
78
78
  columns: list[TableColumn] = field(default_factory=list)
79
79
 
80
80
 
81
- @dataclass(slots=True)
82
- class SuiPackageOverview:
83
- package_ids: list[str]
84
- window_hours: int
85
- events_preview: list[dict[str, Any]] | None = None
86
- transactions_preview: list[dict[str, Any]] | None = None
87
- objects_preview: list[dict[str, Any]] | None = None
88
- stats: dict[str, Any] = field(default_factory=dict)
spice_mcp/core/ports.py CHANGED
@@ -8,7 +8,6 @@ from .models import (
8
8
  QueryResult,
9
9
  ResultMetadata,
10
10
  SchemaMatch,
11
- SuiPackageOverview,
12
11
  TableDescription,
13
12
  TableSummary,
14
13
  )
@@ -39,15 +38,6 @@ class CatalogExplorer(Protocol):
39
38
  ...
40
39
 
41
40
 
42
- class SuiInspector(Protocol):
43
- """Port for Sui package exploration helpers."""
44
-
45
- def package_overview(
46
- self, packages: Sequence[str], *, hours: int, timeout_seconds: float | None = None
47
- ) -> SuiPackageOverview:
48
- ...
49
-
50
-
51
41
  class QueryAdmin(Protocol):
52
42
  """Port for managing Dune saved queries."""
53
43
 
@@ -62,8 +52,3 @@ class QueryAdmin(Protocol):
62
52
 
63
53
  def fork(self, source_query_id: int, *, name: str | None = None) -> Mapping[str, Any]:
64
54
  ...
65
-
66
- def events_preview(
67
- self, packages: Sequence[str], *, hours: int, limit: int
68
- ) -> Sequence[Mapping[str, object]]:
69
- ...