spice-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spice-mcp might be problematic. Click here for more details.
- spice_mcp/__init__.py +2 -0
- spice_mcp/adapters/__init__.py +0 -0
- spice_mcp/adapters/dune/__init__.py +10 -0
- spice_mcp/adapters/dune/admin.py +94 -0
- spice_mcp/adapters/dune/cache.py +185 -0
- spice_mcp/adapters/dune/client.py +255 -0
- spice_mcp/adapters/dune/extract.py +1461 -0
- spice_mcp/adapters/dune/helpers.py +11 -0
- spice_mcp/adapters/dune/transport.py +70 -0
- spice_mcp/adapters/dune/types.py +52 -0
- spice_mcp/adapters/dune/typing_utils.py +10 -0
- spice_mcp/adapters/dune/urls.py +126 -0
- spice_mcp/adapters/http_client.py +156 -0
- spice_mcp/config.py +81 -0
- spice_mcp/core/__init__.py +0 -0
- spice_mcp/core/errors.py +101 -0
- spice_mcp/core/models.py +88 -0
- spice_mcp/core/ports.py +69 -0
- spice_mcp/logging/query_history.py +131 -0
- spice_mcp/mcp/__init__.py +1 -0
- spice_mcp/mcp/server.py +546 -0
- spice_mcp/mcp/tools/__init__.py +1 -0
- spice_mcp/mcp/tools/base.py +41 -0
- spice_mcp/mcp/tools/execute_query.py +425 -0
- spice_mcp/mcp/tools/sui_package_overview.py +56 -0
- spice_mcp/observability/__init__.py +0 -0
- spice_mcp/observability/logging.py +18 -0
- spice_mcp/polars_utils.py +15 -0
- spice_mcp/py.typed +1 -0
- spice_mcp/service_layer/__init__.py +0 -0
- spice_mcp/service_layer/discovery_service.py +20 -0
- spice_mcp/service_layer/query_admin_service.py +26 -0
- spice_mcp/service_layer/query_service.py +118 -0
- spice_mcp/service_layer/sui_service.py +131 -0
- spice_mcp-0.1.0.dist-info/METADATA +133 -0
- spice_mcp-0.1.0.dist-info/RECORD +39 -0
- spice_mcp-0.1.0.dist-info/WHEEL +4 -0
- spice_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- spice_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
spice_mcp/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Dune adapter built from the vendored spice client.
|
|
2
|
+
|
|
3
|
+
This module provides a thin façade used by the new service layer while
|
|
4
|
+
keeping the battle-tested logic that the original spice client offered.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from . import urls # re-export for callers needing low-level helpers
|
|
8
|
+
from .extract import async_query, query # noqa: F401
|
|
9
|
+
|
|
10
|
+
__all__ = ["query", "async_query", "urls"]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from ..http_client import HttpClient, HttpClientConfig
|
|
7
|
+
from . import urls
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DuneAdminAdapter:
    """Lightweight client for Dune saved query management."""

    def __init__(
        self,
        api_key: str,
        *,
        http_client: HttpClient | None = None,
        http_config: HttpClientConfig | None = None,
    ):
        # Fall back to a default-configured HTTP client when none is injected.
        self.api_key = api_key
        self._http = http_client or HttpClient(http_config or HttpClientConfig())

    def _headers(self) -> Mapping[str, str]:
        """Headers common to every Dune admin API call."""
        headers = {
            "X-Dune-API-Key": self.api_key,
            "User-Agent": "spice-mcp-admin/1",
            "Content-Type": "application/json",
        }
        return headers

    def get(self, query_id: int) -> dict[str, Any]:
        """Fetch the saved-query object for *query_id*."""
        endpoint = urls.url_templates["query"].format(query_id=query_id)
        response = self._http.request("GET", endpoint, headers=self._headers())
        return response.json()

    def create(
        self,
        *,
        name: str,
        query_sql: str,
        description: str | None = None,
        tags: list[str] | None = None,
        parameters: list[dict] | None = None,
        is_private: bool = False,
    ) -> dict[str, Any]:
        """Create a new saved query and return the API response payload."""
        payload: dict[str, Any] = {
            "name": name,
            "query_sql": query_sql,
            "is_private": is_private,
        }
        if description:  # truthiness check: empty strings are omitted too
            payload["description"] = description
        if tags is not None:
            payload["tags"] = tags
        if parameters is not None:
            payload["parameters"] = parameters
        response = self._http.request(
            "POST",
            urls.url_templates["query_create"],
            headers=self._headers(),
            json=payload,
            timeout=20,
        )
        return response.json()

    def update(
        self,
        query_id: int,
        *,
        name: str | None = None,
        query_sql: str | None = None,
        description: str | None = None,
        tags: list[str] | None = None,
        parameters: list[dict] | None = None,
    ) -> dict[str, Any]:
        """Patch the saved query *query_id*; only non-None fields are sent."""
        candidates = {
            "name": name,
            "query_sql": query_sql,
            "description": description,
            "tags": tags,
            "parameters": parameters,
        }
        payload: dict[str, Any] = {
            field: value for field, value in candidates.items() if value is not None
        }
        endpoint = urls.url_templates["query"].format(query_id=query_id)
        response = self._http.request(
            "PATCH",
            endpoint,
            headers=self._headers(),
            json=payload,
            timeout=20,
        )
        return response.json()

    def fork(self, source_query_id: int, *, name: str | None = None) -> dict[str, Any]:
        """Fork an existing query, optionally giving the copy a new name."""
        # Prefer a registered template; fall back to the literal fork endpoint.
        template = urls.url_templates.get(
            "query_fork",
            f"https://api.dune.com/api/v1/query/{source_query_id}/fork",
        )
        endpoint = template.format(query_id=source_query_id)
        payload: dict[str, Any] = {"name": name} if name else {}
        response = self._http.request(
            "POST",
            endpoint,
            headers=self._headers(),
            json=payload,
            timeout=20,
        )
        return response.json()
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from . import extract as _extract
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
from .types import Execution
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Filename template for cached query results. The fields pin down both the
# execution (query_id, execution_id, completion timestamp) and a hash of the
# retrieval parameters, so different retrievals never collide on disk.
cache_template = (
    '{query_id}__{execution_id}__{parameter_hash}__{timestamp}.parquet'
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_from_cache(
    execute_kwargs: _extract.ExecuteKwargs,
    result_kwargs: _extract.RetrievalKwargs,
    output_kwargs: _extract.OutputKwargs,
) -> tuple[
    pl.DataFrame | tuple[pl.DataFrame, Execution] | None, Execution | None
]:
    """Return a cached result for the latest execution, if one exists on disk.

    Returns ``(result, execution)``: ``execution`` is ``None`` when no
    execution exists at all, and ``result`` is ``None`` on a cache miss.
    """
    # resolve the most recent execution for this query / parameter set
    latest = _extract.get_latest_execution(execute_kwargs)
    if latest is None:
        return None, None

    path = _build_cache_path(
        execution=latest,
        execute_kwargs=execute_kwargs,
        result_kwargs=result_kwargs,
        cache_dir=output_kwargs['cache_dir'],
    )
    if not os.path.exists(path):
        # known execution, but its result is not cached locally
        return None, latest

    import polars as pl

    if result_kwargs['verbose']:
        print('loading dune query result from cache')
    frame = pl.read_parquet(path)
    if output_kwargs['include_execution']:
        return (frame, latest), latest
    return frame, latest
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def async_load_from_cache(
    execute_kwargs: _extract.ExecuteKwargs,
    result_kwargs: _extract.RetrievalKwargs,
    output_kwargs: _extract.OutputKwargs,
) -> tuple[
    pl.DataFrame | tuple[pl.DataFrame, Execution] | None, Execution | None
]:
    """Async variant of :func:`load_from_cache`.

    Returns ``(result, execution)``: ``execution`` is ``None`` when no
    execution exists at all, and ``result`` is ``None`` on a cache miss.
    """
    # resolve the most recent execution for this query / parameter set
    latest = await _extract.async_get_latest_execution(execute_kwargs)
    if latest is None:
        return None, None

    path = _build_cache_path(
        execution=latest,
        execute_kwargs=execute_kwargs,
        result_kwargs=result_kwargs,
        cache_dir=output_kwargs['cache_dir'],
    )
    if not os.path.exists(path):
        # known execution, but its result is not cached locally
        return None, latest

    import polars as pl

    if result_kwargs['verbose']:
        print('loading dune query result from cache')
    # lazy scan + async collect keeps the event loop free during the read
    frame = await pl.scan_parquet(path).collect_async()
    if output_kwargs['include_execution']:
        return (frame, latest), latest
    return frame, latest
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def save_to_cache(
    df: pl.DataFrame,
    execution: Execution,
    execute_kwargs: _extract.ExecuteKwargs,
    result_kwargs: _extract.RetrievalKwargs,
    cache_dir: str | None,
) -> None:
    """Persist *df* as parquet at the deterministic cache path.

    The write goes through a uniquely named temp file followed by a move,
    so parallel writers never leave a partially written cache entry behind.
    """
    import secrets
    import shutil

    if result_kwargs['verbose']:
        print('saving result to cache')

    target = _build_cache_path(
        execution=execution,
        execute_kwargs=execute_kwargs,
        result_kwargs=result_kwargs,
        cache_dir=cache_dir,
    )

    # ensure the cache directory exists
    os.makedirs(os.path.dirname(target), exist_ok=True)

    # write-then-move with a random suffix guards against concurrent writers
    scratch = target + '_tmp_' + secrets.token_hex(8)
    df.write_parquet(scratch)
    shutil.move(scratch, target)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _preserialize_types(
|
|
122
|
+
result_kwargs: _extract.RetrievalKwargs,
|
|
123
|
+
key: str,
|
|
124
|
+
) -> list[str | list[str]] | None:
|
|
125
|
+
raw = result_kwargs[key] # type: ignore
|
|
126
|
+
if raw is None:
|
|
127
|
+
types: list[str | list[str]] | None = None
|
|
128
|
+
else:
|
|
129
|
+
types = []
|
|
130
|
+
if isinstance(raw, list):
|
|
131
|
+
for type in raw:
|
|
132
|
+
types.append(str(type))
|
|
133
|
+
elif isinstance(raw, dict):
|
|
134
|
+
for name, type in raw.items():
|
|
135
|
+
types.append([name, str(type)])
|
|
136
|
+
else:
|
|
137
|
+
raise Exception('invalid format for ' + key)
|
|
138
|
+
return types
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _build_cache_path(
    execution: Execution,
    execute_kwargs: _extract.ExecuteKwargs,
    result_kwargs: _extract.RetrievalKwargs,
    cache_dir: str | None,
) -> str:
    """Compute the deterministic cache file path for one retrieval.

    The filename embeds query id, execution id, completion timestamp, and a
    truncated md5 digest of every knob that can change the returned payload.
    Raises ``Exception`` when the execution has no completion timestamp.
    """
    import hashlib
    import json

    # Fingerprint every parameter that affects the result data, so two
    # different retrievals of the same execution never share a cache file.
    fingerprint = {
        'spice_version': 'spice-mcp',
        'execution_id': execution['execution_id'],
        'query_id': execute_kwargs['query_id'],
        'parameters': execute_kwargs['parameters'],
        'limit': result_kwargs['limit'],
        'offset': result_kwargs['offset'],
        'sample_count': result_kwargs['sample_count'],
        'sort_by': result_kwargs['sort_by'],
        'columns': result_kwargs['columns'],
        'extras': result_kwargs['extras'],
        'types': _preserialize_types(result_kwargs, 'types'),
        'all_types': _preserialize_types(result_kwargs, 'all_types'),
    }
    # md5 is fine here: this is a cache key, not a security boundary
    digest = hashlib.md5(
        json.dumps(fingerprint, sort_keys=True).encode('utf-8')
    )
    hash_str = digest.hexdigest()[:16]

    completed_at = execution['timestamp']
    if completed_at is None:
        raise Exception('need completion timestamp on execution')

    filename = cache_template.format(
        query_id=execute_kwargs['query_id'],
        execution_id=execution['execution_id'],
        parameter_hash=hash_str,
        timestamp=int(completed_at),
    )

    directory = cache_dir if cache_dir is not None else '/tmp/dune_spice'
    return os.path.join(directory, filename)
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import time
|
|
6
|
+
from collections.abc import Mapping, Sequence
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
from ...config import Config
|
|
12
|
+
from ...core.models import (
|
|
13
|
+
QueryRequest,
|
|
14
|
+
QueryResult,
|
|
15
|
+
ResultMetadata,
|
|
16
|
+
ResultPreview,
|
|
17
|
+
SchemaMatch,
|
|
18
|
+
TableColumn,
|
|
19
|
+
TableDescription,
|
|
20
|
+
TableSummary,
|
|
21
|
+
)
|
|
22
|
+
from ...core.ports import CatalogExplorer, QueryExecutor
|
|
23
|
+
from ...polars_utils import collect_preview
|
|
24
|
+
from ..http_client import HttpClient, HttpClientConfig
|
|
25
|
+
from . import extract, urls
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DuneAdapter(QueryExecutor, CatalogExplorer):
    """Thin façade around the vendored extract module."""

    def __init__(
        self,
        config: Config,
        *,
        http_client: HttpClient | None = None,
    ):
        # Build an HTTP client from config unless an explicit one is injected.
        self.config = config
        http_config: HttpClientConfig = config.http
        self._http = http_client or HttpClient(http_config)

    # QueryExecutor -----------------------------------------------------------------
    def execute(self, request: QueryRequest) -> QueryResult:
        """Run the query described by *request* and return a QueryResult.

        SHOW SCHEMAS / SHOW TABLES statements are rewritten to
        information_schema SELECTs before submission (see
        _maybe_rewrite_show_sql); other queries pass through unchanged.
        """
        self._ensure_api_key()
        start = time.time()
        q = request.query
        if isinstance(q, str):
            q_rewritten = _maybe_rewrite_show_sql(q)
            if q_rewritten is not None:
                q = q_rewritten
        result = extract.query(
            query_or_execution=q,
            verbose=False,
            refresh=request.refresh,
            max_age=request.max_age,
            parameters=request.parameters,
            api_key=self._api_key(),
            performance=request.performance or "medium",
            poll=request.poll,
            timeout_seconds=request.timeout_seconds,
            limit=request.limit,
            offset=request.offset,
            sample_count=request.sample_count,
            sort_by=request.sort_by,
            columns=request.columns,
            cache_dir=self.config.cache.cache_dir,
            include_execution=request.include_execution,
            http_client=self._http,
        )
        # extract.query returns (df, execution) only when include_execution
        # was requested; otherwise it returns the dataframe alone.
        if request.include_execution:
            df, execution = result
        else:
            df = result
            execution = {}
        duration_ms = int((time.time() - start) * 1000)
        columns = list(df.columns)
        rowcount = int(len(df))
        lazyframe = df.lazy()
        preview = _build_preview(lazyframe, columns, rowcount)
        # drop the eager frame; downstream consumers use the lazyframe
        del df
        # Use rewritten SQL for metadata determination too
        req_for_meta = request
        try:
            from dataclasses import replace

            # NOTE(review): assumes QueryRequest is a dataclass — the
            # except guard below swallows the failure if it is not.
            if isinstance(q, str) and q != request.query:
                req_for_meta = replace(request, query=q)  # type: ignore[arg-type]
        except Exception:
            pass
        meta = self.fetch_metadata(req_for_meta, execution=execution)
        meta.duration_ms = duration_ms
        return QueryResult(preview=preview, info=meta, lazyframe=lazyframe)

    def fetch_metadata(
        self, request: QueryRequest, *, execution: Mapping[str, Any] | None = None
    ) -> ResultMetadata:
        """Fetch result metadata (pagination cursors, state, errors) for *request*.

        Best-effort: any failure while talking to the results endpoint is
        swallowed and yields an empty metadata payload rather than raising.
        """
        self._ensure_api_key()
        # Resolve a query id (if the request names/contains one) plus the
        # effective parameters, via the vendored extract internals.
        query_id, _, effective_params = extract._determine_input_type(  # type: ignore[attr-defined]
            request.query,
            request.parameters,
        )

        payload: dict[str, Any] = {}
        next_uri: str | None = None
        next_offset: int | None = None

        if query_id is not None:
            params: dict[str, Any] = {}
            if effective_params is not None:
                params["query_parameters"] = effective_params
            params.update(
                {
                    "limit": request.limit,
                    "offset": request.offset,
                    "sample_count": request.sample_count,
                    "sort_by": request.sort_by,
                    "columns": list(request.columns) if request.columns else None,
                }
            )
            if request.extras:
                # extras are merged best-effort; malformed extras are ignored
                try:
                    params.update(request.extras)
                except Exception:
                    pass

            url = urls.get_query_results_url(query_id, parameters=params, csv=False)
            headers = {
                "X-Dune-API-Key": self._api_key(),
                "User-Agent": extract.get_user_agent(),
            }
            try:
                resp = self._http.request("GET", url, headers=headers)
                data = resp.json()
                if isinstance(data, dict):
                    payload = data.get("result", {}).get("metadata") or {}
                    next_uri = data.get("next_uri")
                    next_offset = data.get("next_offset")
                    # surface top-level error/state alongside result metadata
                    if "error" in data:
                        payload = {**payload, "error": data["error"]}
                    if "state" in data:
                        payload = {**payload, "state": data["state"]}
            except Exception:
                payload = {}

        execution_meta: dict[str, Any] = {}
        if execution:
            execution_meta = dict(execution)

        return ResultMetadata(
            execution=execution_meta,
            duration_ms=0,  # caller (execute) overwrites with measured time
            metadata=payload or None,
            next_offset=next_offset,
            next_uri=next_uri,
        )

    # CatalogExplorer ---------------------------------------------------------------
    def find_schemas(self, keyword: str) -> Sequence[SchemaMatch]:
        """List schemas whose name contains *keyword* (substring match).

        NOTE(review): keyword is interpolated into SQL unescaped; callers
        should pass trusted identifiers only — confirm upstream validation.
        """
        sql = f"SHOW SCHEMAS LIKE '%{keyword}%'"
        df = self._run_sql(sql)
        return [SchemaMatch(schema=row.get("Schema", "")) for row in df.to_dicts()]

    def list_tables(self, schema: str, limit: int | None = None) -> Sequence[TableSummary]:
        """List tables in *schema*, optionally truncated to *limit* rows."""
        sql = f"SHOW TABLES FROM {schema}"
        df = self._run_sql(sql, limit=limit)
        return [
            # column name differs by result shape; fall back accordingly
            TableSummary(schema=schema, table=row.get("Table", row.get("name", "")))
            for row in df.to_dicts()
        ]

    def describe_table(self, schema: str, table: str) -> TableDescription:
        """Describe the columns of ``schema.table``.

        Tries SHOW COLUMNS first; on any failure falls back to selecting a
        single row and reporting the polars dtypes instead.
        """
        fq = f"{schema}.{table}"

        try:
            df = self._run_sql(f"SHOW COLUMNS FROM {fq}")
            rows = df.to_dicts()
            columns = [
                TableColumn(
                    name=row.get("Column") or row.get("column_name") or "",
                    dune_type=row.get("Type") or row.get("data_type"),
                    polars_dtype=row.get("Type") or None,
                    comment=row.get("Comment"),
                    extra=row.get("Extra"),
                )
                for row in rows
            ]
            return TableDescription(fully_qualified_name=fq, columns=columns)
        except Exception:
            # fallback: infer column names/dtypes from a one-row sample
            df = self._run_sql(f"SELECT * FROM {fq} LIMIT 1")
            columns = [
                TableColumn(name=name, polars_dtype=str(dtype))
                for name, dtype in zip(df.columns, df.dtypes)
            ]
            return TableDescription(fully_qualified_name=fq, columns=columns)

    # Internal helpers --------------------------------------------------------------
    def _run_sql(self, sql: str, *, limit: int | None = None) -> pl.DataFrame:
        """Execute raw SQL through extract.query and return the dataframe.

        SHOW statements are rewritten to SELECTs first; the result is
        defensively truncated to *limit* rows when the backend returns more.
        """
        self._ensure_api_key()
        sql_eff = _maybe_rewrite_show_sql(sql) or sql
        df = extract.query(
            sql_eff,
            verbose=False,
            performance="medium",
            timeout_seconds=self.config.default_timeout_seconds,
            limit=limit,
            cache_dir=self.config.cache.cache_dir,
            http_client=self._http,
        )
        if limit is not None and len(df) > limit:
            return df.head(limit)
        return df

    def _ensure_api_key(self) -> None:
        # Export the configured key into the environment for code paths that
        # read DUNE_API_KEY directly; never overwrites an existing value.
        if not os.getenv("DUNE_API_KEY"):
            os.environ["DUNE_API_KEY"] = self._api_key()

    def _api_key(self) -> str:
        # Single source of truth for the API key: the injected config.
        return self.config.dune.api_key
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _build_preview(lf: pl.LazyFrame, columns: list[str], rowcount: int) -> ResultPreview:
    """Assemble a ResultPreview containing at most the first 10 rows of *lf*."""
    head_count = rowcount if rowcount < 10 else 10
    sample = collect_preview(lf, head_count)
    return ResultPreview(rowcount=rowcount, columns=columns, data_preview=sample)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _maybe_rewrite_show_sql(sql: str) -> str | None:
|
|
231
|
+
"""Rewrite certain SHOW statements to information_schema SELECTs for portability.
|
|
232
|
+
|
|
233
|
+
This allows running discovery-style commands through the parameterized raw SQL
|
|
234
|
+
template which expects SELECT statements.
|
|
235
|
+
"""
|
|
236
|
+
s = sql.strip()
|
|
237
|
+
m = re.match(r"^SHOW\s+SCHEMAS\s+LIKE\s+'([^']+)'\s*;?$", s, flags=re.IGNORECASE)
|
|
238
|
+
if m:
|
|
239
|
+
pat = m.group(1)
|
|
240
|
+
return (
|
|
241
|
+
"SELECT schema_name AS Schema FROM information_schema.schemata "
|
|
242
|
+
f"WHERE schema_name LIKE '{pat}'"
|
|
243
|
+
)
|
|
244
|
+
if re.match(r"^SHOW\s+SCHEMAS\s*;?$", s, flags=re.IGNORECASE):
|
|
245
|
+
return "SELECT schema_name AS Schema FROM information_schema.schemata"
|
|
246
|
+
|
|
247
|
+
m = re.match(r"^SHOW\s+TABLES\s+FROM\s+([A-Za-z0-9_\.]+)\s*;?$", s, flags=re.IGNORECASE)
|
|
248
|
+
if m:
|
|
249
|
+
schema = m.group(1)
|
|
250
|
+
return (
|
|
251
|
+
"SELECT table_name AS Table FROM information_schema.tables "
|
|
252
|
+
f"WHERE table_schema = '{schema}'"
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
return None
|