thordata-mcp-server 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata_mcp/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """
2
2
  Thordata MCP Server package.
3
3
  """
4
- __version__ = "0.4.4"
4
+ __version__ = "0.5.0"
@@ -1,8 +1,7 @@
1
1
  """Browser session management for Thordata Scraping Browser.
2
2
 
3
3
  This module provides a high-level wrapper around Playwright connected to
4
- Thordata's Scraping Browser (via `AsyncThordataClient.get_browser_connection_url`),
5
- inspired by Bright Data's browser session design but implemented in Python.
4
+ Thordata's Scraping Browser (via `AsyncThordataClient.get_browser_connection_url`).
6
5
 
7
6
  Design goals:
8
7
  - Domain-scoped browser sessions (one browser/page per domain).
@@ -18,6 +17,8 @@ from urllib.parse import urlparse
18
17
 
19
18
  from playwright.async_api import Browser, Page, Playwright, async_playwright
20
19
 
20
+ import time
21
+
21
22
  from thordata.async_client import AsyncThordataClient
22
23
 
23
24
  from .aria_snapshot import AriaSnapshotFilter
@@ -37,6 +38,11 @@ class BrowserSession:
37
38
  self._requests: Dict[str, Dict[Any, Any]] = {}
38
39
  self._dom_refs: Set[str] = set()
39
40
  self._current_domain: str = "default"
41
+ # Console and network diagnostics cache
42
+ self._console_messages: Dict[str, List[Dict[str, Any]]] = {}
43
+ self._network_requests: Dict[str, List[Dict[str, Any]]] = {}
44
+ self._max_console_messages = 10
45
+ self._max_network_requests = 20
40
46
 
41
47
  @staticmethod
42
48
  def _get_domain(url: str) -> str:
@@ -139,11 +145,26 @@ class BrowserSession:
139
145
 
140
146
  # Reset network tracking for this domain
141
147
  self._requests[domain] = {}
148
+ self._console_messages[domain] = []
149
+ self._network_requests[domain] = []
142
150
 
143
151
  async def on_request(request: Any) -> None:
144
152
  if domain in self._requests:
145
153
  self._requests[domain][request] = None
146
-
154
+ try:
155
+ self._network_requests.setdefault(domain, [])
156
+ self._network_requests[domain].append(
157
+ {
158
+ "url": request.url,
159
+ "method": request.method,
160
+ "resourceType": getattr(request, "resource_type", None),
161
+ "timestamp": int(time.time() * 1000),
162
+ }
163
+ )
164
+ self._network_requests[domain] = self._network_requests[domain][-self._max_network_requests :]
165
+ except Exception:
166
+ pass
167
+
147
168
  async def on_response(response: Any) -> None:
148
169
  if domain in self._requests:
149
170
  try:
@@ -151,15 +172,78 @@ class BrowserSession:
151
172
  except Exception:
152
173
  # Best-effort, non-fatal
153
174
  pass
175
+ try:
176
+ # Update last matching request with status
177
+ req = response.request
178
+ url = getattr(req, "url", None)
179
+ if url and domain in self._network_requests:
180
+ for item in reversed(self._network_requests[domain]):
181
+ if item.get("url") == url and item.get("statusCode") is None:
182
+ item["statusCode"] = response.status
183
+ break
184
+ except Exception:
185
+ pass
154
186
 
155
187
  page.on("request", on_request)
156
188
  page.on("response", on_response)
157
-
189
+
190
+ # Console message tracking
191
+ async def on_console(msg: Any) -> None:
192
+ try:
193
+ self._console_messages.setdefault(domain, [])
194
+ self._console_messages[domain].append(
195
+ {
196
+ "type": msg.type,
197
+ "message": msg.text,
198
+ "timestamp": int(time.time() * 1000),
199
+ }
200
+ )
201
+ self._console_messages[domain] = self._console_messages[domain][-self._max_console_messages :]
202
+ except Exception:
203
+ pass
204
+
205
+ page.on("console", on_console)
206
+
158
207
  self._pages[domain] = page
159
208
  return page
160
209
 
161
- async def capture_snapshot(self, filtered: bool = True) -> Dict[str, Any]:
162
- """Capture an ARIA-like snapshot and optional DOM snapshot."""
210
def get_console_tail(self, n: int = 10, domain: Optional[str] = None) -> List[Dict[str, Any]]:
    """Return the most recent console messages cached for a domain.

    Args:
        n: Maximum number of messages to return. Zero or a negative value
            yields an empty list.
        domain: Domain key to read. When omitted (or empty), the session's
            current domain is used.

    Returns:
        A new list with at most ``n`` entries taken from the end of the
        cached message list; empty when nothing is cached for the domain.
    """
    d = domain or self._current_domain
    items = self._console_messages.get(d, [])
    count = max(0, int(n))
    # Guard the zero case explicitly: ``items[-0:]`` is ``items[0:]`` and would
    # return every cached message instead of none.
    return items[-count:] if count else []
215
+
216
def get_network_tail(self, n: int = 20, domain: Optional[str] = None) -> List[Dict[str, Any]]:
    """Return the most recent network request summaries cached for a domain.

    Args:
        n: Maximum number of summaries to return. Zero or a negative value
            yields an empty list.
        domain: Domain key to read. When omitted (or empty), the session's
            current domain is used.

    Returns:
        A new list with at most ``n`` entries taken from the end of the
        cached request list; empty when nothing is cached for the domain.
    """
    d = domain or self._current_domain
    items = self._network_requests.get(d, [])
    count = max(0, int(n))
    # ``items[-0:]`` would slice from the start and return everything, so the
    # "nothing requested" case has to be handled separately.
    return items[-count:] if count else []
221
+
222
def reset_page(self, domain: Optional[str] = None) -> None:
    """Forget the cached page and diagnostics recorded for one domain.

    Removes the domain's entry from the page cache, the request tracker and
    the console/network diagnostic buffers. Missing entries are ignored.

    Args:
        domain: Domain key to clear. When omitted (or empty), the session's
            current domain is used.
    """
    key = domain if domain else self._current_domain
    per_domain_caches = (
        self._pages,
        self._requests,
        self._console_messages,
        self._network_requests,
    )
    for cache in per_domain_caches:
        cache.pop(key, None)
229
+
230
+
231
+ async def capture_snapshot(
232
+ self,
233
+ *,
234
+ filtered: bool = True,
235
+ mode: str = "compact",
236
+ max_items: int = 80,
237
+ include_dom: bool = False,
238
+ ) -> Dict[str, Any]:
239
+ """Capture an ARIA-like snapshot and optional DOM snapshot.
240
+
241
+ Args:
242
+ filtered: Whether to apply AriaSnapshotFilter (legacy, kept for compatibility).
243
+ mode: "compact" | "full". Compact returns minimal interactive elements.
244
+ max_items: Maximum number of interactive elements to include (compact mode only).
245
+ include_dom: Whether to include dom_snapshot (compact mode defaults to False).
246
+ """
163
247
  page = await self.get_page()
164
248
 
165
249
  try:
@@ -175,16 +259,64 @@ class BrowserSession:
175
259
  "aria_snapshot": full_snapshot,
176
260
  }
177
261
 
262
+ if mode == "compact":
263
+ # Compact: return only filtered interactive elements, optionally without dom_snapshot
264
+ filtered_snapshot = AriaSnapshotFilter.filter_snapshot(full_snapshot)
265
+ filtered_snapshot = self._limit_aria_snapshot_items(filtered_snapshot, max_items=max_items)
266
+ dom_snapshot = None
267
+ if include_dom:
268
+ dom_snapshot_raw = await self._capture_dom_snapshot(page)
269
+ self._dom_refs = {el["ref"] for el in dom_snapshot_raw}
270
+ dom_snapshot = AriaSnapshotFilter.format_dom_elements(dom_snapshot_raw)
271
+ return {
272
+ "url": page.url,
273
+ "title": await page.title(),
274
+ "aria_snapshot": filtered_snapshot,
275
+ "dom_snapshot": dom_snapshot,
276
+ "_meta": {"mode": mode, "max_items": max_items, "include_dom": include_dom},
277
+ }
278
+
279
+ # Full mode: include both filtered aria and dom_snapshot (legacy behavior)
178
280
  filtered_snapshot = AriaSnapshotFilter.filter_snapshot(full_snapshot)
179
- dom_snapshot = await self._capture_dom_snapshot(page)
180
- self._dom_refs = {el["ref"] for el in dom_snapshot}
181
-
281
+ dom_snapshot_raw = await self._capture_dom_snapshot(page)
282
+ self._dom_refs = {el["ref"] for el in dom_snapshot_raw}
182
283
  return {
183
284
  "url": page.url,
184
285
  "title": await page.title(),
185
286
  "aria_snapshot": filtered_snapshot,
186
- "dom_snapshot": AriaSnapshotFilter.format_dom_elements(dom_snapshot),
287
+ "dom_snapshot": AriaSnapshotFilter.format_dom_elements(dom_snapshot_raw),
288
+ "_meta": {"mode": mode},
187
289
  }
290
+
291
@staticmethod
def _limit_aria_snapshot_items(text: str, *, max_items: int) -> str:
    """Truncate a snapshot to its first ``max_items`` element blocks.

    An element block starts on a line beginning with ``'- '`` (Playwright raw
    format) or ``'['`` (AriaSnapshotFilter compact format). Any following
    lines that do not start a new block are treated as continuation lines of
    the block above them. Lines that precede the first block are dropped.

    Args:
        text: Snapshot text to truncate.
        max_items: Maximum number of element blocks to keep. A value that
            cannot be converted to ``int`` falls back to 80.

    Returns:
        The truncated snapshot with surrounding whitespace stripped; ``""``
        when ``max_items`` is not positive; ``text`` unchanged when it is
        empty.
    """
    try:
        limit = int(max_items)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are expected here; fall back to the default.
        limit = 80
    if limit <= 0:
        return ""
    if not text:
        return text

    kept: list[str] = []
    seen = 0
    for line in text.splitlines():
        if line.startswith(("- ", "[")):
            if seen >= limit:
                break
            seen += 1
        # Continuation lines are kept only once the first block has started.
        if seen:
            kept.append(line)
    return "\n".join(kept).strip()
188
320
 
189
321
  async def _get_interactive_snapshot(self, page: Page) -> str:
190
322
  """Generate a text snapshot of interactive elements with refs."""
@@ -194,12 +326,25 @@ class BrowserSession:
194
326
  const lines = [];
195
327
  let refCounter = 0;
196
328
 
329
+ function normalizeRole(tag, explicitRole) {
330
+ const role = (explicitRole || '').toLowerCase();
331
+ const t = (tag || '').toLowerCase();
332
+ if (role) return role;
333
+ // Map common interactive tags to standard ARIA roles
334
+ if (t === 'a') return 'link';
335
+ if (t === 'button') return 'button';
336
+ if (t === 'input') return 'textbox';
337
+ if (t === 'select') return 'combobox';
338
+ if (t === 'textarea') return 'textbox';
339
+ return t;
340
+ }
341
+
197
342
  function traverse(node) {
198
343
  if (node.nodeType === Node.ELEMENT_NODE) {
199
- const role = node.getAttribute('role') || node.tagName.toLowerCase();
200
344
  const tag = node.tagName.toLowerCase();
201
345
  const interactiveTag = ['a', 'button', 'input', 'select', 'textarea'].includes(tag);
202
- const interactiveRole = ['button', 'link', 'textbox', 'checkbox'].includes(role);
346
+ const role = normalizeRole(tag, node.getAttribute('role'));
347
+ const interactiveRole = ['button', 'link', 'textbox', 'searchbox', 'combobox', 'checkbox', 'radio', 'switch', 'tab', 'menuitem', 'option'].includes(role);
203
348
 
204
349
  if (interactiveTag || interactiveRole) {
205
350
  if (!node.dataset.fastmcpRef) {
thordata_mcp/config.py CHANGED
@@ -6,6 +6,14 @@ from pydantic_settings import BaseSettings
6
6
  class Settings(BaseSettings):
7
7
  """Environment-driven configuration for the MCP server."""
8
8
 
9
+ # MCP tool exposure mode (BrightData-like)
10
+ # - rapid: minimal core tools
11
+ # - pro: all tools
12
+ # - custom: enable by THORDATA_GROUPS and THORDATA_TOOLS
13
+ THORDATA_MODE: str = "rapid"
14
+ THORDATA_GROUPS: str | None = None
15
+ THORDATA_TOOLS: str | None = None
16
+
9
17
  # Thordata credentials
10
18
  THORDATA_SCRAPER_TOKEN: str | None = None
11
19
  THORDATA_PUBLIC_TOKEN: str | None = None
@@ -20,9 +28,9 @@ class Settings(BaseSettings):
20
28
  # Tasks discovery UX (to avoid dumping hundreds of tools to the client by default)
21
29
  # - mode=curated: only return tools from THORDATA_TASKS_GROUPS, with pagination
22
30
  # - mode=all: return all discovered tools
23
- # Default to listing ALL Web Scraper tasks, but paginated (no env changes required for “100+ tools” use-case).
24
- THORDATA_TASKS_LIST_MODE: str = "all"
25
- THORDATA_TASKS_LIST_DEFAULT_LIMIT: int = 100
31
+ # Default to curated mode to reduce tool selection noise for LLMs.
32
+ THORDATA_TASKS_LIST_MODE: str = "curated"
33
+ THORDATA_TASKS_LIST_DEFAULT_LIMIT: int = 60
26
34
  THORDATA_TASKS_GROUPS: str = "ecommerce,social,video,search,travel,code,professional"
27
35
 
28
36
  # Optional: restrict which SDK tool_keys are allowed to execute (safety/UX)
@@ -49,6 +57,9 @@ class Settings(BaseSettings):
49
57
  # Logging
50
58
  LOG_LEVEL: str = "INFO"
51
59
 
60
+ # Debug tools exposure
61
+ THORDATA_DEBUG_TOOLS: bool = False
62
+
52
63
  class Config:
53
64
  env_file = ".env"
54
65
  extra = "ignore"
thordata_mcp/context.py CHANGED
@@ -35,4 +35,4 @@ class ServerContext:
35
35
 
36
36
  if cls._client:
37
37
  await cls._client.close()
38
- cls._client = None
38
+ cls._client = None
@@ -298,16 +298,96 @@ def register(mcp: FastMCP) -> None:
298
298
 
299
299
  @mcp.tool(name="browser.click_ref", description="Click an element by its ref ID")
300
300
  @handle_mcp_errors
301
- async def browser_click_ref(ref: str, element: str = "element") -> dict[str, Any]:
302
- """Click an element using the [ref=X] ID from the snapshot."""
301
+ async def browser_click_ref(
302
+ ref: str,
303
+ element: str = "element",
304
+ wait_for_navigation_ms: Optional[int] = None,
305
+ ) -> dict[str, Any]:
306
+ """Click an element using the [ref=X] ID from the snapshot.
307
+
308
+ Args:
309
+ ref: The ref ID from snapshot (e.g., ref-w545663wqs)
310
+ element: Description of the element for error messages
311
+ wait_for_navigation_ms: Optional wait time in ms to detect navigation after click
312
+ """
303
313
  session = await ServerContext.get_browser_session()
304
- locator = await session.ref_locator(ref, element)
305
- await locator.click(timeout=5_000)
306
- return ok_response(
307
- tool="browser.click_ref",
308
- input={"ref": ref, "element": element},
309
- output={"message": f"Successfully clicked {element}", "ref": ref},
310
- )
314
+ page = await session.get_page()
315
+
316
+ url_before = page.url
317
+ try:
318
+ locator = await session.ref_locator(ref, element)
319
+ await locator.click(timeout=5_000)
320
+
321
+ # Check for navigation if requested
322
+ did_navigate = False
323
+ url_after = url_before
324
+ if wait_for_navigation_ms and wait_for_navigation_ms > 0:
325
+ import asyncio
326
+ await asyncio.sleep(wait_for_navigation_ms / 1000)
327
+ url_after = page.url
328
+ did_navigate = url_after != url_before
329
+
330
+ return ok_response(
331
+ tool="browser.click_ref",
332
+ input={"ref": ref, "element": element, "wait_for_navigation_ms": wait_for_navigation_ms},
333
+ output={
334
+ "message": f"Successfully clicked {element}",
335
+ "ref": ref,
336
+ "url_before": url_before,
337
+ "url_after": url_after,
338
+ "did_navigate": did_navigate,
339
+ },
340
+ )
341
+ except Exception as e:
342
+ # Enhanced error with diagnostics + self-heal for common browser lifecycle issues
343
+ from ...utils import error_response
344
+
345
+ err_s = str(e).lower()
346
+ did_reset = False
347
+ if any(k in err_s for k in [
348
+ "target closed",
349
+ "page closed",
350
+ "browser has been closed",
351
+ "execution context was destroyed",
352
+ "has been disposed",
353
+ ]):
354
+ try:
355
+ session.reset_page()
356
+ did_reset = True
357
+ except Exception:
358
+ did_reset = False
359
+
360
+ # Try to get console and network diagnostics from session cache
361
+ try:
362
+ console_tail = session.get_console_tail(n=10)
363
+ except Exception:
364
+ console_tail = []
365
+ try:
366
+ network_tail = session.get_network_tail(n=20)
367
+ except Exception:
368
+ network_tail = []
369
+
370
+ hint = "Try taking a new snapshot to get fresh refs, or check if the element is still visible"
371
+ if did_reset:
372
+ hint = "Browser page was closed/reset. Take a new snapshot to get fresh refs, then retry the click."
373
+
374
+ return error_response(
375
+ tool="browser.click_ref",
376
+ input={"ref": ref, "element": element, "wait_for_navigation_ms": wait_for_navigation_ms},
377
+ error_type="browser_interaction_error",
378
+ code="E5001",
379
+ message=f"Failed to click element: {str(e)}",
380
+ details={
381
+ "ref": ref,
382
+ "element": element,
383
+ "url_before": url_before,
384
+ "url_after": page.url,
385
+ "did_reset": did_reset,
386
+ "hint": hint,
387
+ "console_tail": console_tail,
388
+ "network_tail": network_tail,
389
+ },
390
+ )
311
391
 
312
392
  @mcp.tool(
313
393
  name="browser.type_ref",
@@ -322,15 +402,41 @@ def register(mcp: FastMCP) -> None:
322
402
  ) -> dict[str, Any]:
323
403
  """Type text into an element using the [ref=X] ID."""
324
404
  session = await ServerContext.get_browser_session()
325
- locator = await session.ref_locator(ref, element)
326
- await locator.fill(text)
327
- if submit:
328
- await locator.press("Enter")
329
- return ok_response(
330
- tool="browser.type_ref",
331
- input={"ref": ref, "text": text, "submit": submit, "element": element},
332
- output={"message": "Typed into element", "ref": ref},
333
- )
405
+ page = await session.get_page()
406
+ url_before = page.url
407
+
408
+ try:
409
+ locator = await session.ref_locator(ref, element)
410
+ await locator.fill(text)
411
+ if submit:
412
+ await locator.press("Enter")
413
+
414
+ return ok_response(
415
+ tool="browser.type_ref",
416
+ input={"ref": ref, "text": text, "submit": submit, "element": element},
417
+ output={
418
+ "message": "Typed into element" + (" and submitted" if submit else ""),
419
+ "ref": ref,
420
+ "url_before": url_before,
421
+ "url_after": page.url,
422
+ },
423
+ )
424
+ except Exception as e:
425
+ from ...utils import error_response
426
+ return error_response(
427
+ tool="browser.type_ref",
428
+ input={"ref": ref, "text": text, "submit": submit, "element": element},
429
+ error_type="browser_interaction_error",
430
+ code="E5002",
431
+ message=f"Failed to type into element: {str(e)}",
432
+ details={
433
+ "ref": ref,
434
+ "element": element,
435
+ "url_before": url_before,
436
+ "url_after": page.url,
437
+ "hint": "Try taking a new snapshot to get fresh refs, or check if the element is still visible and editable",
438
+ },
439
+ )
334
440
 
335
441
  @mcp.tool(name="browser.screenshot_page", description="Take a screenshot of the current browser page")
336
442
  @handle_mcp_errors
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from typing import Any, Optional
5
+
6
+ from mcp.server.fastmcp import FastMCP
7
+
8
+ from thordata_mcp.config import settings
9
+ from thordata_mcp.context import ServerContext
10
+ from thordata_mcp.utils import ok_response
11
+ from thordata_mcp.tools.params_utils import normalize_params
12
+
13
+
14
def register(mcp: FastMCP) -> None:
    """Register the debug and diagnostics tools on ``mcp``.

    Three tools are registered: ``debug.status``, ``browser.diagnostics`` and
    ``debug.self_test``.

    Args:
        mcp: The FastMCP server instance the tools are attached to.
    """
    import sys

    # A secret must be at least this long before its last four characters are
    # echoed back; shorter values would be revealed entirely or in large part.
    min_len_for_tail = 12

    @mcp.tool(name="debug.status", description="Return server status and effective configuration (no secrets).")
    async def debug_status() -> dict[str, Any]:
        """Report the Python version and a masked view of selected settings."""

        def _mask(v: str | None) -> dict[str, Any]:
            """Describe a secret without exposing it."""
            if not v:
                return {"set": False}
            return {
                "set": True,
                "length": len(v),
                # ``None`` for short secrets: never echo a value back whole.
                "tail4": v[-4:] if len(v) >= min_len_for_tail else None,
            }

        return ok_response(
            tool="debug.status",
            input={},
            output={
                "python": sys.version,
                "settings": {
                    "THORDATA_SCRAPER_TOKEN": _mask(settings.THORDATA_SCRAPER_TOKEN),
                    "THORDATA_PUBLIC_TOKEN": _mask(settings.THORDATA_PUBLIC_TOKEN),
                    "THORDATA_PUBLIC_KEY": _mask(settings.THORDATA_PUBLIC_KEY),
                    "THORDATA_BROWSER_USERNAME": _mask(settings.THORDATA_BROWSER_USERNAME),
                    "THORDATA_BROWSER_PASSWORD": _mask(settings.THORDATA_BROWSER_PASSWORD),
                    "THORDATA_TASKS_LIST_MODE": settings.THORDATA_TASKS_LIST_MODE,
                    "THORDATA_TASKS_LIST_DEFAULT_LIMIT": settings.THORDATA_TASKS_LIST_DEFAULT_LIMIT,
                },
            },
        )

    @mcp.tool(name="browser.diagnostics", description="Return recent browser console/network diagnostics for the current session.")
    async def browser_diagnostics(
        console_limit: int = 10,
        network_limit: int = 20,
    ) -> dict[str, Any]:
        """Return the current page's URL/title plus recent console and network entries."""
        session = await ServerContext.get_browser_session()
        page = await session.get_page()

        return ok_response(
            tool="browser.diagnostics",
            input={"console_limit": console_limit, "network_limit": network_limit},
            output={
                "url": page.url,
                "title": await page.title(),
                "console_tail": session.get_console_tail(n=console_limit),
                "network_tail": session.get_network_tail(n=network_limit),
            },
        )

    @mcp.tool(
        name="debug.self_test",
        description=(
            "Run a small, non-destructive smoke test suite for core scraping capabilities and return a compact report. "
            "Useful after restarting the MCP server. Params: {\"timeout_s\": 30}."
        ),
    )
    async def debug_self_test(*, params: Any = None) -> dict[str, Any]:
        """Run the SERP, unlocker and browser smoke checks concurrently.

        ``params`` may carry ``timeout_s`` (per-check timeout in seconds,
        clamped to the range 5..120, default 30).
        """
        try:
            p = normalize_params(params, "debug.self_test", "run")
        except Exception:
            # Best-effort: malformed params fall back to defaults rather than
            # preventing the self-test from running at all.
            p = {}

        try:
            timeout_s = int(p.get("timeout_s", 30))
        except (TypeError, ValueError):
            # A non-numeric timeout should not abort the whole self-test.
            timeout_s = 30
        timeout_s = max(5, min(timeout_s, 120))

        async def _run(name: str, fn) -> dict[str, Any]:
            """Await one check under the timeout and convert failures to a result dict."""
            try:
                out = await asyncio.wait_for(fn(), timeout=timeout_s)
            except asyncio.TimeoutError:
                # str() of a timeout error is empty, so spell the reason out.
                return {"check": name, "ok": False, "error": f"timed out after {timeout_s}s"}
            except Exception as e:
                return {"check": name, "ok": False, "error": str(e) or type(e).__name__}
            return {"check": name, "ok": True, "detail": out}

        client = await ServerContext.get_client()

        async def _check_serp() -> dict[str, Any]:
            from thordata.types import SerpRequest

            req = SerpRequest(query="thordata", engine="google", num=3, output_format="light_json")
            data = await client.serp_search_advanced(req)
            organic = data.get("organic") if isinstance(data, dict) else None
            has_list = isinstance(organic, list)
            return {
                "has_organic": has_list and len(organic) > 0,
                "organic_count": len(organic) if has_list else None,
            }

        async def _check_unlocker() -> dict[str, Any]:
            html = await client.universal_scrape(url="https://example.com", js_render=True, output_format="html")
            s = html if isinstance(html, str) else str(html)
            return {"html_len": len(s), "contains_example_domain": "Example Domain" in s}

        async def _check_browser_snapshot() -> dict[str, Any]:
            session = await ServerContext.get_browser_session()
            # capture_snapshot() accepts only keyword options (filtered, mode,
            # max_items, include_dom) and no ``url``, so passing one raised
            # TypeError and this check could never pass. Navigate first.
            # NOTE(review): this navigates the session's current page — confirm
            # that is acceptable for a "non-destructive" smoke test.
            page = await session.get_page()
            await page.goto("https://example.com", wait_until="domcontentloaded")
            snap = await session.capture_snapshot(filtered=True, max_items=20)
            is_dict = isinstance(snap, dict)
            aria = snap.get("aria_snapshot") if is_dict else None
            return {
                "aria_non_empty": bool(aria),
                "aria_len": len(aria) if isinstance(aria, str) else None,
                "url": snap.get("url") if is_dict else None,
            }

        results = await asyncio.gather(
            _run("serp.search", _check_serp),
            _run("unlocker.fetch(html,js_render=true)", _check_unlocker),
            _run("browser.snapshot(filtered,max_items=20)", _check_browser_snapshot),
        )

        # The report stays compact: per-check details are dropped, errors are kept.
        summary = []
        ok_all = True
        for r in results:
            if r.get("ok"):
                summary.append({"check": r.get("check"), "ok": True})
            else:
                ok_all = False
                summary.append({"check": r.get("check"), "ok": False, "error": r.get("error")})

        return ok_response(
            tool="debug.self_test",
            input={"params": {"timeout_s": timeout_s}},
            output={"ok_all": ok_all, "summary": summary, "_meta": {"timeout_s": timeout_s}},
        )
@@ -0,0 +1,107 @@
1
+ """Common parameter normalization utilities for thordata MCP tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any, Dict, Optional
7
+
8
+ from thordata_mcp.utils import error_response
9
+
10
+
11
def normalize_params(params: Any, tool_name: str, action: Optional[str] = None) -> Dict[str, Any]:
    """Normalize a tool's ``params`` argument to a dictionary.

    Some clients pass ``params`` as a JSON string instead of an object; such
    strings are decoded. Anything that cannot be turned into a dictionary is
    rejected with a message that names the tool (and action, when given).

    Args:
        params: The params value passed to the tool.
        tool_name: Name of the tool, included in error messages.
        action: Optional action name, included in error messages.

    Returns:
        ``{}`` when ``params`` is ``None``; ``params`` itself when it is
        already a dict; otherwise the dict decoded from the JSON string.

    Raises:
        ValueError: If ``params`` is a string that is not valid JSON, is JSON
            that does not decode to a dictionary, or is of any other type.
    """
    if params is None:
        return {}

    if isinstance(params, dict):
        return params

    # Shared pieces of every error message.
    where = f"{tool_name}.{action}" if action else tool_name
    example = "Example: params={'url': 'https://example.com'}. "
    text = params if isinstance(params, str) else str(params)
    received = f"Received: {text[:100]}{'...' if len(text) > 100 else ''}"

    if isinstance(params, str):
        try:
            parsed = json.loads(params)
        except json.JSONDecodeError as e:
            # Chain the decode error so the position details stay available.
            raise ValueError(
                f"[{where}] Invalid JSON in params: {e}. "
                f"Params should be a dictionary object, not a string. "
                f"{example}{received}"
            ) from e
        if not isinstance(parsed, dict):
            raise ValueError(
                f"[{where}] Parsed JSON is not a dictionary (got {type(parsed).__name__}). "
                f"{example}{received}"
            )
        return parsed

    # Any other type (list, number, etc.) cannot be interpreted as params.
    raise ValueError(
        f"[{where}] params must be a dictionary object, not {type(params).__name__}. "
        f"{example}{received}"
    )
57
+
58
+
59
def create_params_error(tool_name: str, action: str, params: Any, error_message: str) -> Dict[str, Any]:
    """Build the standard error response for a parameter validation failure.

    Args:
        tool_name: Name of the tool that rejected the input.
        action: Action that was being performed.
        params: The rejected params value, echoed back in the response input.
        error_message: Human-readable description of what was wrong.

    Returns:
        The dictionary produced by ``error_response`` with error type
        ``validation_error`` and code ``E4001``.
    """
    echoed_input = {"action": action, "params": params}
    return error_response(
        tool=tool_name,
        input=echoed_input,
        error_type="validation_error",
        code="E4001",
        message=error_message,
    )
79
+
80
+
81
def create_json_error(tool_name: str, action: str, params: str, error_detail: str) -> Dict[str, Any]:
    """Build the standard error response for a JSON parsing failure.

    The message shows both accepted forms (dictionary or JSON string) and a
    preview of the received value, cut to its first 100 characters.

    Args:
        tool_name: Name of the tool that rejected the input.
        action: Action that was being performed.
        params: The JSON string that failed to parse.
        error_detail: Detail text from the JSON parser.

    Returns:
        The dictionary produced by ``error_response`` with error type
        ``json_error`` and code ``E4002``.
    """
    ellipsis = "..." if len(params) > 100 else ""
    message_parts = [
        f"Invalid JSON in params: {error_detail}. ",
        "Use dictionary format: params={'url': 'https://example.com'} ",
        "or valid JSON string: params='{\"url\":\"https://example.com\"}'. ",
        f"Received: {params[:100]}{ellipsis}",
    ]
    return error_response(
        tool=tool_name,
        input={"action": action, "params": params},
        error_type="json_error",
        code="E4002",
        message="".join(message_parts),
    )