thordata-mcp-server 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata_mcp/__init__.py +1 -1
- thordata_mcp/browser_session.py +157 -12
- thordata_mcp/config.py +14 -3
- thordata_mcp/context.py +1 -1
- thordata_mcp/tools/data/browser.py +124 -18
- thordata_mcp/tools/debug.py +125 -0
- thordata_mcp/tools/params_utils.py +107 -0
- thordata_mcp/tools/product.py +83 -5
- thordata_mcp/tools/product_compact.py +2108 -962
- thordata_mcp/tools/utils.py +2 -0
- thordata_mcp/utils.py +393 -322
- {thordata_mcp_server-0.4.4.dist-info → thordata_mcp_server-0.5.0.dist-info}/METADATA +29 -54
- thordata_mcp_server-0.5.0.dist-info/RECORD +26 -0
- thordata_mcp_server-0.4.4.dist-info/RECORD +0 -24
- {thordata_mcp_server-0.4.4.dist-info → thordata_mcp_server-0.5.0.dist-info}/WHEEL +0 -0
- {thordata_mcp_server-0.4.4.dist-info → thordata_mcp_server-0.5.0.dist-info}/entry_points.txt +0 -0
- {thordata_mcp_server-0.4.4.dist-info → thordata_mcp_server-0.5.0.dist-info}/top_level.txt +0 -0
thordata_mcp/tools/product.py
CHANGED
|
@@ -382,18 +382,96 @@ async def _fetch_json_preview(download_url: str, *, max_chars: int = 20_000) ->
|
|
|
382
382
|
"""Fetch a small JSON preview from a download URL (best-effort, token-safe)."""
|
|
383
383
|
if not download_url:
|
|
384
384
|
return {"ok": False, "error": "missing_download_url"}
|
|
385
|
+
def _first_object_from_array_prefix(s: str) -> dict[str, Any] | None:
|
|
386
|
+
"""Best-effort parse of the first JSON object in a JSON array prefix.
|
|
387
|
+
|
|
388
|
+
Works even if the overall array is truncated, as long as the first object is complete.
|
|
389
|
+
This avoids json.JSONDecoder.raw_decode failing when the prefix is cut mid-string.
|
|
390
|
+
"""
|
|
391
|
+
s = s.lstrip()
|
|
392
|
+
if not s.startswith("["):
|
|
393
|
+
return None
|
|
394
|
+
start = s.find("{")
|
|
395
|
+
if start == -1:
|
|
396
|
+
return None
|
|
397
|
+
|
|
398
|
+
in_string = False
|
|
399
|
+
escape = False
|
|
400
|
+
depth = 0
|
|
401
|
+
begun = False
|
|
402
|
+
for i in range(start, len(s)):
|
|
403
|
+
ch = s[i]
|
|
404
|
+
if in_string:
|
|
405
|
+
if escape:
|
|
406
|
+
escape = False
|
|
407
|
+
elif ch == "\\":
|
|
408
|
+
escape = True
|
|
409
|
+
elif ch == "\"":
|
|
410
|
+
in_string = False
|
|
411
|
+
continue
|
|
412
|
+
else:
|
|
413
|
+
if ch == "\"":
|
|
414
|
+
in_string = True
|
|
415
|
+
continue
|
|
416
|
+
if ch == "{":
|
|
417
|
+
depth += 1
|
|
418
|
+
begun = True
|
|
419
|
+
elif ch == "}":
|
|
420
|
+
depth -= 1
|
|
421
|
+
if begun and depth == 0:
|
|
422
|
+
snippet = s[start : i + 1]
|
|
423
|
+
try:
|
|
424
|
+
obj = json.loads(snippet)
|
|
425
|
+
return obj if isinstance(obj, dict) else None
|
|
426
|
+
except Exception:
|
|
427
|
+
return None
|
|
428
|
+
return None
|
|
429
|
+
|
|
385
430
|
try:
|
|
386
431
|
timeout = aiohttp.ClientTimeout(total=30)
|
|
387
432
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
|
388
433
|
async with session.get(download_url) as resp:
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
434
|
+
# Stream small preview to avoid truncating mid-string (which breaks JSON parsing).
|
|
435
|
+
# We'll try to extract the first object from an array response, reading up to a hard cap.
|
|
436
|
+
hard_cap = max(max_chars, 200_000)
|
|
437
|
+
buf_parts: list[str] = []
|
|
438
|
+
total = 0
|
|
439
|
+
first_obj: dict[str, Any] | None = None
|
|
440
|
+
|
|
441
|
+
async for chunk in resp.content.iter_chunked(16_384):
|
|
442
|
+
try:
|
|
443
|
+
part = chunk.decode("utf-8", errors="ignore")
|
|
444
|
+
except Exception:
|
|
445
|
+
part = str(chunk)
|
|
446
|
+
buf_parts.append(part)
|
|
447
|
+
total += len(part)
|
|
448
|
+
if total >= max_chars:
|
|
449
|
+
# As soon as we reach the soft cap, try to parse first object.
|
|
450
|
+
joined = "".join(buf_parts)
|
|
451
|
+
first_obj = _first_object_from_array_prefix(joined)
|
|
452
|
+
if first_obj is not None:
|
|
453
|
+
break
|
|
454
|
+
if total >= hard_cap:
|
|
455
|
+
break
|
|
456
|
+
|
|
457
|
+
txt = "".join(buf_parts)
|
|
458
|
+
truncated = total >= hard_cap or len(txt) > max_chars
|
|
392
459
|
try:
|
|
393
460
|
data = json.loads(txt)
|
|
394
461
|
except Exception:
|
|
395
|
-
|
|
396
|
-
|
|
462
|
+
if first_obj is None:
|
|
463
|
+
first_obj = _first_object_from_array_prefix(txt)
|
|
464
|
+
if first_obj is not None:
|
|
465
|
+
return {
|
|
466
|
+
"ok": True,
|
|
467
|
+
"status": resp.status,
|
|
468
|
+
"data": [first_obj],
|
|
469
|
+
"partial": True,
|
|
470
|
+
"truncated": truncated,
|
|
471
|
+
"note": "Decoded first array element from streamed prefix (best-effort preview).",
|
|
472
|
+
}
|
|
473
|
+
return {"ok": False, "status": resp.status, "raw": txt, "truncated": truncated}
|
|
474
|
+
return {"ok": True, "status": resp.status, "data": data, "truncated": truncated}
|
|
397
475
|
except Exception as e:
|
|
398
476
|
return {"ok": False, "error": str(e)}
|
|
399
477
|
|