wrapper-mcp 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """Wrapper MCP — Czech NLP tools (NameTag, MasKIT, UDPipe, PONK, Korektor, Charles Translator)."""
2
+
3
+ __version__ = "0.8.0"
wrapper_mcp/http.py ADDED
@@ -0,0 +1,108 @@
1
+ """HTTP klient pro LINDAT REST API — s retry, logging, exponential backoff."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import Any
8
+
9
+ import httpx
10
+
11
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# REST endpoints of the wrapped services.
NAMETAG_URL = "https://lindat.mff.cuni.cz/services/nametag/api/recognize"
UDPIPE_URL = "https://lindat.mff.cuni.cz/services/udpipe/api/process"
MASKIT_URL = "https://quest.ms.mff.cuni.cz/maskit/api/process"
PONK_URL = "https://quest.ms.mff.cuni.cz/ponk/api/process"

# Request timeouts in seconds.
# The default was raised from 60s because the MasKIT API occasionally needs
# 60-90s to answer on official SK texts.
HTTP_TIMEOUT = 120.0
# Longer limit for the Translator document mode / large inputs.
HTTP_TIMEOUT_LONG = 240.0

# Retry configuration: exponential backoff for transient failures.
MAX_RETRIES = 3
INITIAL_BACKOFF_S = 1.0
BACKOFF_MULTIPLIER = 2.0  # waits grow 1s -> 2s -> 4s

# Transient statuses worth retrying: 429 Too Many Requests and the
# 502/503/504 server errors.
_RETRYABLE_STATUSES = frozenset((429, 502, 503, 504))
28
+
29
+
30
async def _post_with_retry(
    url: str,
    data: dict[str, str],
    timeout: float,
) -> httpx.Response:
    """POST form data to ``url``, retrying transient failures with backoff.

    At most ``MAX_RETRIES + 1`` attempts are made, each with its own
    ``httpx.AsyncClient``. The pause before the next attempt starts at
    ``INITIAL_BACKOFF_S`` seconds and is multiplied by ``BACKOFF_MULTIPLIER``
    after every failed attempt.

    Retried while attempts remain:
        - ``httpx.TimeoutException``, ``httpx.ConnectError`` and
          ``httpx.RemoteProtocolError``;
        - responses whose status code is in ``_RETRYABLE_STATUSES``.

    Args:
        url: Endpoint to POST to.
        data: Form fields sent as the request body.
        timeout: Timeout handed to the HTTP client for each attempt.

    Returns:
        The response of the first attempt that passes ``raise_for_status()``.

    Raises:
        httpx.HTTPStatusError: For an error status that is not retried, or
            for a retryable status on the final attempt.
        httpx.TimeoutException, httpx.ConnectError, httpx.RemoteProtocolError:
            When the final attempt fails with one of these; a new instance of
            the same class is raised, chained to the original.
        RuntimeError: If the loop ends without a result and without a
            recorded exception.
    """
    total_attempts = MAX_RETRIES + 1
    delay = INITIAL_BACKOFF_S
    last_exc: Exception | None = None

    for attempt in range(total_attempts):
        attempt_no = attempt + 1
        may_retry = attempt < MAX_RETRIES
        try:
            async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
                response = await client.post(url, data=data)

            if may_retry and response.status_code in _RETRYABLE_STATUSES:
                # Transient server-side status: wait, lengthen the delay, retry.
                logger.warning(
                    "HTTP %s na %s (pokus %d/%d), retry za %.1fs",
                    response.status_code, url, attempt_no, total_attempts, delay,
                )
                await asyncio.sleep(delay)
                delay *= BACKOFF_MULTIPLIER
                continue

            # Either a final answer or the last attempt: let error statuses raise.
            response.raise_for_status()
            if attempt > 0:
                logger.info("Retry úspěšný na %s po %d pokusech", url, attempt_no)
            return response

        except (httpx.TimeoutException, httpx.ConnectError, httpx.RemoteProtocolError) as e:
            last_exc = e
            if may_retry:
                logger.warning(
                    "%s na %s (pokus %d/%d), retry za %.1fs",
                    type(e).__name__, url, attempt_no, total_attempts, delay,
                )
                await asyncio.sleep(delay)
                delay *= BACKOFF_MULTIPLIER
                continue

            logger.error("Vše %d pokusů selhalo na %s: %s", total_attempts, url, e)
            # Re-raise with an explicit message: httpx.ReadTimeout has an empty
            # str(), which makes the failure hard to debug for the user.
            err_type = type(e).__name__
            msg = str(e)
            if not msg:
                msg = f"{err_type} po {timeout}s na {url} (server pravděpodobně přetížený)"
            raise type(e)(msg) from e

        except httpx.HTTPStatusError as e:
            # Raised by raise_for_status() above: a status that is not retried,
            # or a retryable one on the last attempt. Log it and propagate.
            logger.error("HTTP %d na %s: %s", e.response.status_code, url, e)
            raise

    # Safety net for a loop that ended without returning or raising.
    if last_exc is not None:
        raise last_exc
    raise RuntimeError(f"Unexpected: vyčerpáno {total_attempts} pokusů na {url} bez exception")
89
+
90
+
91
async def post_form(url: str, data: dict[str, str]) -> dict[str, Any]:
    """POST ``data`` as a form to ``url`` and return the decoded JSON body.

    The request goes through ``_post_with_retry`` with the ``HTTP_TIMEOUT``
    timeout, so it is retried and logged the same way.
    """
    return (await _post_with_retry(url, data, HTTP_TIMEOUT)).json()
95
+
96
+
97
async def post_form_text(
    url: str,
    data: dict[str, str],
    timeout: float = HTTP_TIMEOUT_LONG,
) -> str:
    """POST ``data`` as a form to ``url`` and return the response body as text.

    Used for Charles Translator, which returns the translated text directly
    rather than JSON. The default timeout is ``HTTP_TIMEOUT_LONG`` (240s),
    higher than ``HTTP_TIMEOUT``, to allow for document mode and large inputs.
    The request goes through ``_post_with_retry``, so it is retried and logged
    the same way.

    Args:
        url: Endpoint to POST to.
        data: Form fields sent as the request body.
        timeout: Timeout passed to ``_post_with_retry`` for each attempt.

    Returns:
        The response body decoded as text.
    """
    response = await _post_with_retry(url, data, timeout)
    return response.text
@@ -0,0 +1,65 @@
1
+ """Korektor — český spell checker + auto-doplnění diakritiky (LINDAT).
2
+
3
+ Wrapper kolem `https://lindat.mff.cuni.cz/services/korektor/api/correct`.
4
+ Dostupné modely:
5
+ - ``czech-spellchecker-130202`` (default) — opravy pravopisu
6
+ - ``czech-spellchecker_2edits-130202`` — agresivnější (až 2 edits/word)
7
+ - ``czech-diacritics_generator-130202`` — doplnění diakritiky do textu
8
+ - ``strip_diacritics-130202`` — odstranění diakritiky
9
+
10
+ Use cases pro legal-tech:
11
+ - Před odesláním podání na soud — checkuje pravopis
12
+ - OCR/email texty bez diakritiky — auto-doplnění (`Jiri` → `Jiří`)
13
+ - Občanské porady — text bez diakritiky z mobilní klávesnice → korektně formátovaný
14
+
15
+ Pozor: Korektor je CZ-only, modely jsou z roku 2013. Pro vlastní jména
16
+ (příjmení Pluhařík, slovenská jména…) může mít omezenou přesnost.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Any, Literal
22
+
23
+ from .http import post_form
24
+
25
# Endpoint of the Korektor "correct" REST method.
KOREKTOR_URL = "https://lindat.mff.cuni.cz/services/korektor/api/correct"

# Short mode names accepted by ``correct`` mapped to Korektor model identifiers.
_MODEL_ALIASES: dict[str, str] = dict(
    spellcheck="czech-spellchecker-130202",
    spellcheck_strict="czech-spellchecker_2edits-130202",
    diacritics="czech-diacritics_generator-130202",
    strip="strip_diacritics-130202",
)
33
+
34
+
35
async def correct(
    text: str,
    mode: Literal["spellcheck", "spellcheck_strict", "diacritics", "strip"] = "spellcheck",
) -> dict[str, Any]:
    """Return the text corrected / transformed by the selected Korektor model.

    Args:
        text: Input Czech text.
        mode: ``spellcheck`` (default), ``spellcheck_strict``, ``diacritics``
            or ``strip``. The value is translated through ``_MODEL_ALIASES``;
            a value that is not a known alias is sent to the service as the
            model name as-is.

    Returns:
        A dict with the keys ``corrected`` (resulting text), ``model`` (as
        reported by the server, ``None`` when no request was made), ``mode``
        (the value passed in) and ``changed`` (whether ``corrected`` differs
        from ``text``).
    """
    if not text.strip():
        # Nothing to correct: skip the network round trip and echo the input
        # untouched, so ``corrected`` and ``changed`` stay consistent even for
        # whitespace-only input.
        return {"corrected": text, "model": None, "mode": mode, "changed": False}

    model_name = _MODEL_ALIASES.get(mode, mode)
    payload: dict[str, str] = {"data": text}
    if model_name:
        payload["model"] = model_name

    data = await post_form(KOREKTOR_URL, payload)
    # Fall back to the input when the response carries no "result" field.
    corrected = data.get("result", text)
    return {
        "corrected": corrected,
        "model": data.get("model"),
        "mode": mode,
        "changed": corrected != text,
    }