svo-client 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: svo-client
3
+ Version: 0.1.0
4
+ Summary: Async client for SVO semantic chunker microservice.
5
+ Home-page: https://github.com/your_org/svo_client
6
+ Author: Your Name
7
+ Author-email: your@email.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Requires-Dist: aiohttp>=3.8.0
13
+ Requires-Dist: pydantic>=2.0.0
14
+ Requires-Dist: chunk_metadata_adapter
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: home-page
19
+ Dynamic: requires-dist
20
+ Dynamic: requires-python
21
+ Dynamic: summary
@@ -0,0 +1,27 @@
1
+ # svo-client
2
+
3
+ Асинхронный Python-клиент для SVO Semantic Chunker microservice.
4
+
5
+ ## Установка
6
+
7
+ ```bash
8
+ pip install svo-client
9
+ ```
10
+
11
+ ## Пример использования
12
+
13
+ ```python
14
+ from svo_client.chunker_client import ChunkerClient
15
+ import asyncio
16
+
17
+ async def main():
18
+ async with ChunkerClient() as client:
19
+ chunks = await client.chunk_text("Your text here.")
20
+ print(client.reconstruct_text(chunks))
21
+
22
+ asyncio.run(main())
23
+ ```
24
+
25
+ ## Документация
26
+ - [OpenAPI schema](docs/openapi.json)
27
+ - [Примеры и тесты](tests/test_chunker_client.py)
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,23 @@
1
"""Packaging script for the svo-client distribution."""

from pathlib import Path

from setuptools import setup, find_packages

# The README contains Cyrillic text, so decode it explicitly as UTF-8 instead
# of relying on the platform default encoding (which breaks on some systems).
_readme_path = Path(__file__).resolve().parent / "README.md"
_long_description = (
    _readme_path.read_text(encoding="utf-8") if _readme_path.exists() else ""
)

setup(
    name="svo-client",
    version="0.1.0",
    description="Async client for SVO semantic chunker microservice.",
    # Show the README on the package index page.
    long_description=_long_description,
    long_description_content_type="text/markdown",
    author="Your Name",
    author_email="your@email.com",
    # Never ship the test suite as an importable top-level package.
    packages=find_packages(".", exclude=("tests", "tests.*")),
    install_requires=[
        "aiohttp>=3.8.0",
        "pydantic>=2.0.0",
        "chunk_metadata_adapter",
    ],
    python_requires=">=3.8",
    url="https://github.com/your_org/svo_client",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    include_package_data=True,
)
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,143 @@
1
+ """Async client for SVO semantic chunker microservice."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ import aiohttp
6
+ from typing import List, Optional, Any, Dict
7
+ from chunk_metadata_adapter import SemanticChunk
8
+ from pydantic import BaseModel
9
+
10
class Token(BaseModel):
    """A single token as returned by the chunker service.

    Only ``text`` is required; every other attribute defaults to ``None``.
    """

    # Surface form of the token.
    text: str

    # NOTE(review): the fields below look like dependency-parse annotations
    # (lemma, part of speech, head index, dependency relation) -- confirm
    # against the service schema.
    lemma: Optional[str] = None
    pos: Optional[str] = None
    head: Optional[int] = None
    deprel: Optional[str] = None

    # Identifiers supplied by the service, when present.
    id: Optional[int] = None
    sent_id: Optional[str] = None
18
+
19
class SV(BaseModel):
    """Subject/verb pair attached to a chunk; either side may be absent."""

    subject: Optional[Token] = None
    verb: Optional[Token] = None
22
+
23
class ChunkFull(BaseModel):
    """One chunk as returned by the chunker service.

    ``uuid``, ``sha256`` and ``text`` are required; every other attribute is
    optional and defaults to ``None``.
    """

    # --- identity and content -------------------------------------------
    uuid: str
    source_id: Optional[str] = None
    # Used by ChunkerClient.reconstruct_text as the sort key when present.
    ordinal: Optional[int] = None
    sha256: str
    text: str
    summary: Optional[str] = None
    language: Optional[str] = None
    type: Optional[str] = None

    # --- origin of the chunk --------------------------------------------
    source_path: Optional[str] = None
    source_lines_start: Optional[int] = None
    source_lines_end: Optional[int] = None

    # --- bookkeeping metadata -------------------------------------------
    project: Optional[str] = None
    task_id: Optional[str] = None
    subtask_id: Optional[str] = None
    status: Optional[str] = None
    unit_id: Optional[str] = None
    created_at: Optional[str] = None
    tags: Optional[Any] = None
    role: Optional[str] = None
    link_parent: Optional[str] = None
    link_related: Optional[str] = None

    # --- metrics and feedback -------------------------------------------
    quality_score: Optional[float] = None
    coverage: Optional[float] = None
    cohesion: Optional[float] = None
    boundary_prev: Optional[float] = None
    boundary_next: Optional[float] = None
    used_in_generation: Optional[bool] = None
    feedback_accepted: Optional[int] = None
    feedback_rejected: Optional[int] = None

    # --- span and linguistic payload ------------------------------------
    # NOTE(review): start/end are presumably offsets into the source text --
    # confirm against the service schema.
    start: Optional[int] = None
    end: Optional[int] = None
    sv: Optional[SV] = None
    score: Optional[float] = None
    embedding: Optional[List[float]] = None
    tokens: Optional[List[Token]] = None
    block: Optional[List[Token]] = None
60
+
61
class ChunkerClient:
    """Async client for the SVO semantic chunker service.

    Commands are sent as JSON-RPC 2.0 requests to ``<base_url>/cmd``. The
    HTTP session is created by ``__aenter__`` and closed by ``__aexit__``, so
    the intended usage is::

        async with ChunkerClient() as client:
            chunks = await client.chunk_text("Your text here.")
    """

    def __init__(self, url: str = "http://localhost", port: int = 8009):
        """Store the service address; no network I/O is performed here.

        Args:
            url: Scheme and host of the service. Trailing slashes are removed.
            port: Port the service listens on.
        """
        # The port must follow the colon directly: any whitespace in between
        # produces an address that is not a valid URL.
        self.base_url = f"{url.rstrip('/')}:{port}"
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        """Open the HTTP session and return the client itself."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Close the HTTP session if one was opened."""
        if self.session:
            await self.session.close()

    async def _call(self, method: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """POST one JSON-RPC request to ``/cmd`` and return the decoded reply.

        Args:
            method: JSON-RPC method name.
            params: Optional parameters; the ``params`` member is omitted
                from the request when this is ``None``.

        Raises:
            Whatever ``raise_for_status()`` raises for a non-success status.
        """
        payload: Dict[str, Any] = {"jsonrpc": "2.0", "method": method, "id": 1}
        if params is not None:
            payload["params"] = params
        async with self.session.post(f"{self.base_url}/cmd", json=payload) as resp:
            resp.raise_for_status()
            return await resp.json()

    async def get_openapi_schema(self) -> Any:
        """Fetch ``<base_url>/openapi.json`` and return the decoded JSON."""
        url = f"{self.base_url}/openapi.json"
        async with self.session.get(url) as resp:
            resp.raise_for_status()
            return await resp.json()

    def parse_chunk(self, chunk: Dict[str, Any]) -> "ChunkFull":
        """Convert one raw chunk dictionary into a ``ChunkFull`` model.

        The nested ``tokens``, ``block`` and ``sv`` members are converted to
        ``Token`` / ``SV`` instances; a missing or empty member becomes
        ``None``. All remaining keys are passed to ``ChunkFull`` unchanged.
        """
        tokens = [Token(**item) for item in chunk["tokens"]] if chunk.get("tokens") else None
        block = [Token(**item) for item in chunk["block"]] if chunk.get("block") else None

        sv = None
        if chunk.get("sv"):
            sv = SV(**{
                role: Token(**value) if value else None
                for role, value in chunk["sv"].items()
            })

        plain_fields = {
            key: value
            for key, value in chunk.items()
            if key not in ("tokens", "block", "sv")
        }
        return ChunkFull(**plain_fields, tokens=tokens, block=block, sv=sv)

    async def chunk_text(self, text: str, **params) -> "List[ChunkFull]":
        """Send ``text`` to the ``chunk`` command and return the parsed chunks.

        Args:
            text: Text to be chunked.
            **params: Extra members merged into the JSON-RPC ``params``.

        Returns:
            A list of ``ChunkFull`` objects; empty when the reply carries no
            ``result`` or no ``chunks``.
        """
        data = await self._call("chunk", {"text": text, **params})
        # A reply may carry "result": null (or omit it); treat both as empty
        # instead of failing on ``None.get``.
        result = data.get("result") or {}
        return [self.parse_chunk(raw) for raw in (result.get("chunks") or [])]

    async def get_help(self, cmdname: Optional[str] = None) -> Any:
        """Call the ``help`` command, optionally for a single ``cmdname``."""
        params = {"cmdname": cmdname} if cmdname else None
        return await self._call("help", params)

    async def health(self) -> Any:
        """Call the ``health`` command and return the decoded reply."""
        return await self._call("health")

    def reconstruct_text(self, chunks: "List[ChunkFull]") -> str:
        """Join chunk texts back together in their original order.

        Chunks are sorted by ``ordinal``; a chunk without an ordinal is keyed
        by its position in ``chunks``. Chunks with empty text are skipped.
        """
        # enumerate() yields each chunk's own position in O(1); list.index()
        # is O(n) per lookup and returns the first *equal* chunk instead.
        ordered = sorted(
            enumerate(chunks),
            key=lambda pair: pair[1].ordinal if pair[1].ordinal is not None else pair[0],
        )
        return "".join(chunk.text for _, chunk in ordered if chunk.text)
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: svo-client
3
+ Version: 0.1.0
4
+ Summary: Async client for SVO semantic chunker microservice.
5
+ Home-page: https://github.com/your_org/svo_client
6
+ Author: Your Name
7
+ Author-email: your@email.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Requires-Dist: aiohttp>=3.8.0
13
+ Requires-Dist: pydantic>=2.0.0
14
+ Requires-Dist: chunk_metadata_adapter
15
+ Dynamic: author
16
+ Dynamic: author-email
17
+ Dynamic: classifier
18
+ Dynamic: home-page
19
+ Dynamic: requires-dist
20
+ Dynamic: requires-python
21
+ Dynamic: summary
@@ -0,0 +1,11 @@
1
+ README.md
2
+ setup.py
3
+ svo_client/__init__.py
4
+ svo_client/chunker_client.py
5
+ svo_client.egg-info/PKG-INFO
6
+ svo_client.egg-info/SOURCES.txt
7
+ svo_client.egg-info/dependency_links.txt
8
+ svo_client.egg-info/requires.txt
9
+ svo_client.egg-info/top_level.txt
10
+ tests/test_chunker_client.py
11
+ tests/test_examples.py
@@ -0,0 +1,3 @@
1
+ aiohttp>=3.8.0
2
+ pydantic>=2.0.0
3
+ chunk_metadata_adapter
@@ -0,0 +1 @@
1
+ svo_client
@@ -0,0 +1,119 @@
1
+ import pytest
2
+ import asyncio
3
+ from svo_client.chunker_client import ChunkerClient, ChunkFull, Token, SV
4
+ from typing import List
5
+ import aiohttp
6
+ import sys
7
+ import types
8
+
9
@pytest.mark.asyncio
async def test_chunk_text_and_reconstruct(monkeypatch):
    """chunk_text parses a mocked reply and reconstruct_text joins it back."""
    # Canned server reply: two ordered chunks carrying nested structures.
    fake_chunks = [
        {
            "uuid": "1",
            "text": "Hello, ",
            "ordinal": 0,
            "sha256": "x",
            "embedding": [1.0],
            "tokens": [{"text": "Hello"}],
            "block": [{"text": "Hello"}],
            "sv": {"subject": {"text": "Hello"}, "verb": {"text": "is"}},
        },
        {
            "uuid": "2",
            "text": "world!",
            "ordinal": 1,
            "sha256": "y",
            "embedding": [2.0],
            "tokens": [{"text": "world"}],
            "block": [{"text": "world"}],
            "sv": {"subject": {"text": "world"}, "verb": {"text": "exists"}},
        },
    ]

    class FakeResponse:
        def __init__(self, data):
            self._data = data

        async def json(self):
            return {"result": {"chunks": self._data}}

        def raise_for_status(self):
            pass

    class FakeContext:
        """Async context manager that hands out a prepared response."""

        def __init__(self, response):
            self._response = response

        async def __aenter__(self):
            return self._response

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def __init__(self):
            self.last_url = None
            self.last_json = None

        def post(self, url, json):
            self.last_url = url
            self.last_json = json
            return FakeContext(FakeResponse(fake_chunks))

        def get(self, url):
            return FakeContext(FakeResponse({"openapi": "3.0.2"}))

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    # chunk_text
    chunks = await client.chunk_text("Hello, world!")
    assert isinstance(chunks, list)
    assert all(isinstance(item, ChunkFull) for item in chunks)
    head, tail = chunks[0], chunks[1]
    assert head.text == "Hello, "
    assert tail.text == "world!"

    # reconstruct_text
    assert client.reconstruct_text(chunks) == "Hello, world!"

    # Nested structures are converted to their model classes.
    assert isinstance(head.tokens[0], Token)
    assert isinstance(head.sv, SV)
    assert head.sv.subject.text == "Hello"
    assert tail.sv.verb.text == "exists"
56
+
57
@pytest.mark.asyncio
async def test_get_openapi_schema(monkeypatch):
    """get_openapi_schema returns the JSON body served by the session's GET."""

    class FakeResponse:
        def raise_for_status(self):
            pass

        async def json(self):
            return {"openapi": "3.0.2"}

    class FakeContext:
        async def __aenter__(self):
            return FakeResponse()

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def get(self, url):
            return FakeContext()

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    schema = await client.get_openapi_schema()
    assert schema["openapi"] == "3.0.2"
73
+
74
@pytest.mark.asyncio
async def test_get_help(monkeypatch):
    """get_help returns the decoded reply of the mocked POST unchanged."""

    class FakeResponse:
        def raise_for_status(self):
            pass

        async def json(self):
            return {"result": {"commands": {"chunk": {}}}}

    class FakeContext:
        async def __aenter__(self):
            return FakeResponse()

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def post(self, url, json):
            return FakeContext()

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    help_info = await client.get_help()
    assert "commands" in help_info["result"]
90
+
91
@pytest.mark.asyncio
async def test_health(monkeypatch):
    """health returns the decoded reply of the mocked POST unchanged."""

    class FakeResponse:
        def raise_for_status(self):
            pass

        async def json(self):
            return {"result": {"success": True}}

    class FakeContext:
        async def __aenter__(self):
            return FakeResponse()

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def post(self, url, json):
            return FakeContext()

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    health = await client.health()
    assert health["result"]["success"] is True
107
+
108
# Integration test (runs only when the server is reachable).
@pytest.mark.asyncio
async def test_chunk_text_integration():
    """Chunk a short text against a live server, skipping if none is up."""
    try:
        async with ChunkerClient() as client:
            chunks = await client.chunk_text("Integration test.")
            assert isinstance(chunks, list)
            assert all(isinstance(item, ChunkFull) for item in chunks)
            if chunks:
                assert hasattr(chunks[0], "text")
    except aiohttp.ClientConnectorError:
        # No server listening: this is an environment issue, not a failure.
        pytest.skip("Chunker server not available for integration test.")
@@ -0,0 +1,30 @@
1
+ import pytest
2
+ import asyncio
3
+ from svo_client.chunker_client import ChunkerClient, ChunkFull
4
+ import sys
5
+ import types
6
+
7
@pytest.mark.asyncio
async def test_example_usage(monkeypatch):
    """Walk through the README-style usage with all network methods stubbed."""

    # Stand-ins for the client's network methods.
    async def fake_chunk_text(self, text, **params):
        return [
            ChunkFull(uuid="1", text="Hello, ", sha256="x", ordinal=0),
            ChunkFull(uuid="2", text="world!", sha256="y", ordinal=1),
        ]

    async def fake_health(self):
        return {"status": "ok"}

    async def fake_get_help(self, cmdname=None):
        return {"help": "info"}

    # Swap the real methods for the stand-ins on the class.
    stubs = {
        "chunk_text": fake_chunk_text,
        "health": fake_health,
        "get_help": fake_get_help,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(ChunkerClient, attr_name, stub)

    async with ChunkerClient() as client:
        chunks = await client.chunk_text("test")
        assert isinstance(chunks, list)
        assert all(isinstance(item, ChunkFull) for item in chunks)

        assert client.reconstruct_text(chunks) == "Hello, world!"

        health = await client.health()
        assert health["status"] == "ok"

        help_info = await client.get_help()
        assert help_info["help"] == "info"