svo-client 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- svo_client-0.1.0/PKG-INFO +21 -0
- svo_client-0.1.0/README.md +27 -0
- svo_client-0.1.0/setup.cfg +4 -0
- svo_client-0.1.0/setup.py +23 -0
- svo_client-0.1.0/svo_client/__init__.py +1 -0
- svo_client-0.1.0/svo_client/chunker_client.py +143 -0
- svo_client-0.1.0/svo_client.egg-info/PKG-INFO +21 -0
- svo_client-0.1.0/svo_client.egg-info/SOURCES.txt +11 -0
- svo_client-0.1.0/svo_client.egg-info/dependency_links.txt +1 -0
- svo_client-0.1.0/svo_client.egg-info/requires.txt +3 -0
- svo_client-0.1.0/svo_client.egg-info/top_level.txt +1 -0
- svo_client-0.1.0/tests/test_chunker_client.py +119 -0
- svo_client-0.1.0/tests/test_examples.py +30 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: svo-client
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Async client for SVO semantic chunker microservice.
|
|
5
|
+
Home-page: https://github.com/your_org/svo_client
|
|
6
|
+
Author: Your Name
|
|
7
|
+
Author-email: your@email.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
13
|
+
Requires-Dist: pydantic>=2.0.0
|
|
14
|
+
Requires-Dist: chunk_metadata_adapter
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: author-email
|
|
17
|
+
Dynamic: classifier
|
|
18
|
+
Dynamic: home-page
|
|
19
|
+
Dynamic: requires-dist
|
|
20
|
+
Dynamic: requires-python
|
|
21
|
+
Dynamic: summary
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# svo-client
|
|
2
|
+
|
|
3
|
+
Асинхронный Python-клиент для SVO Semantic Chunker microservice.
|
|
4
|
+
|
|
5
|
+
## Установка
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install svo-client
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Пример использования
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from svo_client.chunker_client import ChunkerClient
|
|
15
|
+
import asyncio
|
|
16
|
+
|
|
17
|
+
async def main():
|
|
18
|
+
async with ChunkerClient() as client:
|
|
19
|
+
chunks = await client.chunk_text("Your text here.")
|
|
20
|
+
print(client.reconstruct_text(chunks))
|
|
21
|
+
|
|
22
|
+
asyncio.run(main())
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Документация
|
|
26
|
+
- [OpenAPI schema](docs/openapi.json)
|
|
27
|
+
- [Примеры и тесты](tests/test_chunker_client.py)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Packaging script for the ``svo-client`` distribution."""
from pathlib import Path

from setuptools import find_packages, setup

# Reuse the README as the long description so the registry page is not empty.
# Fall back to an empty string when the file is absent (e.g. a stripped
# checkout) instead of failing the build.
_README = Path(__file__).resolve().parent / "README.md"
_LONG_DESCRIPTION = _README.read_text(encoding="utf-8") if _README.is_file() else ""

setup(
    name="svo-client",
    version="0.1.0",
    description="Async client for SVO semantic chunker microservice.",
    long_description=_LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    author="Your Name",
    author_email="your@email.com",
    # Keep the test suite out of the installed distribution even if the
    # tests directory is later turned into a package.
    packages=find_packages(".", exclude=("tests", "tests.*")),
    install_requires=[
        "aiohttp>=3.8.0",
        "pydantic>=2.0.0",
        "chunk_metadata_adapter",
    ],
    python_requires=">=3.8",
    url="https://github.com/your_org/svo_client",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    include_package_data=True,
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Async client for SVO semantic chunker microservice."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
import aiohttp
|
|
6
|
+
from typing import List, Optional, Any, Dict
|
|
7
|
+
from chunk_metadata_adapter import SemanticChunk
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
class Token(BaseModel):
    """One token entry from a chunk's ``tokens``, ``block`` or ``sv`` data.

    Only ``text`` is required; every other field defaults to ``None``.
    """

    # NOTE(review): the optional fields look like dependency-parse
    # annotations (lemma, part of speech, head index, dependency relation,
    # token id, sentence id) -- confirm against the service schema.
    text: str
    lemma: Optional[str] = None
    pos: Optional[str] = None
    head: Optional[int] = None
    deprel: Optional[str] = None
    id: Optional[int] = None
    sent_id: Optional[str] = None
|
|
18
|
+
|
|
19
|
+
class SV(BaseModel):
    """Subject/verb pair attached to a chunk; either side may be ``None``."""

    subject: Optional[Token] = None
    verb: Optional[Token] = None
|
|
22
|
+
|
|
23
|
+
class ChunkFull(BaseModel):
    """Chunk record built from one entry of the service's ``chunks`` list.

    ``uuid``, ``sha256`` and ``text`` are required; all remaining fields are
    optional and default to ``None``.
    """

    uuid: str
    source_id: Optional[str] = None
    # Position of the chunk; used as the sort key when text is reconstructed.
    ordinal: Optional[int] = None
    sha256: str
    text: str
    summary: Optional[str] = None
    language: Optional[str] = None
    type: Optional[str] = None
    source_path: Optional[str] = None
    source_lines_start: Optional[int] = None
    source_lines_end: Optional[int] = None
    project: Optional[str] = None
    task_id: Optional[str] = None
    subtask_id: Optional[str] = None
    status: Optional[str] = None
    unit_id: Optional[str] = None
    # NOTE(review): kept as a plain string; presumably a timestamp -- confirm format.
    created_at: Optional[str] = None
    # Left untyped (Any): the shape of ``tags`` is not fixed by this model.
    tags: Optional[Any] = None
    role: Optional[str] = None
    link_parent: Optional[str] = None
    link_related: Optional[str] = None
    # NOTE(review): the float fields below look like quality/boundary metrics
    # produced by the chunker -- confirm ranges against the service schema.
    quality_score: Optional[float] = None
    coverage: Optional[float] = None
    cohesion: Optional[float] = None
    boundary_prev: Optional[float] = None
    boundary_next: Optional[float] = None
    used_in_generation: Optional[bool] = None
    feedback_accepted: Optional[int] = None
    feedback_rejected: Optional[int] = None
    # NOTE(review): presumably offsets of the chunk in the source text -- confirm.
    start: Optional[int] = None
    end: Optional[int] = None
    sv: Optional[SV] = None
    score: Optional[float] = None
    embedding: Optional[List[float]] = None
    tokens: Optional[List[Token]] = None
    block: Optional[List[Token]] = None
|
|
60
|
+
|
|
61
|
+
class ChunkerClient:
    """Async JSON-RPC client for the SVO semantic chunker service.

    Use it as an async context manager so the underlying
    ``aiohttp.ClientSession`` is opened and closed automatically::

        async with ChunkerClient() as client:
            chunks = await client.chunk_text("Your text here.")
    """

    def __init__(self, url: str = "http://localhost", port: int = 8009):
        """Store the service address.

        Args:
            url: Scheme and host of the service; a trailing ``/`` is ignored.
            port: TCP port of the service.
        """
        # No whitespace around the colon: "host: 8009" is not a valid URL.
        self.base_url = f"{url.rstrip('/')}:{port}"
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        """Open the HTTP session and return the client itself."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Close the HTTP session if one was opened."""
        if self.session:
            await self.session.close()

    def _require_session(self):
        """Return the active session or fail with an actionable message."""
        if self.session is None:
            raise RuntimeError(
                "ChunkerClient has no open session; use "
                "'async with ChunkerClient(...)' before calling request methods."
            )
        return self.session

    async def _post_rpc(self, payload: Dict[str, Any]) -> Any:
        """POST a JSON-RPC payload to ``/cmd`` and return the decoded JSON reply."""
        url = f"{self.base_url}/cmd"
        async with self._require_session().post(url, json=payload) as resp:
            resp.raise_for_status()
            return await resp.json()

    async def get_openapi_schema(self) -> Any:
        """Fetch ``/openapi.json`` and return the decoded document.

        Raises:
            RuntimeError: If no session is open.
        """
        url = f"{self.base_url}/openapi.json"
        async with self._require_session().get(url) as resp:
            resp.raise_for_status()
            return await resp.json()

    def parse_chunk(self, chunk: Dict[str, Any]) -> "ChunkFull":
        """Convert one raw chunk dictionary into a ``ChunkFull`` model.

        The nested ``tokens``, ``block`` and ``sv`` entries are converted to
        ``Token``/``SV`` models; missing or empty entries become ``None``.
        All other keys are passed to ``ChunkFull`` unchanged.
        """

        def to_tokens(items):
            return [Token(**item) for item in items] if items else None

        raw_sv = chunk.get("sv")
        sv = None
        if raw_sv:
            sv = SV(**{
                key: Token(**value) if value else None
                for key, value in raw_sv.items()
            })

        plain_fields = {
            key: value
            for key, value in chunk.items()
            if key not in ("tokens", "block", "sv")
        }
        return ChunkFull(
            **plain_fields,
            tokens=to_tokens(chunk.get("tokens")),
            block=to_tokens(chunk.get("block")),
            sv=sv,
        )

    async def chunk_text(self, text: str, **params) -> List["ChunkFull"]:
        """Send ``text`` to the ``chunk`` command and return the parsed chunks.

        Args:
            text: Text to split into chunks.
            **params: Extra parameters forwarded in the JSON-RPC ``params``.

        Returns:
            The parsed chunks. The list is empty when the reply carries no
            ``result``/``chunks`` (this includes JSON-RPC error replies).

        Raises:
            RuntimeError: If no session is open.
        """
        payload = {
            "jsonrpc": "2.0",
            "method": "chunk",
            "params": {"text": text, **params},
            "id": 1
        }
        data = await self._post_rpc(payload)
        # "result" may be absent or null; treat both as "no chunks".
        result = data.get("result") or {}
        return [self.parse_chunk(chunk) for chunk in result.get("chunks") or []]

    async def get_help(self, cmdname: Optional[str] = None) -> Any:
        """Call the ``help`` command, optionally for a single command name.

        Returns:
            The decoded JSON-RPC reply.

        Raises:
            RuntimeError: If no session is open.
        """
        payload = {
            "jsonrpc": "2.0",
            "method": "help",
            "id": 1
        }
        if cmdname:
            payload["params"] = {"cmdname": cmdname}
        return await self._post_rpc(payload)

    async def health(self) -> Any:
        """Call the ``health`` command and return the decoded JSON-RPC reply.

        Raises:
            RuntimeError: If no session is open.
        """
        payload = {
            "jsonrpc": "2.0",
            "method": "health",
            "id": 1
        }
        return await self._post_rpc(payload)

    def reconstruct_text(self, chunks: List["ChunkFull"]) -> str:
        """Join chunk texts back into one string in their original order.

        Chunks are ordered by ``ordinal``; a chunk without an ordinal is
        ordered by its position in ``chunks``. Chunks with empty text are
        skipped.
        """
        # enumerate() gives each chunk its real position. The previous
        # chunks.index(c) lookup was quadratic and returned the position of
        # the first *equal* chunk, which misplaced duplicates.
        ordered = sorted(
            enumerate(chunks),
            key=lambda pair: pair[1].ordinal if pair[1].ordinal is not None else pair[0],
        )
        return ''.join(chunk.text for _, chunk in ordered if chunk.text)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: svo-client
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Async client for SVO semantic chunker microservice.
|
|
5
|
+
Home-page: https://github.com/your_org/svo_client
|
|
6
|
+
Author: Your Name
|
|
7
|
+
Author-email: your@email.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
13
|
+
Requires-Dist: pydantic>=2.0.0
|
|
14
|
+
Requires-Dist: chunk_metadata_adapter
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: author-email
|
|
17
|
+
Dynamic: classifier
|
|
18
|
+
Dynamic: home-page
|
|
19
|
+
Dynamic: requires-dist
|
|
20
|
+
Dynamic: requires-python
|
|
21
|
+
Dynamic: summary
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
svo_client/__init__.py
|
|
4
|
+
svo_client/chunker_client.py
|
|
5
|
+
svo_client.egg-info/PKG-INFO
|
|
6
|
+
svo_client.egg-info/SOURCES.txt
|
|
7
|
+
svo_client.egg-info/dependency_links.txt
|
|
8
|
+
svo_client.egg-info/requires.txt
|
|
9
|
+
svo_client.egg-info/top_level.txt
|
|
10
|
+
tests/test_chunker_client.py
|
|
11
|
+
tests/test_examples.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
svo_client
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import asyncio
|
|
3
|
+
from svo_client.chunker_client import ChunkerClient, ChunkFull, Token, SV
|
|
4
|
+
from typing import List
|
|
5
|
+
import aiohttp
|
|
6
|
+
import sys
|
|
7
|
+
import types
|
|
8
|
+
|
|
9
|
+
@pytest.mark.asyncio
async def test_chunk_text_and_reconstruct(monkeypatch):
    """chunk_text parses the mocked reply and reconstruct_text rejoins it."""
    # Canned server reply: two consecutive chunks with nested structures.
    fake_chunks = [
        {
            "uuid": "1", "text": "Hello, ", "ordinal": 0, "sha256": "x", "embedding": [1.0],
            "tokens": [{"text": "Hello"}],
            "block": [{"text": "Hello"}],
            "sv": {"subject": {"text": "Hello"}, "verb": {"text": "is"}},
        },
        {
            "uuid": "2", "text": "world!", "ordinal": 1, "sha256": "y", "embedding": [2.0],
            "tokens": [{"text": "world"}],
            "block": [{"text": "world"}],
            "sv": {"subject": {"text": "world"}, "verb": {"text": "exists"}},
        },
    ]

    class FakeResponse:
        def __init__(self, data):
            self._data = data

        async def json(self):
            return {"result": {"chunks": self._data}}

        def raise_for_status(self):
            pass

    class _ResponseContext:
        """Minimal async context manager yielding a prepared response."""

        def __init__(self, data):
            self._data = data

        async def __aenter__(self):
            return FakeResponse(self._data)

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def __init__(self):
            self.last_url = None
            self.last_json = None

        def post(self, url, json):
            self.last_url = url
            self.last_json = json
            return _ResponseContext(fake_chunks)

        def get(self, url):
            return _ResponseContext({"openapi": "3.0.2"})

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    # chunk_text
    chunks = await client.chunk_text("Hello, world!")
    assert isinstance(chunks, list)
    for parsed in chunks:
        assert isinstance(parsed, ChunkFull)
    first, second = chunks[0], chunks[1]
    assert first.text == "Hello, "
    assert second.text == "world!"

    # reconstruct_text
    assert client.reconstruct_text(chunks) == "Hello, world!"

    # Nested structures are converted to models.
    assert isinstance(first.tokens[0], Token)
    assert isinstance(first.sv, SV)
    assert first.sv.subject.text == "Hello"
    assert second.sv.verb.text == "exists"
|
|
56
|
+
|
|
57
|
+
@pytest.mark.asyncio
async def test_get_openapi_schema(monkeypatch):
    """get_openapi_schema returns the JSON document served by the session."""

    class FakeResponse:
        def raise_for_status(self):
            pass

        async def json(self):
            return {"openapi": "3.0.2"}

    class _ResponseContext:
        async def __aenter__(self):
            return FakeResponse()

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def get(self, url):
            return _ResponseContext()

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    schema = await client.get_openapi_schema()

    assert schema["openapi"] == "3.0.2"
|
|
73
|
+
|
|
74
|
+
@pytest.mark.asyncio
async def test_get_help(monkeypatch):
    """get_help returns the raw JSON-RPC reply of the help command."""

    class FakeResponse:
        def raise_for_status(self):
            pass

        async def json(self):
            return {"result": {"commands": {"chunk": {}}}}

    class _ResponseContext:
        async def __aenter__(self):
            return FakeResponse()

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def post(self, url, json):
            return _ResponseContext()

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    help_info = await client.get_help()

    assert "commands" in help_info["result"]
|
|
90
|
+
|
|
91
|
+
@pytest.mark.asyncio
async def test_health(monkeypatch):
    """health returns the raw JSON-RPC reply of the health command."""

    class FakeResponse:
        def raise_for_status(self):
            pass

        async def json(self):
            return {"result": {"success": True}}

    class _ResponseContext:
        async def __aenter__(self):
            return FakeResponse()

        async def __aexit__(self, exc_type, exc, tb):
            pass

    class FakeSession:
        def post(self, url, json):
            return _ResponseContext()

        async def close(self):
            pass

    client = ChunkerClient()
    client.session = FakeSession()

    health = await client.health()

    assert health["result"]["success"] is True
|
|
107
|
+
|
|
108
|
+
# Интеграционный тест (если сервер доступен)
|
|
109
|
+
@pytest.mark.asyncio
async def test_chunk_text_integration():
    """Exercise a live chunker service; skipped when it cannot be reached."""
    try:
        async with ChunkerClient() as client:
            chunks = await client.chunk_text("Integration test.")
            assert isinstance(chunks, list)
            for parsed in chunks:
                assert isinstance(parsed, ChunkFull)
            # Only inspect the first chunk when the service returned any.
            assert not chunks or hasattr(chunks[0], "text")
    except aiohttp.ClientConnectorError:
        pytest.skip("Chunker server not available for integration test.")
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import asyncio
|
|
3
|
+
from svo_client.chunker_client import ChunkerClient, ChunkFull
|
|
4
|
+
import sys
|
|
5
|
+
import types
|
|
6
|
+
|
|
7
|
+
@pytest.mark.asyncio
async def test_example_usage(monkeypatch):
    """Run the README-style usage flow against patched client methods."""

    # Stand-ins for the network-bound client methods.
    async def fake_chunk_text(self, text, **params):
        first = ChunkFull(uuid="1", text="Hello, ", sha256="x", ordinal=0)
        second = ChunkFull(uuid="2", text="world!", sha256="y", ordinal=1)
        return [first, second]

    async def fake_health(self):
        return {"status": "ok"}

    async def fake_get_help(self, cmdname=None):
        return {"help": "info"}

    # Patch the methods on the class so every instance uses the stand-ins.
    replacements = (
        ("chunk_text", fake_chunk_text),
        ("health", fake_health),
        ("get_help", fake_get_help),
    )
    for attr_name, stand_in in replacements:
        monkeypatch.setattr(ChunkerClient, attr_name, stand_in)

    async with ChunkerClient() as client:
        chunks = await client.chunk_text("test")
        assert isinstance(chunks, list)
        for parsed in chunks:
            assert isinstance(parsed, ChunkFull)

        assert client.reconstruct_text(chunks) == "Hello, world!"

        health = await client.health()
        assert health["status"] == "ok"

        help_info = await client.get_help()
        assert help_info["help"] == "info"
|