uip-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uip_sdk-0.1.0/PKG-INFO +103 -0
- uip_sdk-0.1.0/README.md +88 -0
- uip_sdk-0.1.0/pyproject.toml +23 -0
- uip_sdk-0.1.0/setup.cfg +4 -0
- uip_sdk-0.1.0/uip_sdk/__init__.py +52 -0
- uip_sdk-0.1.0/uip_sdk/client.py +437 -0
- uip_sdk-0.1.0/uip_sdk/errors.py +46 -0
- uip_sdk-0.1.0/uip_sdk/models.py +122 -0
- uip_sdk-0.1.0/uip_sdk/stream.py +106 -0
- uip_sdk-0.1.0/uip_sdk.egg-info/PKG-INFO +103 -0
- uip_sdk-0.1.0/uip_sdk.egg-info/SOURCES.txt +12 -0
- uip_sdk-0.1.0/uip_sdk.egg-info/dependency_links.txt +1 -0
- uip_sdk-0.1.0/uip_sdk.egg-info/requires.txt +1 -0
- uip_sdk-0.1.0/uip_sdk.egg-info/top_level.txt +1 -0
uip_sdk-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: uip-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: UIP — Universal Inference Platform Python SDK
|
|
5
|
+
Author-email: Zhu Wenbo <zwb.2002@tsinghua.org.cn>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: httpx>=0.27
|
|
15
|
+
|
|
16
|
+
# UIP Python SDK
|
|
17
|
+
|
|
18
|
+
Universal Inference Platform 的 Python 客户端库。
|
|
19
|
+
|
|
20
|
+
## 安装
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install uip-sdk
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## 快速开始
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from uip_sdk import UIPClient
|
|
30
|
+
|
|
31
|
+
# 方式 1: API Key
|
|
32
|
+
client = UIPClient(api_key="ggw-xxx...")
|
|
33
|
+
|
|
34
|
+
# 方式 2: JWT Token
|
|
35
|
+
client = UIPClient(token="eyJhbGciOiJIUzI1NiIs...")
|
|
36
|
+
|
|
37
|
+
# 方式 3: 环境变量 (UIP_API_KEY 或 UIP_TOKEN)
|
|
38
|
+
client = UIPClient()
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 使用示例
|
|
42
|
+
|
|
43
|
+
### 对话 (Chat Completions)
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
resp = client.chat(
|
|
47
|
+
messages=[{"role": "user", "content": "你好"}],
|
|
48
|
+
model="qwen2.5:7b",
|
|
49
|
+
)
|
|
50
|
+
print(resp.text)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 流式生成
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
for chunk in client.generate("写一首关于春天的诗", stream=True):
|
|
57
|
+
print(chunk.text, end="", flush=True)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Rerank (文档重排序)
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
results = client.rerank(
|
|
64
|
+
query="CBA季后赛战术分析",
|
|
65
|
+
documents=[
|
|
66
|
+
"CBA联赛采用胜率决定排名",
|
|
67
|
+
"篮球三分线距离为6.75米",
|
|
68
|
+
"广东队采用全场紧逼战术",
|
|
69
|
+
],
|
|
70
|
+
model="Qwen3-Reranker-0.6B",
|
|
71
|
+
top_n=2,
|
|
72
|
+
)
|
|
73
|
+
for r in results.results:
|
|
74
|
+
print(f"#{r.index}: {r.document[:30]}... score={r.relevance_score:.2f}")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 批量推理
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
batch = client.batch(
|
|
81
|
+
prompts=["你好", "介绍你自己"],
|
|
82
|
+
model="qwen2.5:7b",
|
|
83
|
+
)
|
|
84
|
+
for item in batch.results:
|
|
85
|
+
print(f"[{item.index}] {item.response[:50]}")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 指定调度策略
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
client.with_strategy("least_queue").generate("hi")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### 嵌入向量
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
resp = client.embed(input="需要向量化的文本", model="bge-m3:567m")
|
|
98
|
+
print(len(resp.embedding))  # 向量维度 (bge-m3 为 1024)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
|
|
103
|
+
MIT License. Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn).
|
uip_sdk-0.1.0/README.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# UIP Python SDK
|
|
2
|
+
|
|
3
|
+
Universal Inference Platform 的 Python 客户端库。
|
|
4
|
+
|
|
5
|
+
## 安装
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install uip-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## 快速开始
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from uip_sdk import UIPClient
|
|
15
|
+
|
|
16
|
+
# 方式 1: API Key
|
|
17
|
+
client = UIPClient(api_key="ggw-xxx...")
|
|
18
|
+
|
|
19
|
+
# 方式 2: JWT Token
|
|
20
|
+
client = UIPClient(token="eyJhbGciOiJIUzI1NiIs...")
|
|
21
|
+
|
|
22
|
+
# 方式 3: 环境变量 (UIP_API_KEY 或 UIP_TOKEN)
|
|
23
|
+
client = UIPClient()
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## 使用示例
|
|
27
|
+
|
|
28
|
+
### 对话 (Chat Completions)
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
resp = client.chat(
|
|
32
|
+
messages=[{"role": "user", "content": "你好"}],
|
|
33
|
+
model="qwen2.5:7b",
|
|
34
|
+
)
|
|
35
|
+
print(resp.text)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 流式生成
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
for chunk in client.generate("写一首关于春天的诗", stream=True):
|
|
42
|
+
print(chunk.text, end="", flush=True)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Rerank (文档重排序)
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
results = client.rerank(
|
|
49
|
+
query="CBA季后赛战术分析",
|
|
50
|
+
documents=[
|
|
51
|
+
"CBA联赛采用胜率决定排名",
|
|
52
|
+
"篮球三分线距离为6.75米",
|
|
53
|
+
"广东队采用全场紧逼战术",
|
|
54
|
+
],
|
|
55
|
+
model="Qwen3-Reranker-0.6B",
|
|
56
|
+
top_n=2,
|
|
57
|
+
)
|
|
58
|
+
for r in results.results:
|
|
59
|
+
print(f"#{r.index}: {r.document[:30]}... score={r.relevance_score:.2f}")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### 批量推理
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
batch = client.batch(
|
|
66
|
+
prompts=["你好", "介绍你自己"],
|
|
67
|
+
model="qwen2.5:7b",
|
|
68
|
+
)
|
|
69
|
+
for item in batch.results:
|
|
70
|
+
print(f"[{item.index}] {item.response[:50]}")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### 指定调度策略
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
client.with_strategy("least_queue").generate("hi")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 嵌入向量
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
resp = client.embed(input="需要向量化的文本", model="bge-m3:567m")
|
|
83
|
+
print(len(resp.embedding))  # 向量维度 (bge-m3 为 1024)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## License
|
|
87
|
+
|
|
88
|
+
MIT License. Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn).
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "uip-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "UIP — Universal Inference Platform Python SDK"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [{name = "Zhu Wenbo", email = "zwb.2002@tsinghua.org.cn"}]
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
dependencies = ["httpx>=0.27"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
include = ["uip_sdk*"]
|
uip_sdk-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
UIP Python SDK — Universal Inference Platform Client Library
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from uip_sdk import UIPClient
|
|
8
|
+
|
|
9
|
+
client = UIPClient(base_url="http://10.0.1.115:11438", api_key="ggw-xxx")
|
|
10
|
+
for chunk in client.generate("你好", stream=True):
|
|
11
|
+
print(chunk.text, end="")
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .client import UIPClient
|
|
15
|
+
from .models import (
|
|
16
|
+
ChatResponse,
|
|
17
|
+
GenResponse,
|
|
18
|
+
EmbedResponse,
|
|
19
|
+
RerankResponse,
|
|
20
|
+
BatchResponse,
|
|
21
|
+
UploadResponse,
|
|
22
|
+
ModelItem,
|
|
23
|
+
StreamChunk,
|
|
24
|
+
TokenUsage,
|
|
25
|
+
)
|
|
26
|
+
from .errors import (
    UIPError,
    AuthenticationError,
    InsufficientBalanceError,
    NotFoundError,
    ServerError,
    UIGOfflineError,
    TimeoutError,
    RateLimitError,
)

# Public API of the package. NotFoundError and ServerError are raised by
# UIPClient for 404 / 5xx responses, so they are exported here as well to let
# callers catch them with `from uip_sdk import ...`.
__all__ = [
    "UIPClient",
    "ChatResponse",
    "GenResponse",
    "EmbedResponse",
    "RerankResponse",
    "BatchResponse",
    "UploadResponse",
    "ModelItem",
    "StreamChunk",
    "TokenUsage",
    "UIPError",
    "AuthenticationError",
    "InsufficientBalanceError",
    "NotFoundError",
    "ServerError",
    "UIGOfflineError",
    "TimeoutError",
    "RateLimitError",
]
|
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
|
|
3
|
+
"""UIP SDK — 核心客户端"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, AsyncIterator, Iterator, Optional, Union
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from .errors import (
|
|
13
|
+
AuthenticationError,
|
|
14
|
+
InsufficientBalanceError,
|
|
15
|
+
NotFoundError,
|
|
16
|
+
RateLimitError,
|
|
17
|
+
ServerError,
|
|
18
|
+
TimeoutError,
|
|
19
|
+
UIGOfflineError,
|
|
20
|
+
UIPError,
|
|
21
|
+
)
|
|
22
|
+
from .models import (
|
|
23
|
+
BatchResponse,
|
|
24
|
+
BatchResult,
|
|
25
|
+
ChatResponse,
|
|
26
|
+
EmbedResponse,
|
|
27
|
+
GenResponse,
|
|
28
|
+
ModelItem,
|
|
29
|
+
RerankItem,
|
|
30
|
+
RerankResponse,
|
|
31
|
+
StreamChunk,
|
|
32
|
+
TokenUsage,
|
|
33
|
+
UploadResponse,
|
|
34
|
+
)
|
|
35
|
+
from .stream import iter_sse_lines
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class UIPClient:
    """Synchronous client for the UIP (Universal Inference Platform) HTTP API.

    Args:
        base_url: Root URL of the UIP API (default ``http://10.0.1.115:11438``).
        api_key: API key, sent as the ``x-api-key`` header. Falls back to the
            ``UIP_API_KEY`` environment variable.
        token: JWT token, sent as ``Authorization: Bearer <token>``. Falls back
            to the ``UIP_TOKEN`` environment variable. When both a token and an
            API key are available, the token is used.
        timeout: HTTP timeout in seconds (default 300).
        strategy: Optional scheduling strategy (e.g. ``"least_queue"``), sent
            as the ``X-UIP-Strategy`` header.

    Raises:
        AuthenticationError: If neither an API key nor a token is available.
    """

    def __init__(
        self,
        base_url: str = "http://10.0.1.115:11438",
        api_key: Optional[str] = None,
        token: Optional[str] = None,
        timeout: int = 300,
        strategy: Optional[str] = None,
    ):
        self.base_url = base_url.rstrip("/")
        # Explicit arguments win; environment variables are the fallback.
        self._api_key = api_key or os.environ.get("UIP_API_KEY", "")
        self._token = token or os.environ.get("UIP_TOKEN", "")
        self.timeout = timeout
        self._strategy = strategy

        if not self._api_key and not self._token:
            raise AuthenticationError(
                "需要提供 api_key 或 token。"
                "可通过参数传入或设置 UIP_API_KEY 环境变量。"
            )

        self._client = httpx.Client(timeout=httpx.Timeout(self.timeout), base_url=self.base_url)

    # --- Request plumbing ---------------------------------------------------

    def _headers(self) -> dict:
        """Build the auth (and optional strategy) headers for one request."""
        h = {}
        if self._token:
            h["Authorization"] = f"Bearer {self._token}"
        elif self._api_key:
            h["x-api-key"] = self._api_key
        if self._strategy:
            h["X-UIP-Strategy"] = self._strategy
        return h

    def _raise_on_error(self, resp: httpx.Response) -> None:
        """Raise the matching ``UIPError`` subclass for a 4xx/5xx response.

        The exception's ``detail`` is the ``detail`` field of a JSON object
        body when present, otherwise the first 200 characters of the body.
        The response body must already be readable (non-streaming, or read).
        """
        if resp.status_code < 400:
            return

        fallback = resp.text[:200]
        try:
            body = resp.json()
        except ValueError:
            body = None
        # A JSON body that is not an object (list, string, ...) has no "detail".
        detail = body.get("detail", fallback) if isinstance(body, dict) else fallback

        status = resp.status_code
        known = {
            401: (AuthenticationError, "认证失败,请检查 api_key 或 token"),
            402: (InsufficientBalanceError, "余额不足"),
            404: (NotFoundError, "资源不存在"),
            429: (RateLimitError, "请求频率限制"),
            503: (UIGOfflineError, "UIG 全部离线"),
            504: (TimeoutError, "请求超时"),
        }
        if status in known:
            exc_type, message = known[status]
            raise exc_type(message, status, detail)
        if status >= 500:
            raise ServerError("服务端错误", status, detail)
        raise UIPError("请求失败", status, detail)

    def _post(self, endpoint: str, json_body: dict) -> httpx.Response:
        """POST ``json_body`` to ``endpoint`` and return the buffered response.

        Raises:
            UIPError: (or a subclass) when the server answers with status >= 400.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        resp = self._client.post(url, json=json_body, headers=self._headers())
        self._raise_on_error(resp)
        return resp

    def _post_stream(self, endpoint: str, json_body: dict) -> httpx.Response:
        """POST ``json_body`` and return a response opened in streaming mode.

        The body is NOT read eagerly, so chunks can be consumed as the server
        produces them. The caller owns the returned response and must call
        ``close()`` on it when done.

        Raises:
            UIPError: (or a subclass) when the server answers with status >= 400.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        request = self._client.build_request(
            "POST",
            url,
            json=json_body,
            headers={**self._headers(), "Accept": "text/event-stream"},
        )
        resp = self._client.send(request, stream=True)
        if resp.status_code >= 400:
            try:
                # Load the error body so _raise_on_error can inspect it.
                resp.read()
                self._raise_on_error(resp)
            finally:
                resp.close()
        return resp

    # --- Context manager ----------------------------------------------------

    def close(self):
        """Close the underlying HTTP client and its connections."""
        self._client.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    # --- Chat completions ---------------------------------------------------

    def chat(
        self,
        messages: list[dict],
        model: str = "qwen2.5:7b",
        stream: bool = False,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        top_p: float = 1.0,
        **kwargs,
    ) -> Union[ChatResponse, Iterator[StreamChunk]]:
        """Call the OpenAI-compatible chat completions endpoint.

        Args:
            messages: Message list, e.g. ``[{"role": "user", "content": "hi"}]``.
            model: Model name.
            stream: When true, return an iterator of chunks instead of a
                single response.
            temperature: Sampling temperature.
            max_tokens: Maximum number of output tokens; omitted from the
                request when ``None``.
            top_p: Nucleus sampling value; only sent when it differs from 1.0.
            **kwargs: Extra fields merged into the request body.

        Returns:
            ``ChatResponse`` when ``stream`` is false, otherwise an
            ``Iterator[StreamChunk]``.
        """
        body = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
            **kwargs,
        }
        if max_tokens is not None:
            body["max_tokens"] = max_tokens
        if top_p != 1.0:
            body["top_p"] = top_p

        if stream:
            return self._stream_chat(body)

        t0 = time.monotonic()
        resp = self._post("api/v1/chat/completions", body)
        elapsed_ms = int((time.monotonic() - t0) * 1000)
        data = resp.json()

        # `or` guards against both a missing key and an empty/null value, so
        # an empty "choices" list no longer raises IndexError.
        choices = data.get("choices") or [{}]
        choice = choices[0]
        message = choice.get("message") or {}
        usage = data.get("usage") or {}
        return ChatResponse(
            text=message.get("content") or "",
            model=data.get("model", model),
            tokens=TokenUsage(
                input=usage.get("prompt_tokens", 0),
                output=usage.get("completion_tokens", 0),
                total=usage.get("total_tokens", 0),
            ),
            elapsed_ms=elapsed_ms,
            finish_reason=choice.get("finish_reason") or "",
            raw=data,
        )

    def _stream_chat(self, body: dict) -> Iterator[StreamChunk]:
        """Yield chat chunks from the streaming endpoint until a done marker."""
        from .stream import parse_chat_stream_chunk, parse_sse_line

        resp = self._post_stream("api/v1/chat/completions", body)
        try:
            # iter_lines() also yields a final line that lacks a trailing newline.
            for raw in resp.iter_lines():
                parsed = parse_sse_line(raw)
                if not parsed:
                    continue
                if parsed.get("done"):
                    return
                yield parse_chat_stream_chunk(parsed)
        finally:
            resp.close()

    # --- Generate -----------------------------------------------------------

    def generate(
        self,
        prompt: str,
        model: str = "qwen2.5:7b",
        system: str = "",
        stream: bool = False,
        options: Optional[dict] = None,
    ) -> Union[GenResponse, Iterator[StreamChunk]]:
        """Call the Ollama-compatible generate endpoint.

        Args:
            prompt: Input prompt.
            model: Model name.
            system: System prompt; omitted from the request when empty.
            stream: When true, return an iterator of chunks.
            options: Ollama options, e.g. ``{"num_predict": 100}``.

        Returns:
            ``GenResponse`` when ``stream`` is false, otherwise an
            ``Iterator[StreamChunk]``.
        """
        body = {"model": model, "prompt": prompt, "stream": stream}
        if system:
            body["system"] = system
        if options:
            body["options"] = options

        if stream:
            return self._stream_generate(body)

        t0 = time.monotonic()
        resp = self._post("api/v1/generate", body)
        elapsed_ms = int((time.monotonic() - t0) * 1000)
        data = resp.json()

        prompt_tokens = data.get("prompt_eval_count") or 0
        output_tokens = data.get("eval_count") or 0
        return GenResponse(
            text=data.get("response", ""),
            thinking=data.get("thinking", ""),
            model=data.get("model", model),
            done=data.get("done", True),
            done_reason=data.get("done_reason", ""),
            tokens=TokenUsage(
                input=prompt_tokens,
                output=output_tokens,
                # The response carries no total, so derive it.
                total=prompt_tokens + output_tokens,
            ),
            total_duration=data.get("total_duration"),
            load_duration=data.get("load_duration"),
            prompt_eval_count=data.get("prompt_eval_count"),
            eval_count=data.get("eval_count"),
            eval_duration=data.get("eval_duration"),
            elapsed_ms=elapsed_ms,
            raw=data,
        )

    def _stream_generate(self, body: dict) -> Iterator[StreamChunk]:
        """Yield generate chunks from the streaming endpoint."""
        resp = self._post_stream("api/v1/generate", body)
        try:
            yield from iter_sse_lines(resp.iter_bytes())
        finally:
            resp.close()

    # --- Embeddings ---------------------------------------------------------

    def embed(
        self,
        input: Union[str, list[str]],
        model: str = "bge-m3:567m",
    ) -> EmbedResponse:
        """Embed text and return the first embedding vector of the response.

        The vector is looked up, in order, in ``data[0].embedding``,
        ``embedding`` and ``embeddings[0]`` of the response body. When
        ``input`` is a list, only the first vector is exposed on
        ``EmbedResponse.embedding``; the full payload is in ``raw``.
        """
        body = {"model": model, "input": input}
        resp = self._post("api/v1/embeddings", body)
        data = resp.json()

        usage = data.get("usage") or {}
        # `or` keeps an empty "data"/"embeddings" list from raising IndexError.
        items = data.get("data") or [{}]
        embedding = items[0].get("embedding") or data.get("embedding") or []
        if not embedding:
            batches = data.get("embeddings") or [[]]
            embedding = batches[0]

        return EmbedResponse(
            embedding=embedding,
            model=data.get("model", model),
            tokens=TokenUsage(
                input=usage.get("prompt_tokens", usage.get("total_tokens", 0)),
                output=0,
                total=usage.get("total_tokens", 0),
            ),
            raw=data,
        )

    # --- Rerank -------------------------------------------------------------

    def rerank(
        self,
        query: str,
        documents: list[str],
        model: str = "Qwen3-Reranker-0.6B",
        top_n: Optional[int] = None,
    ) -> RerankResponse:
        """Rerank documents against a query via the custom rerank endpoint.

        Args:
            query: Query text.
            documents: Documents to rank.
            model: Rerank model name.
            top_n: Return only the top N results; omitted from the request
                when ``None``.
        """
        body = {"model": model, "query": query, "documents": documents}
        if top_n is not None:
            body["top_n"] = top_n

        t0 = time.monotonic()
        resp = self._post("rerank", body)
        elapsed_ms = int((time.monotonic() - t0) * 1000)
        data = resp.json()

        results = [
            RerankItem(
                index=item.get("index", 0),
                document=item.get("document", ""),
                relevance_score=item.get("relevance_score", 0.0),
            )
            for item in data.get("results", [])
        ]

        return RerankResponse(
            results=results,
            total=data.get("total", len(documents)),
            model=data.get("model", model),
            # Prefer the server-reported latency; fall back to the local timing.
            elapsed_ms=data.get("elapsed_ms", elapsed_ms),
            raw=data,
        )

    # --- Batch --------------------------------------------------------------

    def batch(
        self,
        prompts: list[str],
        model: str = "qwen2.5:7b",
        system: str = "",
    ) -> BatchResponse:
        """Submit several prompts in one non-streaming batch request."""
        body = {"model": model, "prompts": prompts, "stream": False}
        if system:
            body["system"] = system

        resp = self._post("api/v1/batch/completions", body)
        data = resp.json()

        results = [
            BatchResult(
                index=item.get("index", 0),
                prompt=item.get("prompt", ""),
                response=item.get("response", ""),
                error=item.get("error"),
            )
            for item in data.get("results", [])
        ]

        return BatchResponse(
            total=data.get("total", 0),
            completed=data.get("completed", 0),
            errors=data.get("errors", 0),
            elapsed_ms=data.get("elapsed_ms", 0),
            results=results,
            model=data.get("model", model),
            raw=data,
        )

    # --- Upload -------------------------------------------------------------

    def upload(self, file_path: str) -> UploadResponse:
        """Upload a local file as multipart form data.

        Args:
            file_path: Path of the local file to upload.

        Raises:
            OSError: If the file cannot be opened.
            UIPError: (or a subclass) when the server answers with status >= 400.
        """
        filename = os.path.basename(file_path)
        url = f"{self.base_url}/api/v1/upload"
        with open(file_path, "rb") as f:
            # Go through the shared client so the upload reuses its connection
            # pool and timeout instead of opening a one-off connection.
            resp = self._client.post(
                url, files={"file": (filename, f)}, headers=self._headers()
            )
        self._raise_on_error(resp)

        data = resp.json()
        return UploadResponse(
            filename=data.get("filename", ""),
            original_name=data.get("original_name", filename),
            size=data.get("size", 0),
            url=data.get("url", ""),
            content_type=data.get("content_type", ""),
            raw=data,
        )

    # --- Models -------------------------------------------------------------

    def list_models(self) -> list[ModelItem]:
        """Return the models listed under ``data`` by the models endpoint.

        The request is a POST with an empty JSON body.
        """
        resp = self._post("api/v1/models", {})
        data = resp.json()
        return [
            ModelItem(
                id=m.get("id", ""),
                created=m.get("created", 0),
                owned_by=m.get("owned_by", "uip"),
            )
            for m in data.get("data", [])
        ]

    # --- Strategy -----------------------------------------------------------

    def with_strategy(self, strategy: str) -> "UIPClient":
        """Set the scheduling strategy and return this client for chaining.

        This mutates the client: the strategy stays in effect for all later
        requests, not just the chained call.

        Args:
            strategy: model_first / least_queue / weighted_rr / affinity

        Returns:
            ``self``.
        """
        self._strategy = strategy
        return self

    # --- Health -------------------------------------------------------------

    def ping(self) -> dict:
        """Call the ping endpoint (POST, empty JSON body) and return its JSON."""
        resp = self._post("api/v1/ping", {})
        return resp.json()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
|
|
3
|
+
"""UIP SDK — 异常层级"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UIPError(Exception):
    """Base class for every error raised by the UIP SDK.

    Attributes:
        status_code: HTTP status associated with the error, or 0 when none.
        detail: Extra detail supplied by the caller (empty by default).
    """

    def __init__(self, message: str, status_code: int = 0, detail: str = ""):
        self.status_code = status_code
        self.detail = detail
        # Prefix the message with the status code only when one was given.
        text = message
        if status_code:
            text = f"[{status_code}] {message}"
        super().__init__(text)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AuthenticationError(UIPError):
    """Authentication failed (HTTP 401) or no credentials were provided."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class InsufficientBalanceError(UIPError):
    """Insufficient account balance (HTTP 402)."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class UIGOfflineError(UIPError):
    """All UIG nodes are offline (HTTP 503)."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TimeoutError(UIPError):
    """Request timed out (HTTP 504).

    Note: this name shadows the built-in ``TimeoutError`` wherever it is
    imported by name.
    """
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RateLimitError(UIPError):
    """Request was rate limited (HTTP 429)."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class NotFoundError(UIPError):
    """Requested resource does not exist (HTTP 404)."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ServerError(UIPError):
    """Server-side error (HTTP 5xx)."""
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
|
|
3
|
+
"""UIP SDK — 数据结构模型"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class TokenUsage:
    """Token counts for one request."""
    input: int = 0  # prompt / input tokens
    output: int = 0  # generated / output tokens
    total: int = 0  # total tokens; stays 0 unless the caller sets it
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class StreamChunk:
    """One fragment of a streaming response."""
    text: str = ""  # text carried by this fragment
    done: bool = False  # whether this fragment marks the end of the stream
    total_duration: Optional[int] = None  # as reported by the server, if any
    eval_count: Optional[int] = None  # as reported by the server, if any
    eval_duration: Optional[int] = None  # as reported by the server, if any
    tokens: Optional[TokenUsage] = None  # usage, when the chunk carries it
    index: Optional[int] = None  # choice index in a chat-completions stream
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class ChatResponse:
    """Response of a non-streaming chat completions call."""
    text: str = ""  # content of the first choice's message
    model: str = ""  # model name
    tokens: TokenUsage = field(default_factory=TokenUsage)  # token usage
    elapsed_ms: int = 0  # request latency in milliseconds; 0 when not measured
    finish_reason: str = ""  # finish reason of the first choice
    raw: dict = field(default_factory=dict)  # full decoded response body
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class GenResponse:
    """Response of a non-streaming generate call."""
    text: str = ""  # generated text ("response" field of the body)
    thinking: str = ""  # "thinking" field of the body, if any
    model: str = ""  # model name
    done: bool = True  # "done" flag of the body
    done_reason: str = ""  # "done_reason" field of the body
    tokens: TokenUsage = field(default_factory=TokenUsage)  # token usage
    # The following fields are copied verbatim from the response body and are
    # None when the server does not report them.
    total_duration: Optional[int] = None
    load_duration: Optional[int] = None
    prompt_eval_count: Optional[int] = None
    eval_count: Optional[int] = None
    eval_duration: Optional[int] = None
    elapsed_ms: int = 0  # client-side request latency in milliseconds
    raw: dict = field(default_factory=dict)  # full decoded response body
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class RerankItem:
    """A single rerank result."""
    index: int = 0  # "index" field of the result item
    document: str = ""  # document text
    relevance_score: float = 0.0  # relevance score reported for the document
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class RerankResponse:
    """Response of a rerank call."""
    results: list[RerankItem] = field(default_factory=list)  # ranked items
    total: int = 0  # "total" field of the response
    model: str = ""  # model name
    elapsed_ms: float = 0.0  # latency in milliseconds
    raw: dict = field(default_factory=dict)  # full decoded response body
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class EmbedResponse:
    """Response of an embeddings call."""
    embedding: list[float] = field(default_factory=list)  # embedding vector
    model: str = ""  # model name
    tokens: TokenUsage = field(default_factory=TokenUsage)  # token usage
    raw: dict = field(default_factory=dict)  # full decoded response body
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class BatchResult:
    """Result of a single prompt within a batch request."""
    index: int = 0  # "index" field of the result item
    prompt: str = ""  # the prompt text
    response: str = ""  # generated response text
    error: Optional[str] = None  # error reported for this item, if any
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class BatchResponse:
    """Response of a batch inference call."""
    total: int = 0  # "total" field of the response
    completed: int = 0  # "completed" field of the response
    errors: int = 0  # "errors" field of the response
    elapsed_ms: int = 0  # "elapsed_ms" field of the response
    results: list[BatchResult] = field(default_factory=list)  # per-prompt results
    model: str = ""  # model name
    raw: dict = field(default_factory=dict)  # full decoded response body
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass
class UploadResponse:
    """Response of a file upload call."""
    filename: str = ""  # "filename" field of the response
    original_name: str = ""  # original file name
    size: int = 0  # "size" field of the response
    url: str = ""  # "url" field of the response
    content_type: str = ""  # "content_type" field of the response
    raw: dict = field(default_factory=dict)  # full decoded response body
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class ModelItem:
    """One entry of the model list."""
    id: str = ""  # model identifier
    created: int = 0  # "created" field of the entry
    owned_by: str = "uip"  # "owned_by" field of the entry
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
|
|
3
|
+
"""UIP SDK — SSE 流解析器"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import AsyncIterator, Iterator, Optional
|
|
7
|
+
|
|
8
|
+
from .models import StreamChunk, TokenUsage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_sse_line(line: str) -> Optional[dict]:
    """Parse a single SSE ``data`` line into a dict.

    Args:
        line: One line of the event stream, with or without surrounding
            whitespace.

    Returns:
        ``{"done": True}`` for the ``[DONE]`` sentinel, the decoded JSON
        object for a ``data:`` line carrying one, and ``None`` for anything
        else (blank lines, comments, other SSE fields, invalid JSON, or JSON
        that is not an object).
    """
    line = line.strip()
    # The space after "data:" is optional in the SSE format.
    if not line.startswith("data:"):
        return None
    payload = line[len("data:"):].strip()
    if payload == "[DONE]":
        return {"done": True}
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        return None
    # Callers use dict methods on the result, so reject lists, numbers, etc.
    return parsed if isinstance(parsed, dict) else None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_chat_stream_chunk(data: dict) -> StreamChunk:
    """Convert one decoded chat-completions stream event into a StreamChunk.

    Args:
        data: Decoded JSON object of one stream event.

    Returns:
        A ``StreamChunk`` whose ``text`` is the first choice's delta content,
        ``index`` is that choice's index, and ``tokens`` is set when the event
        carries a ``usage`` object. ``done`` is true when the first choice has
        a finish reason, the event has a truthy ``done`` field, or the event
        carries usage.
    """
    text = ""
    finish = None
    index = None

    choices = data.get("choices") or []
    if choices:
        first = choices[0]
        delta = first.get("delta") or {}
        # "content" may be present but null; normalise to an empty string.
        text = delta.get("content") or ""
        finish = first.get("finish_reason")
        # The index is reported on its own field; it must not be mixed into
        # the finish reason, or every chunk would look finished.
        index = first.get("index")

    usage = None
    raw_usage = data.get("usage")
    if raw_usage:
        usage = TokenUsage(
            input=raw_usage.get("prompt_tokens", 0),
            output=raw_usage.get("completion_tokens", 0),
            total=raw_usage.get("total_tokens", 0),
        )

    done = bool(finish) or bool(data.get("done", False)) or usage is not None

    return StreamChunk(
        text=text,
        done=done,
        tokens=usage,
        index=index,
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def parse_gen_stream_chunk(data: dict) -> StreamChunk:
    """Convert one decoded generate stream event into a StreamChunk.

    Args:
        data: Decoded JSON object of one stream event.

    Returns:
        A ``StreamChunk`` with the event's ``response`` text, ``done`` flag
        and timing/count fields. ``tokens`` is filled only on a done event
        that reports ``eval_count``.
    """
    done = data.get("done", False)

    result = StreamChunk(
        text=data.get("response", ""),
        done=done,
        total_duration=data.get("total_duration"),
        eval_count=data.get("eval_count"),
        eval_duration=data.get("eval_duration"),
    )

    if done and data.get("eval_count") is not None:
        prompt_tokens = data.get("prompt_eval_count") or 0
        output_tokens = data.get("eval_count") or 0
        result.tokens = TokenUsage(
            input=prompt_tokens,
            output=output_tokens,
            # The event carries no total, so derive it.
            total=prompt_tokens + output_tokens,
        )

    return result
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def iter_sse_lines(body: Iterator[bytes]) -> Iterator[StreamChunk]:
    """Synchronously turn a byte stream into parsed stream chunks.

    Bytes are buffered until a newline is seen; each complete line is decoded
    as UTF-8 (invalid bytes replaced) and handed to ``parse_sse_line``. Lines
    that yield no payload are skipped. Iteration stops right after the first
    payload whose ``done`` entry is truthy.
    """
    pending = b""
    for piece in body:
        pending += piece
        # Everything before the last newline is a complete line; the final
        # element is the (possibly empty) unterminated tail kept for later.
        *complete, pending = pending.split(b"\n")
        for raw_line in complete:
            event = parse_sse_line(raw_line.decode("utf-8", errors="replace"))
            if not event:
                continue
            yield parse_gen_stream_chunk(event)
            if event.get("done"):
                return
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def iter_sse_lines_async(body: AsyncIterator[bytes]) -> AsyncIterator[StreamChunk]:
    """Asynchronously turn a byte stream into parsed stream chunks.

    Bytes are buffered until a newline is seen; each complete line is decoded
    as UTF-8 (invalid bytes replaced) and handed to ``parse_sse_line``. Lines
    that yield no payload are skipped. Iteration stops right after the first
    payload whose ``done`` entry is truthy.
    """
    pending = b""
    async for piece in body:
        pending += piece
        # Everything before the last newline is a complete line; the final
        # element is the (possibly empty) unterminated tail kept for later.
        *complete, pending = pending.split(b"\n")
        for raw_line in complete:
            event = parse_sse_line(raw_line.decode("utf-8", errors="replace"))
            if not event:
                continue
            yield parse_gen_stream_chunk(event)
            if event.get("done"):
                return
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: uip-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: UIP — Universal Inference Platform Python SDK
|
|
5
|
+
Author-email: Zhu Wenbo <zwb.2002@tsinghua.org.cn>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: httpx>=0.27
|
|
15
|
+
|
|
16
|
+
# UIP Python SDK
|
|
17
|
+
|
|
18
|
+
Universal Inference Platform 的 Python 客户端库。
|
|
19
|
+
|
|
20
|
+
## 安装
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install uip-sdk
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## 快速开始
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from uip_sdk import UIPClient
|
|
30
|
+
|
|
31
|
+
# 方式 1: API Key
|
|
32
|
+
client = UIPClient(api_key="ggw-xxx...")
|
|
33
|
+
|
|
34
|
+
# 方式 2: JWT Token
|
|
35
|
+
client = UIPClient(token="eyJhbGciOiJIUzI1NiIs...")
|
|
36
|
+
|
|
37
|
+
# 方式 3: 环境变量 (UIP_API_KEY)
|
|
38
|
+
client = UIPClient()
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 使用示例
|
|
42
|
+
|
|
43
|
+
### 对话 (Chat Completions)
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
resp = client.chat(
|
|
47
|
+
messages=[{"role": "user", "content": "你好"}],
|
|
48
|
+
model="qwen2.5:7b",
|
|
49
|
+
)
|
|
50
|
+
print(resp.text)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 流式生成
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
for chunk in client.generate("写一首关于春天的诗", stream=True):
|
|
57
|
+
print(chunk.text, end="", flush=True)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Rerank (文档重排序)
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
results = client.rerank(
|
|
64
|
+
query="CBA季后赛战术分析",
|
|
65
|
+
documents=[
|
|
66
|
+
"CBA联赛采用胜率决定排名",
|
|
67
|
+
"篮球三分线距离为6.75米",
|
|
68
|
+
"广东队采用全场紧逼战术",
|
|
69
|
+
],
|
|
70
|
+
model="Qwen3-Reranker-0.6B",
|
|
71
|
+
top_n=2,
|
|
72
|
+
)
|
|
73
|
+
for r in results.results:
|
|
74
|
+
print(f"#{r.index}: {r.document[:30]}... score={r.relevance_score:.2f}")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 批量推理
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
batch = client.batch(
|
|
81
|
+
prompts=["你好", "介绍你自己"],
|
|
82
|
+
model="qwen2.5:7b",
|
|
83
|
+
)
|
|
84
|
+
for item in batch.results:
|
|
85
|
+
print(f"[{item.index}] {item.response[:50]}")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 指定调度策略
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
client.with_strategy("least_queue").generate("hi")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### 嵌入向量
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
resp = client.embed(input="需要向量化的文本", model="bge-m3:567m")
|
|
98
|
+
print(len(resp.embedding)) # 1024
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
|
|
103
|
+
MIT License. Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn).
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
uip_sdk/__init__.py
|
|
4
|
+
uip_sdk/client.py
|
|
5
|
+
uip_sdk/errors.py
|
|
6
|
+
uip_sdk/models.py
|
|
7
|
+
uip_sdk/stream.py
|
|
8
|
+
uip_sdk.egg-info/PKG-INFO
|
|
9
|
+
uip_sdk.egg-info/SOURCES.txt
|
|
10
|
+
uip_sdk.egg-info/dependency_links.txt
|
|
11
|
+
uip_sdk.egg-info/requires.txt
|
|
12
|
+
uip_sdk.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
httpx>=0.27
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
uip_sdk
|