uip-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
uip_sdk-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: uip-sdk
3
+ Version: 0.1.0
4
+ Summary: UIP — Universal Inference Platform Python SDK
5
+ Author-email: Zhu Wenbo <zwb.2002@tsinghua.org.cn>
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: httpx>=0.27
15
+
16
+ # UIP Python SDK
17
+
18
+ Universal Inference Platform 的 Python 客户端库。
19
+
20
+ ## 安装
21
+
22
+ ```bash
23
+ pip install uip-sdk
24
+ ```
25
+
26
+ ## 快速开始
27
+
28
+ ```python
29
+ from uip_sdk import UIPClient
30
+
31
+ # 方式 1: API Key
32
+ client = UIPClient(api_key="ggw-xxx...")
33
+
34
+ # 方式 2: JWT Token
35
+ client = UIPClient(token="eyJhbGciOiJIUzI1NiIs...")
36
+
37
+ # 方式 3: 环境变量 (UIP_API_KEY 或 UIP_TOKEN)
38
+ client = UIPClient()
39
+ ```
40
+
41
+ ## 使用示例
42
+
43
+ ### 对话 (Chat Completions)
44
+
45
+ ```python
46
+ resp = client.chat(
47
+ messages=[{"role": "user", "content": "你好"}],
48
+ model="qwen2.5:7b",
49
+ )
50
+ print(resp.text)
51
+ ```
52
+
53
+ ### 流式生成
54
+
55
+ ```python
56
+ for chunk in client.generate("写一首关于春天的诗", stream=True):
57
+ print(chunk.text, end="", flush=True)
58
+ ```
59
+
60
+ ### Rerank (文档重排序)
61
+
62
+ ```python
63
+ results = client.rerank(
64
+ query="CBA季后赛战术分析",
65
+ documents=[
66
+ "CBA联赛采用胜率决定排名",
67
+ "篮球三分线距离为6.75米",
68
+ "广东队采用全场紧逼战术",
69
+ ],
70
+ model="Qwen3-Reranker-0.6B",
71
+ top_n=2,
72
+ )
73
+ for r in results.results:
74
+ print(f"#{r.index}: {r.document[:30]}... score={r.relevance_score:.2f}")
75
+ ```
76
+
77
+ ### 批量推理
78
+
79
+ ```python
80
+ batch = client.batch(
81
+ prompts=["你好", "介绍你自己"],
82
+ model="qwen2.5:7b",
83
+ )
84
+ for item in batch.results:
85
+ print(f"[{item.index}] {item.response[:50]}")
86
+ ```
87
+
88
+ ### 指定调度策略
89
+
90
+ ```python
91
+ client.with_strategy("least_queue").generate("hi")
92
+ ```
93
+
94
+ ### 嵌入向量
95
+
96
+ ```python
97
+ resp = client.embed(input="需要向量化的文本", model="bge-m3:567m")
98
+ print(len(resp.embedding)) # 向量维度 (bge-m3 为 1024)
99
+ ```
100
+
101
+ ## License
102
+
103
+ MIT License. Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn).
@@ -0,0 +1,88 @@
1
+ # UIP Python SDK
2
+
3
+ Universal Inference Platform 的 Python 客户端库。
4
+
5
+ ## 安装
6
+
7
+ ```bash
8
+ pip install uip-sdk
9
+ ```
10
+
11
+ ## 快速开始
12
+
13
+ ```python
14
+ from uip_sdk import UIPClient
15
+
16
+ # 方式 1: API Key
17
+ client = UIPClient(api_key="ggw-xxx...")
18
+
19
+ # 方式 2: JWT Token
20
+ client = UIPClient(token="eyJhbGciOiJIUzI1NiIs...")
21
+
22
+ # 方式 3: 环境变量 (UIP_API_KEY 或 UIP_TOKEN)
23
+ client = UIPClient()
24
+ ```
25
+
26
+ ## 使用示例
27
+
28
+ ### 对话 (Chat Completions)
29
+
30
+ ```python
31
+ resp = client.chat(
32
+ messages=[{"role": "user", "content": "你好"}],
33
+ model="qwen2.5:7b",
34
+ )
35
+ print(resp.text)
36
+ ```
37
+
38
+ ### 流式生成
39
+
40
+ ```python
41
+ for chunk in client.generate("写一首关于春天的诗", stream=True):
42
+ print(chunk.text, end="", flush=True)
43
+ ```
44
+
45
+ ### Rerank (文档重排序)
46
+
47
+ ```python
48
+ results = client.rerank(
49
+ query="CBA季后赛战术分析",
50
+ documents=[
51
+ "CBA联赛采用胜率决定排名",
52
+ "篮球三分线距离为6.75米",
53
+ "广东队采用全场紧逼战术",
54
+ ],
55
+ model="Qwen3-Reranker-0.6B",
56
+ top_n=2,
57
+ )
58
+ for r in results.results:
59
+ print(f"#{r.index}: {r.document[:30]}... score={r.relevance_score:.2f}")
60
+ ```
61
+
62
+ ### 批量推理
63
+
64
+ ```python
65
+ batch = client.batch(
66
+ prompts=["你好", "介绍你自己"],
67
+ model="qwen2.5:7b",
68
+ )
69
+ for item in batch.results:
70
+ print(f"[{item.index}] {item.response[:50]}")
71
+ ```
72
+
73
+ ### 指定调度策略
74
+
75
+ ```python
76
+ client.with_strategy("least_queue").generate("hi")
77
+ ```
78
+
79
+ ### 嵌入向量
80
+
81
+ ```python
82
+ resp = client.embed(input="需要向量化的文本", model="bge-m3:567m")
83
+ print(len(resp.embedding)) # 向量维度 (bge-m3 为 1024)
84
+ ```
85
+
86
+ ## License
87
+
88
+ MIT License. Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn).
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "uip-sdk"
7
+ version = "0.1.0"
8
+ description = "UIP — Universal Inference Platform Python SDK"
9
+ readme = "README.md"
10
+ authors = [{name = "Zhu Wenbo", email = "zwb.2002@tsinghua.org.cn"}]
11
+ license = {text = "MIT"}
12
+ requires-python = ">=3.10"
13
+ dependencies = ["httpx>=0.27"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ ]
21
+
22
+ [tool.setuptools.packages.find]
23
+ include = ["uip_sdk*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,52 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
3
+ """
4
+ UIP Python SDK — Universal Inference Platform Client Library
5
+
6
+ Usage:
7
+ from uip_sdk import UIPClient
8
+
9
+ client = UIPClient(base_url="http://10.0.1.115:11438", api_key="ggw-xxx")
10
+ for chunk in client.generate("你好", stream=True):
11
+ print(chunk.text, end="")
12
+ """
13
+
14
# Public API of the package. Every exception the client can raise and every
# dataclass that appears inside a public response is re-exported here so that
# callers never have to import from the private submodules.
from .client import UIPClient
from .models import (
    BatchResponse,
    BatchResult,
    ChatResponse,
    EmbedResponse,
    GenResponse,
    ModelItem,
    RerankItem,
    RerankResponse,
    StreamChunk,
    TokenUsage,
    UploadResponse,
)
from .errors import (
    AuthenticationError,
    InsufficientBalanceError,
    NotFoundError,
    RateLimitError,
    ServerError,
    # NOTE: shadows the builtin TimeoutError for anyone doing
    # ``from uip_sdk import *``; kept for backward compatibility.
    TimeoutError,
    UIGOfflineError,
    UIPError,
)

__all__ = [
    "UIPClient",
    "ChatResponse",
    "GenResponse",
    "EmbedResponse",
    "RerankResponse",
    "RerankItem",
    "BatchResponse",
    "BatchResult",
    "UploadResponse",
    "ModelItem",
    "StreamChunk",
    "TokenUsage",
    "UIPError",
    "AuthenticationError",
    "InsufficientBalanceError",
    "UIGOfflineError",
    "TimeoutError",
    "RateLimitError",
    "NotFoundError",
    "ServerError",
]
@@ -0,0 +1,437 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
3
+ """UIP SDK — 核心客户端"""
4
+
5
+ import json
6
+ import os
7
+ import time
8
+ from typing import Any, AsyncIterator, Iterator, Optional, Union
9
+
10
+ import httpx
11
+
12
+ from .errors import (
13
+ AuthenticationError,
14
+ InsufficientBalanceError,
15
+ NotFoundError,
16
+ RateLimitError,
17
+ ServerError,
18
+ TimeoutError,
19
+ UIGOfflineError,
20
+ UIPError,
21
+ )
22
+ from .models import (
23
+ BatchResponse,
24
+ BatchResult,
25
+ ChatResponse,
26
+ EmbedResponse,
27
+ GenResponse,
28
+ ModelItem,
29
+ RerankItem,
30
+ RerankResponse,
31
+ StreamChunk,
32
+ TokenUsage,
33
+ UploadResponse,
34
+ )
35
+ from .stream import iter_sse_lines
36
+
37
+
38
class UIPClient:
    """Synchronous client for the UIP (Universal Inference Platform) HTTP API.

    Args:
        base_url: Root URL of the UIP API (default ``http://10.0.1.115:11438``).
        api_key: API key, sent as the ``x-api-key`` header.
        token: JWT token, sent as ``Authorization: Bearer <token>``. When both
            a token and an API key are available the token is used.
        timeout: HTTP timeout in seconds (default 300).
        strategy: Optional scheduling strategy, sent as ``X-UIP-Strategy``
            (e.g. ``"least_queue"``).

    Raises:
        AuthenticationError: If no credential is passed and neither the
            ``UIP_API_KEY`` nor the ``UIP_TOKEN`` environment variable is set.
    """

    # HTTP status -> (exception class, message) for statuses that have a
    # dedicated exception type. Other statuses fall back to ServerError (5xx)
    # or UIPError (remaining 4xx) in ``_raise_on_error``.
    _STATUS_ERRORS = {
        401: (AuthenticationError, "认证失败,请检查 api_key 或 token"),
        402: (InsufficientBalanceError, "余额不足"),
        404: (NotFoundError, "资源不存在"),
        429: (RateLimitError, "请求频率限制"),
        503: (UIGOfflineError, "UIG 全部离线"),
        504: (TimeoutError, "请求超时"),
    }

    def __init__(
        self,
        base_url: str = "http://10.0.1.115:11438",
        api_key: Optional[str] = None,
        token: Optional[str] = None,
        timeout: int = 300,
        strategy: Optional[str] = None,
    ):
        self.base_url = base_url.rstrip("/")
        if api_key or token:
            # Explicit credentials win over the environment. Previously an
            # ambient UIP_TOKEN silently overrode an explicitly passed api_key
            # because the token takes precedence when headers are built.
            self._api_key = api_key or ""
            self._token = token or ""
        else:
            self._api_key = os.environ.get("UIP_API_KEY", "")
            self._token = os.environ.get("UIP_TOKEN", "")
        self.timeout = timeout
        self._strategy = strategy

        if not self._api_key and not self._token:
            raise AuthenticationError(
                "需要提供 api_key 或 token。"
                "可通过参数传入或设置 UIP_API_KEY 环境变量。"
            )

        self._client = httpx.Client(timeout=httpx.Timeout(self.timeout), base_url=self.base_url)

    # ─── Auth headers / low-level HTTP ─────────────────────────────

    def _headers(self) -> dict:
        """Build the per-request auth (and optional strategy) headers."""
        headers = {}
        if self._token:
            headers["Authorization"] = f"Bearer {self._token}"
        elif self._api_key:
            headers["x-api-key"] = self._api_key
        if self._strategy:
            headers["X-UIP-Strategy"] = self._strategy
        return headers

    def _raise_on_error(self, resp: httpx.Response) -> None:
        """Raise the SDK exception matching an HTTP error status.

        Does nothing for status codes below 400. The exception's ``detail`` is
        the ``detail`` field of a JSON object body when present, otherwise the
        first 200 characters of the response text.

        The response body must already be loaded (for streamed responses call
        ``resp.read()`` first).
        """
        status = resp.status_code
        if status < 400:
            return

        fallback = resp.text[:200]
        try:
            body = resp.json()
        except ValueError:  # not JSON (JSONDecodeError / UnicodeDecodeError)
            detail = fallback
        else:
            detail = body.get("detail", fallback) if isinstance(body, dict) else fallback

        if status in self._STATUS_ERRORS:
            exc_type, message = self._STATUS_ERRORS[status]
            raise exc_type(message, status, detail)
        if status >= 500:
            raise ServerError("服务端错误", status, detail)
        raise UIPError("请求失败", status, detail)

    def _post(self, endpoint: str, json_body: dict) -> httpx.Response:
        """POST ``json_body`` to ``endpoint`` and return the successful response."""
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        resp = self._client.post(url, json=json_body, headers=self._headers())
        self._raise_on_error(resp)
        return resp

    def _post_stream(self, endpoint: str, json_body: dict) -> httpx.Response:
        """POST asking for ``text/event-stream`` and return the response.

        NOTE: this uses ``Client.post`` and therefore buffers the entire body
        before returning. It is kept for backward compatibility; the streaming
        methods use ``_iter_stream_bytes`` instead.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        resp = self._client.post(
            url, json=json_body,
            headers={**self._headers(), "Accept": "text/event-stream"},
        )
        self._raise_on_error(resp)
        return resp

    def _iter_stream_bytes(self, endpoint: str, json_body: dict) -> Iterator[bytes]:
        """Yield raw body chunks of a streamed POST as they arrive.

        The HTTP response stays open only while this generator is alive;
        closing or exhausting it releases the connection.

        Raises:
            UIPError: (or a subclass) if the server answers with status >= 400.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        headers = {**self._headers(), "Accept": "text/event-stream"}
        with self._client.stream("POST", url, json=json_body, headers=headers) as resp:
            if resp.status_code >= 400:
                # A streamed body must be read explicitly before .text/.json()
                # can be used to build the error detail.
                resp.read()
            self._raise_on_error(resp)
            yield from resp.iter_bytes()

    @staticmethod
    def _split_lines(chunks: Iterator[bytes]) -> Iterator[bytes]:
        """Re-chunk a byte stream into ``\\n``-separated lines.

        A final line that is not newline-terminated is yielded as well.
        """
        pending = b""
        for chunk in chunks:
            pending += chunk
            *complete, pending = pending.split(b"\n")
            yield from complete
        if pending:
            yield pending

    # ─── Context manager ───────────────────────────────────────────

    def close(self):
        """Close the underlying HTTP client."""
        self._client.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    # ─── Chat Completions ──────────────────────────────────────────

    def chat(
        self,
        messages: list[dict],
        model: str = "qwen2.5:7b",
        stream: bool = False,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        top_p: float = 1.0,
        **kwargs,
    ) -> Union[ChatResponse, Iterator[StreamChunk]]:
        """Call the OpenAI-compatible chat completions endpoint.

        Args:
            messages: Message list, e.g. ``[{"role": "user", "content": "hi"}]``.
            model: Model name.
            stream: Whether to stream the answer.
            temperature: Sampling temperature.
            max_tokens: Maximum number of output tokens; omitted when ``None``.
            top_p: Nucleus sampling value; only sent when it differs from 1.0.
            **kwargs: Extra fields merged into the request body.

        Returns:
            ``ChatResponse`` when ``stream`` is false, otherwise an iterator of
            ``StreamChunk``.

        Raises:
            UIPError: (or a subclass) on an HTTP error status. In streaming
                mode the error is raised on first iteration.
        """
        body = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
            **kwargs,
        }
        if max_tokens is not None:
            body["max_tokens"] = max_tokens
        if top_p != 1.0:
            body["top_p"] = top_p

        if stream:
            return self._stream_chat(body)

        started = time.monotonic()
        resp = self._post("api/v1/chat/completions", body)
        elapsed_ms = int((time.monotonic() - started) * 1000)
        data = resp.json()

        # ``or`` guards cover both a missing key and an explicit null / empty
        # list, which would otherwise raise IndexError / AttributeError.
        choice = (data.get("choices") or [{}])[0]
        usage = data.get("usage") or {}
        message = choice.get("message") or {}
        return ChatResponse(
            text=message.get("content") or "",
            model=data.get("model", model),
            tokens=TokenUsage(
                input=usage.get("prompt_tokens", 0),
                output=usage.get("completion_tokens", 0),
                total=usage.get("total_tokens", 0),
            ),
            elapsed_ms=elapsed_ms,
            finish_reason=choice.get("finish_reason") or "",
            raw=data,
        )

    def _stream_chat(self, body: dict) -> Iterator[StreamChunk]:
        """Yield parsed chunks of a streamed chat completion until ``[DONE]``."""
        from .stream import parse_chat_stream_chunk, parse_sse_line

        byte_stream = self._iter_stream_bytes("api/v1/chat/completions", body)
        try:
            for line in self._split_lines(byte_stream):
                text = line.decode("utf-8", errors="replace").strip()
                if not text:
                    continue
                parsed = parse_sse_line(text)
                if not parsed:
                    continue
                if parsed.get("done"):
                    return
                yield parse_chat_stream_chunk(parsed)
        finally:
            # Release the HTTP response even when the consumer stops early.
            byte_stream.close()

    # ─── Generate ─────────────────────────────────────────────────

    def generate(
        self,
        prompt: str,
        model: str = "qwen2.5:7b",
        system: str = "",
        stream: bool = False,
        options: Optional[dict] = None,
    ) -> Union[GenResponse, Iterator[StreamChunk]]:
        """Call the Ollama-compatible generate endpoint.

        Args:
            prompt: Input prompt.
            model: Model name.
            system: System prompt; omitted when empty.
            stream: Whether to stream the answer.
            options: Ollama options (e.g. ``{"num_predict": 100}``).

        Returns:
            ``GenResponse`` when ``stream`` is false, otherwise an iterator of
            ``StreamChunk``.

        Raises:
            UIPError: (or a subclass) on an HTTP error status. In streaming
                mode the error is raised on first iteration.
        """
        body = {"model": model, "prompt": prompt, "stream": stream}
        if system:
            body["system"] = system
        if options:
            body["options"] = options

        if stream:
            return self._stream_generate(body)

        started = time.monotonic()
        resp = self._post("api/v1/generate", body)
        elapsed_ms = int((time.monotonic() - started) * 1000)
        data = resp.json()

        prompt_tokens = data.get("prompt_eval_count") or 0
        output_tokens = data.get("eval_count") or 0
        return GenResponse(
            text=data.get("response", ""),
            thinking=data.get("thinking", ""),
            model=data.get("model", model),
            done=data.get("done", True),
            done_reason=data.get("done_reason", ""),
            tokens=TokenUsage(
                input=prompt_tokens,
                output=output_tokens,
                total=prompt_tokens + output_tokens,
            ),
            total_duration=data.get("total_duration"),
            load_duration=data.get("load_duration"),
            prompt_eval_count=data.get("prompt_eval_count"),
            eval_count=data.get("eval_count"),
            eval_duration=data.get("eval_duration"),
            elapsed_ms=elapsed_ms,
            raw=data,
        )

    def _stream_generate(self, body: dict) -> Iterator[StreamChunk]:
        """Yield parsed chunks of a streamed generate call."""
        byte_stream = self._iter_stream_bytes("api/v1/generate", body)
        try:
            yield from iter_sse_lines(byte_stream)
        finally:
            byte_stream.close()

    # ─── Embeddings ───────────────────────────────────────────────

    def embed(
        self,
        input: Union[str, list[str]],
        model: str = "bge-m3:567m",
    ) -> EmbedResponse:
        """Embed text.

        The vector is looked up, in order, at ``data[0].embedding``,
        ``embedding`` and ``embeddings[0]`` of the response body. Only the
        first vector is returned even when ``input`` is a list; the full
        payload is available on ``raw``.

        Args:
            input: Text (or list of texts) to embed.
            model: Embedding model name.
        """
        resp = self._post("api/v1/embeddings", {"model": model, "input": input})
        data = resp.json()

        usage = data.get("usage") or {}
        first_item = (data.get("data") or [{}])[0]
        embedding = first_item.get("embedding") or data.get("embedding") or []
        if not embedding:
            embedding = (data.get("embeddings") or [[]])[0]

        return EmbedResponse(
            embedding=embedding,
            model=data.get("model", model),
            tokens=TokenUsage(
                input=usage.get("prompt_tokens", usage.get("total_tokens", 0)),
                output=0,
                total=usage.get("total_tokens", 0),
            ),
            raw=data,
        )

    # ─── Rerank ───────────────────────────────────────────────────

    def rerank(
        self,
        query: str,
        documents: list[str],
        model: str = "Qwen3-Reranker-0.6B",
        top_n: Optional[int] = None,
    ) -> RerankResponse:
        """Rerank documents against a query.

        Args:
            query: Query text.
            documents: Candidate documents.
            model: Rerank model name.
            top_n: Return only the top N results; omitted (all) when ``None``.
        """
        body = {"model": model, "query": query, "documents": documents}
        if top_n is not None:
            body["top_n"] = top_n

        started = time.monotonic()
        # NOTE(review): unlike every other endpoint this path has no
        # "api/v1/" prefix — confirm against the server routes.
        resp = self._post("rerank", body)
        elapsed_ms = int((time.monotonic() - started) * 1000)
        data = resp.json()

        results = [
            RerankItem(
                index=item.get("index", 0),
                document=item.get("document", ""),
                relevance_score=item.get("relevance_score", 0.0),
            )
            for item in data.get("results", [])
        ]
        return RerankResponse(
            results=results,
            total=data.get("total", len(documents)),
            model=data.get("model", model),
            # Prefer the server-reported time; fall back to the client-side
            # round-trip measurement.
            elapsed_ms=data.get("elapsed_ms", elapsed_ms),
            raw=data,
        )

    # ─── Batch ────────────────────────────────────────────────────

    def batch(
        self,
        prompts: list[str],
        model: str = "qwen2.5:7b",
        system: str = "",
    ) -> BatchResponse:
        """Run several prompts in one batch request.

        Args:
            prompts: Prompts to run.
            model: Model name.
            system: System prompt; omitted when empty.
        """
        body = {"model": model, "prompts": prompts, "stream": False}
        if system:
            body["system"] = system

        resp = self._post("api/v1/batch/completions", body)
        data = resp.json()

        results = [
            BatchResult(
                index=item.get("index", 0),
                prompt=item.get("prompt", ""),
                response=item.get("response", ""),
                error=item.get("error"),
            )
            for item in data.get("results", [])
        ]
        return BatchResponse(
            total=data.get("total", 0),
            completed=data.get("completed", 0),
            errors=data.get("errors", 0),
            elapsed_ms=data.get("elapsed_ms", 0),
            results=results,
            model=data.get("model", model),
            raw=data,
        )

    # ─── Upload ───────────────────────────────────────────────────

    def upload(self, file_path: str) -> UploadResponse:
        """Upload a local file as multipart form data.

        Args:
            file_path: Path of the local file.

        Raises:
            OSError: If the file cannot be opened.
            UIPError: (or a subclass) on an HTTP error status.
        """
        filename = os.path.basename(file_path)
        url = f"{self.base_url}/api/v1/upload"
        with open(file_path, "rb") as fh:
            # Use the shared client so the upload reuses its connection pool
            # and timeout instead of opening a one-off connection.
            resp = self._client.post(
                url, files={"file": (filename, fh)}, headers=self._headers()
            )
        self._raise_on_error(resp)

        data = resp.json()
        return UploadResponse(
            filename=data.get("filename", ""),
            original_name=data.get("original_name", filename),
            size=data.get("size", 0),
            url=data.get("url", ""),
            content_type=data.get("content_type", ""),
            raw=data,
        )

    # ─── Models ───────────────────────────────────────────────────

    def list_models(self) -> list[ModelItem]:
        """Return the models listed under ``data`` by ``api/v1/models``."""
        # NOTE(review): sent as POST with an empty JSON body; OpenAI-style
        # servers usually expose this as GET — confirm against the server.
        resp = self._post("api/v1/models", {})
        data = resp.json()
        return [
            ModelItem(
                id=entry.get("id", ""),
                created=entry.get("created", 0),
                owned_by=entry.get("owned_by", "uip"),
            )
            for entry in data.get("data", [])
        ]

    # ─── Strategy ─────────────────────────────────────────────────

    def with_strategy(self, strategy: str) -> "UIPClient":
        """Set the scheduling strategy and return ``self`` for chaining.

        The strategy is stored on this client, so it also applies to every
        later request, not only to the chained call.

        Args:
            strategy: model_first / least_queue / weighted_rr / affinity
        """
        self._strategy = strategy
        return self

    # ─── Health ───────────────────────────────────────────────────

    def ping(self) -> dict:
        """Health check: POST to ``api/v1/ping`` and return the JSON body."""
        resp = self._post("api/v1/ping", {})
        return resp.json()
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
3
+ """UIP SDK — 异常层级"""
4
+
5
+
6
class UIPError(Exception):
    """Base class of every exception raised by the UIP SDK.

    Args:
        message: Human-readable description.
        status_code: HTTP status code, or 0 when the error is not tied to one.
        detail: Extra detail supplied by the caller.

    The exception text is ``"[<status_code>] <message>"`` when ``status_code``
    is truthy and the bare message otherwise. ``detail`` is stored on the
    instance but is not part of the text.
    """

    def __init__(self, message: str, status_code: int = 0, detail: str = ""):
        self.status_code = status_code
        self.detail = detail
        if status_code:
            rendered = f"[{status_code}] {message}"
        else:
            rendered = message
        super().__init__(rendered)
12
+
13
+
14
class AuthenticationError(UIPError):
    """Authentication failed or no credential was supplied (HTTP 401)."""
17
+
18
+
19
class InsufficientBalanceError(UIPError):
    """The account balance is insufficient (HTTP 402)."""
22
+
23
+
24
class UIGOfflineError(UIPError):
    """All UIG backends are offline (HTTP 503)."""
27
+
28
+
29
class TimeoutError(UIPError):
    """The request timed out on the server side (HTTP 504).

    NOTE: this name shadows the builtin ``TimeoutError`` in any module that
    imports it; the two classes are unrelated.
    """
32
+
33
+
34
class RateLimitError(UIPError):
    """The request was rejected by rate limiting (HTTP 429)."""
37
+
38
+
39
class NotFoundError(UIPError):
    """The requested resource does not exist (HTTP 404)."""
42
+
43
+
44
class ServerError(UIPError):
    """Server-side failure (HTTP 5xx)."""
@@ -0,0 +1,122 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
3
+ """UIP SDK — 数据结构模型"""
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional
7
+
8
+
9
@dataclass
class TokenUsage:
    """Token counts attached to a response."""

    # Prompt / input tokens.
    input: int = 0
    # Generated / output tokens.
    output: int = 0
    # Total as filled in by the caller; not derived from the other two here.
    total: int = 0
15
+
16
+
17
@dataclass
class StreamChunk:
    """One piece of a streamed response."""

    # Text carried by this chunk.
    text: str = ""
    # True once the stream has finished.
    done: bool = False
    # Timing / count fields copied from a generate-stream payload when present.
    total_duration: Optional[int] = None
    eval_count: Optional[int] = None
    eval_duration: Optional[int] = None
    # Token usage, when the chunk carries it.
    tokens: Optional[TokenUsage] = None
    # Choice index for chat-completions streams.
    index: Optional[int] = None
27
+
28
+
29
@dataclass
class ChatResponse:
    """Result of a non-streaming chat completion."""

    # Assistant message content.
    text: str = ""
    # Model name reported for the completion.
    model: str = ""
    # Token usage of the call.
    tokens: TokenUsage = field(default_factory=TokenUsage)
    # Elapsed time in milliseconds.
    elapsed_ms: int = 0
    # Finish reason of the first choice.
    finish_reason: str = ""
    # Unmodified JSON body of the response.
    raw: dict = field(default_factory=dict)
38
+
39
+
40
@dataclass
class GenResponse:
    """Result of a non-streaming generate call."""

    # Generated text.
    text: str = ""
    # "thinking" text, when the response carries one.
    thinking: str = ""
    # Model name reported for the generation.
    model: str = ""
    # Completion flag and reason.
    done: bool = True
    done_reason: str = ""
    # Token usage of the call.
    tokens: TokenUsage = field(default_factory=TokenUsage)
    # Timing / count fields copied from the response payload when present.
    total_duration: Optional[int] = None
    load_duration: Optional[int] = None
    prompt_eval_count: Optional[int] = None
    eval_count: Optional[int] = None
    eval_duration: Optional[int] = None
    # Elapsed time in milliseconds.
    elapsed_ms: int = 0
    # Unmodified JSON body of the response.
    raw: dict = field(default_factory=dict)
56
+
57
+
58
@dataclass
class RerankItem:
    """A single reranked document."""

    # Index of the document, as reported in the result.
    index: int = 0
    # Document text.
    document: str = ""
    # Relevance score of the document for the query.
    relevance_score: float = 0.0
64
+
65
+
66
@dataclass
class RerankResponse:
    """Result of a rerank call."""

    # Reranked documents.
    results: list[RerankItem] = field(default_factory=list)
    # Total count for the call.
    total: int = 0
    # Model name reported for the rerank.
    model: str = ""
    # Elapsed time in milliseconds.
    elapsed_ms: float = 0.0
    # Unmodified JSON body of the response.
    raw: dict = field(default_factory=dict)
74
+
75
+
76
@dataclass
class EmbedResponse:
    """Result of an embedding call."""

    # Embedding vector.
    embedding: list[float] = field(default_factory=list)
    # Model name reported for the embedding.
    model: str = ""
    # Token usage of the call.
    tokens: TokenUsage = field(default_factory=TokenUsage)
    # Unmodified JSON body of the response.
    raw: dict = field(default_factory=dict)
83
+
84
+
85
@dataclass
class BatchResult:
    """Outcome of one prompt inside a batch call."""

    # Index of the prompt, as reported in the result.
    index: int = 0
    # The prompt text.
    prompt: str = ""
    # Generated response text.
    response: str = ""
    # Error message for this prompt, or None when there is none.
    error: Optional[str] = None
92
+
93
+
94
@dataclass
class BatchResponse:
    """Result of a batch inference call."""

    # Counters for the batch.
    total: int = 0
    completed: int = 0
    errors: int = 0
    # Elapsed time in milliseconds.
    elapsed_ms: int = 0
    # Per-prompt outcomes.
    results: list[BatchResult] = field(default_factory=list)
    # Model name reported for the batch.
    model: str = ""
    # Unmodified JSON body of the response.
    raw: dict = field(default_factory=dict)
104
+
105
+
106
@dataclass
class UploadResponse:
    """Result of a file upload."""

    # File name reported in the upload result.
    filename: str = ""
    # Name of the file as it was uploaded.
    original_name: str = ""
    # File size.
    size: int = 0
    # URL of the uploaded file.
    url: str = ""
    # Content type of the uploaded file.
    content_type: str = ""
    # Unmodified JSON body of the response.
    raw: dict = field(default_factory=dict)
115
+
116
+
117
@dataclass
class ModelItem:
    """One entry of the model list."""

    # Model identifier.
    id: str = ""
    # Creation value as reported for the model.
    created: int = 0
    # Owner label; defaults to "uip".
    owned_by: str = "uip"
@@ -0,0 +1,106 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn). Licensed under the MIT License.
3
+ """UIP SDK — SSE 流解析器"""
4
+
5
+ import json
6
+ from typing import AsyncIterator, Iterator, Optional
7
+
8
+ from .models import StreamChunk, TokenUsage
9
+
10
+
11
def parse_sse_line(line: str) -> Optional[dict]:
    """Parse one SSE ``data`` line into a dict.

    Args:
        line: A single line of the event stream, with or without surrounding
            whitespace / line terminator.

    Returns:
        * ``{"done": True}`` for the ``[DONE]`` sentinel;
        * the decoded JSON object for a ``data:`` line carrying one;
        * ``None`` for anything else (blank lines, comments, other SSE fields,
          malformed JSON, or JSON that is not an object).
    """
    line = line.strip()
    # The space after the colon is optional in the SSE format, so accept both
    # "data: {...}" and "data:{...}".
    if not line.startswith("data:"):
        return None
    payload = line[len("data:"):].strip()
    if payload == "[DONE]":
        return {"done": True}
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        return None
    # Callers immediately use ``.get`` on the result, so only objects qualify.
    return parsed if isinstance(parsed, dict) else None
23
+
24
+
25
def parse_chat_stream_chunk(data: dict) -> StreamChunk:
    """Convert one decoded chat-completions stream payload into a StreamChunk.

    Only the first entry of ``choices`` is read.

    Args:
        data: JSON object of a single stream event.

    Returns:
        A chunk whose ``text`` is the delta content (``""`` when absent or
        null), ``index`` is the choice index (``None`` when absent) and
        ``tokens`` is the usage block when the payload has one. ``done`` is
        true when the choice has a non-empty ``finish_reason``, the payload
        has a truthy ``done`` field, or the payload carries usage.
    """
    text = ""
    finish_reason = None
    index = None

    choices = data.get("choices") or []
    if choices:
        choice = choices[0]
        delta = choice.get("delta") or {}
        # A delta may carry ``"content": null``; never leak None as text.
        text = delta.get("content") or ""
        finish_reason = choice.get("finish_reason")
        # The choice index belongs on the chunk. It must not be folded into
        # the finish reason, which would mark every chunk as done.
        index = choice.get("index")

    usage = None
    raw_usage = data.get("usage")
    if raw_usage:
        usage = TokenUsage(
            input=raw_usage.get("prompt_tokens", 0),
            output=raw_usage.get("completion_tokens", 0),
            total=raw_usage.get("total_tokens", 0),
        )

    done = bool(finish_reason) or bool(data.get("done", False)) or usage is not None

    return StreamChunk(
        text=text,
        done=done,
        tokens=usage,
        index=index,
    )
55
+
56
+
57
def parse_gen_stream_chunk(data: dict) -> StreamChunk:
    """Convert one decoded generate-stream payload into a StreamChunk.

    Args:
        data: JSON object of a single stream event.

    Returns:
        A chunk with the payload's ``response`` text, ``done`` flag and timing
        fields. ``tokens`` is filled only on a done payload that carries
        ``eval_count``; its total is the sum of prompt and output counts.
    """
    done = data.get("done", False)
    eval_count = data.get("eval_count")

    usage = None
    if done and eval_count is not None:
        # A missing or null prompt count is treated as 0.
        prompt_count = data.get("prompt_eval_count") or 0
        usage = TokenUsage(
            input=prompt_count,
            output=eval_count,
            total=prompt_count + eval_count,
        )

    return StreamChunk(
        text=data.get("response") or "",
        done=done,
        total_duration=data.get("total_duration"),
        eval_count=eval_count,
        eval_duration=data.get("eval_duration"),
        tokens=usage,
    )
77
+
78
+
79
def iter_sse_lines(body: Iterator[bytes]) -> Iterator[StreamChunk]:
    """Parse a synchronous byte stream of SSE lines into StreamChunks.

    Bytes are buffered and split on ``\\n``; each complete line goes through
    ``parse_sse_line`` and, when it yields a payload, through
    ``parse_gen_stream_chunk``. Iteration stops after the first payload whose
    ``done`` field is truthy (that chunk is still yielded). A final line
    without a trailing newline is parsed as well instead of being dropped.

    Args:
        body: Iterator of raw response chunks.
    """
    buf = b""
    for chunk in body:
        buf += chunk
        # Everything before the last "\n" is complete; the tail stays buffered.
        *lines, buf = buf.split(b"\n")
        for line in lines:
            parsed = parse_sse_line(line.decode("utf-8", errors="replace"))
            if not parsed:
                continue
            yield parse_gen_stream_chunk(parsed)
            if parsed.get("done"):
                return

    if buf:
        # The stream ended without a final newline.
        parsed = parse_sse_line(buf.decode("utf-8", errors="replace"))
        if parsed:
            yield parse_gen_stream_chunk(parsed)
92
+
93
+
94
async def iter_sse_lines_async(body: AsyncIterator[bytes]) -> AsyncIterator[StreamChunk]:
    """Parse an asynchronous byte stream of SSE lines into StreamChunks.

    Bytes are buffered and split on ``\\n``; each complete line goes through
    ``parse_sse_line`` and, when it yields a payload, through
    ``parse_gen_stream_chunk``. Iteration stops after the first payload whose
    ``done`` field is truthy (that chunk is still yielded). A final line
    without a trailing newline is parsed as well instead of being dropped.

    Args:
        body: Async iterator of raw response chunks.
    """
    buf = b""
    async for chunk in body:
        buf += chunk
        # Everything before the last "\n" is complete; the tail stays buffered.
        *lines, buf = buf.split(b"\n")
        for line in lines:
            parsed = parse_sse_line(line.decode("utf-8", errors="replace"))
            if not parsed:
                continue
            yield parse_gen_stream_chunk(parsed)
            if parsed.get("done"):
                return

    if buf:
        # The stream ended without a final newline.
        parsed = parse_sse_line(buf.decode("utf-8", errors="replace"))
        if parsed:
            yield parse_gen_stream_chunk(parsed)
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: uip-sdk
3
+ Version: 0.1.0
4
+ Summary: UIP — Universal Inference Platform Python SDK
5
+ Author-email: Zhu Wenbo <zwb.2002@tsinghua.org.cn>
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: httpx>=0.27
15
+
16
+ # UIP Python SDK
17
+
18
+ Universal Inference Platform 的 Python 客户端库。
19
+
20
+ ## 安装
21
+
22
+ ```bash
23
+ pip install uip-sdk
24
+ ```
25
+
26
+ ## 快速开始
27
+
28
+ ```python
29
+ from uip_sdk import UIPClient
30
+
31
+ # 方式 1: API Key
32
+ client = UIPClient(api_key="ggw-xxx...")
33
+
34
+ # 方式 2: JWT Token
35
+ client = UIPClient(token="eyJhbGciOiJIUzI1NiIs...")
36
+
37
+ # 方式 3: 环境变量 (UIP_API_KEY 或 UIP_TOKEN)
38
+ client = UIPClient()
39
+ ```
40
+
41
+ ## 使用示例
42
+
43
+ ### 对话 (Chat Completions)
44
+
45
+ ```python
46
+ resp = client.chat(
47
+ messages=[{"role": "user", "content": "你好"}],
48
+ model="qwen2.5:7b",
49
+ )
50
+ print(resp.text)
51
+ ```
52
+
53
+ ### 流式生成
54
+
55
+ ```python
56
+ for chunk in client.generate("写一首关于春天的诗", stream=True):
57
+ print(chunk.text, end="", flush=True)
58
+ ```
59
+
60
+ ### Rerank (文档重排序)
61
+
62
+ ```python
63
+ results = client.rerank(
64
+ query="CBA季后赛战术分析",
65
+ documents=[
66
+ "CBA联赛采用胜率决定排名",
67
+ "篮球三分线距离为6.75米",
68
+ "广东队采用全场紧逼战术",
69
+ ],
70
+ model="Qwen3-Reranker-0.6B",
71
+ top_n=2,
72
+ )
73
+ for r in results.results:
74
+ print(f"#{r.index}: {r.document[:30]}... score={r.relevance_score:.2f}")
75
+ ```
76
+
77
+ ### 批量推理
78
+
79
+ ```python
80
+ batch = client.batch(
81
+ prompts=["你好", "介绍你自己"],
82
+ model="qwen2.5:7b",
83
+ )
84
+ for item in batch.results:
85
+ print(f"[{item.index}] {item.response[:50]}")
86
+ ```
87
+
88
+ ### 指定调度策略
89
+
90
+ ```python
91
+ client.with_strategy("least_queue").generate("hi")
92
+ ```
93
+
94
+ ### 嵌入向量
95
+
96
+ ```python
97
+ resp = client.embed(input="需要向量化的文本", model="bge-m3:567m")
98
+ print(len(resp.embedding)) # 向量维度 (bge-m3 为 1024)
99
+ ```
100
+
101
+ ## License
102
+
103
+ MIT License. Copyright (c) 2026 Zhu Wenbo (zwb.2002@tsinghua.org.cn).
@@ -0,0 +1,12 @@
1
+ README.md
2
+ pyproject.toml
3
+ uip_sdk/__init__.py
4
+ uip_sdk/client.py
5
+ uip_sdk/errors.py
6
+ uip_sdk/models.py
7
+ uip_sdk/stream.py
8
+ uip_sdk.egg-info/PKG-INFO
9
+ uip_sdk.egg-info/SOURCES.txt
10
+ uip_sdk.egg-info/dependency_links.txt
11
+ uip_sdk.egg-info/requires.txt
12
+ uip_sdk.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ httpx>=0.27
@@ -0,0 +1 @@
1
+ uip_sdk