xparse-client 0.2.19__py3-none-any.whl → 0.3.0b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +188 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +351 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +225 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +132 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b8.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b8.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b8.dist-info}/top_level.txt +2 -0
  69. xparse_client/pipeline/__init__.py +0 -3
  70. xparse_client/pipeline/config.py +0 -129
  71. xparse_client/pipeline/destinations.py +0 -489
  72. xparse_client/pipeline/pipeline.py +0 -690
  73. xparse_client/pipeline/sources.py +0 -583
  74. xparse_client-0.2.19.dist-info/METADATA +0 -1050
  75. xparse_client-0.2.19.dist-info/RECORD +0 -11
@@ -0,0 +1,369 @@
1
+ """Pipeline API 测试
2
+
3
+ 测试 Pipeline API 的所有功能,包括:
4
+ - parse + chunk + embed 流程
5
+ - parse + extract 流程
6
+ - stages 参数(PipelineStage 对象和字典格式)
7
+ - config 参数
8
+ - data_source 参数
9
+ - 错误处理
10
+ """
11
+
12
+ import httpx
13
+ import pytest
14
+ import respx
15
+
16
+ from xparse_client import XParseClient
17
+ from xparse_client.exceptions import ServerError, ValidationError
18
+ from xparse_client.models import (
19
+ ChunkConfig,
20
+ ChunkStage,
21
+ EmbedConfig,
22
+ EmbedStage,
23
+ ExtractConfig,
24
+ ExtractStage,
25
+ ParseConfig,
26
+ ParseStage,
27
+ PipelineConfig,
28
+ )
29
+
30
+ # ============================================================================
31
+ # 基础功能测试
32
+ # ============================================================================
33
+
34
+
35
def test_pipeline_api_exists():
    """Check that a freshly built client exposes a usable ``pipeline`` attribute."""
    xparse = XParseClient(app_id="test", secret_code="test")

    # The attribute must be present and must not be None.
    assert hasattr(xparse, "pipeline")
    assert xparse.pipeline is not None
41
+
42
+
43
@respx.mock
def test_pipeline_parse_chunk_embed(server_url, pipeline_parse_chunk_embed_response):
    """Run a parse -> chunk -> embed pipeline against a mocked endpoint.

    The expected values asserted below are those carried by the
    ``pipeline_parse_chunk_embed_response`` fixture, which is defined
    outside this module.
    """
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_chunk_embed_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    # One typed stage object per pipeline step.
    parse_stage = ParseStage(config=ParseConfig(provider="textin"))
    chunk_stage = ChunkStage(config=ChunkConfig(strategy="basic", max_characters=1000))
    embed_stage = EmbedStage(
        config=EmbedConfig(provider="qwen", model_name="text-embedding-v4")
    )

    outcome = api.pipeline.execute(
        file=b"test pdf content",
        filename="test.pdf",
        stages=[parse_stage, chunk_stage, embed_stage],
    )

    # Response-side checks.
    assert outcome is not None
    assert outcome.job_id == "job_123"
    assert len(outcome.elements) == 1
    assert outcome.elements[0].embeddings is not None
    assert len(outcome.elements[0].embeddings) == 1024
    assert outcome.intermediate_results is not None
    assert len(outcome.intermediate_results) == 2  # parse + chunk

    # Request-side checks: the stage list must have been serialized into the body.
    sent_body = respx.calls.last.request.content
    for marker in (b"stages", b"parse", b"chunk", b"embed"):
        assert marker in sent_body
83
+
84
+
85
@respx.mock
def test_pipeline_parse_extract(server_url, pipeline_parse_extract_response):
    """Run a parse -> extract pipeline against a mocked endpoint.

    The expected job id and extracted field value are those carried by the
    ``pipeline_parse_extract_response`` fixture, which is defined outside
    this module.
    """
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_extract_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    # JSON-schema style description of the single field to extract.
    invoice_schema = {
        "type": "object",
        "properties": {"购买方": {"type": "string"}},
        "required": ["购买方"]
    }
    parse_stage = ParseStage(config=ParseConfig(provider="textin"))
    extract_stage = ExtractStage(config=ExtractConfig(schema_=invoice_schema))

    outcome = api.pipeline.execute(
        file=b"test invoice content",
        filename="invoice.pdf",
        stages=[parse_stage, extract_stage],
    )

    # Response-side checks.
    assert outcome is not None
    assert outcome.job_id == "job_456"
    assert outcome.extract_result is not None
    assert "extracted_schema" in outcome.extract_result
    assert outcome.extract_result["extracted_schema"]["购买方"] == "杭州百世网络技术有限公司"

    # Request-side checks: the stage list must have been serialized into the body.
    sent_body = respx.calls.last.request.content
    for marker in (b"stages", b"parse", b"extract"):
        assert marker in sent_body
129
+
130
+
131
@respx.mock
def test_pipeline_with_dict_stages(server_url, pipeline_parse_chunk_embed_response):
    """Check that ``execute`` accepts stages given as plain dictionaries."""
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_chunk_embed_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    # Same three steps as the typed variant, expressed as raw dicts.
    raw_stages = [
        {"type": "parse", "config": {"provider": "textin"}},
        {"type": "chunk", "config": {"strategy": "basic"}},
        {"type": "embed", "config": {"provider": "qwen"}},
    ]

    outcome = api.pipeline.execute(file=b"test", filename="test.pdf", stages=raw_stages)

    assert outcome is not None
    assert outcome.job_id == "job_123"
159
+
160
+
161
+ # ============================================================================
162
+ # 参数测试
163
+ # ============================================================================
164
+
165
+
166
@respx.mock
def test_pipeline_with_config(server_url, pipeline_parse_chunk_embed_response):
    """Check that a ``PipelineConfig`` passed to ``execute`` reaches the request body."""
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_chunk_embed_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    parse_stage = ParseStage(config=ParseConfig(provider="textin"))
    chunk_stage = ChunkStage(config=ChunkConfig(strategy="basic"))
    pipeline_config = PipelineConfig(include_intermediate_results=True)

    outcome = api.pipeline.execute(
        file=b"test",
        filename="test.pdf",
        stages=[parse_stage, chunk_stage],
        config=pipeline_config,
    )

    assert outcome is not None

    # The config object and its flag name must appear in the outgoing payload.
    sent_body = respx.calls.last.request.content
    assert b"config" in sent_body
    assert b"include_intermediate_results" in sent_body
199
+
200
+
201
@respx.mock
def test_pipeline_with_data_source(server_url, pipeline_parse_chunk_embed_response):
    """Check that a ``data_source`` mapping passed to ``execute`` reaches the request body."""
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_chunk_embed_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    only_parse = [ParseStage(config=ParseConfig(provider="textin"))]
    origin = {"url": "s3://bucket/file.pdf", "protocol": "s3"}

    outcome = api.pipeline.execute(
        file=b"test",
        filename="test.pdf",
        stages=only_parse,
        data_source=origin,
    )

    assert outcome is not None

    # Both the field name and the source URL must appear in the outgoing payload.
    sent_body = respx.calls.last.request.content
    assert b"data_source" in sent_body
    assert b"s3://bucket/file.pdf" in sent_body
236
+
237
+
238
+ # ============================================================================
239
+ # 错误处理测试
240
+ # ============================================================================
241
+
242
+
243
@respx.mock
def test_pipeline_invalid_stages_error(server_url, business_error_response):
    """Expect ``ValidationError`` when the API answers with a business error.

    The mocked endpoint replies with HTTP 200 whose JSON body is the
    ``business_error_response`` fixture (described by the original author
    as business code 400; the fixture itself is defined outside this module).
    """
    # HTTP-level success, business-level failure.
    error_reply = httpx.Response(200, json=business_error_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=error_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)
    bogus_stages = [{"type": "invalid_stage", "config": {}}]

    with pytest.raises(ValidationError) as caught:
        api.pipeline.execute(file=b"test", filename="test.pdf", stages=bogus_stages)

    # The raised error must carry the expected message fragment.
    assert "参数错误" in str(caught.value)
265
+
266
+
267
@respx.mock
def test_pipeline_server_error(server_url):
    """Expect ``ServerError`` when the pipeline endpoint answers with HTTP 500."""
    # Simulate an internal server failure.
    failure_reply = httpx.Response(500, json={"message": "服务器内部错误"})
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=failure_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)
    only_parse = [ParseStage(config=ParseConfig(provider="textin"))]

    with pytest.raises(ServerError) as caught:
        api.pipeline.execute(file=b"test", filename="test.pdf", stages=only_parse)

    # The raised error must carry the expected message fragment.
    assert "服务器" in str(caught.value)
289
+
290
+
291
+ # ============================================================================
292
+ # 中间结果测试
293
+ # ============================================================================
294
+
295
+
296
@respx.mock
def test_pipeline_intermediate_results(server_url, pipeline_parse_chunk_embed_response):
    """Check the per-stage intermediate results returned by the pipeline.

    The mocked response is the ``pipeline_parse_chunk_embed_response``
    fixture (defined outside this module); it is expected to hold two
    intermediate entries, for the parse and chunk stages in that order.
    """
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_chunk_embed_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    full_chain = [
        ParseStage(config=ParseConfig(provider="textin")),
        ChunkStage(config=ChunkConfig(strategy="basic")),
        EmbedStage(config=EmbedConfig(provider="qwen")),
    ]

    outcome = api.pipeline.execute(file=b"test", filename="test.pdf", stages=full_chain)

    assert outcome.intermediate_results is not None
    assert len(outcome.intermediate_results) == 2

    # Entry 0 belongs to the parse stage, entry 1 to the chunk stage.
    for position, stage_name in enumerate(("parse", "chunk")):
        entry = outcome.intermediate_results[position]
        assert entry["stage"] == stage_name
        assert "elements" in entry
334
+
335
+
336
@respx.mock
def test_pipeline_stats(server_url, pipeline_parse_chunk_embed_response):
    """Check the statistics block attached to a pipeline result.

    The expected counters are those carried by the
    ``pipeline_parse_chunk_embed_response`` fixture, which is defined
    outside this module.
    """
    # Stub the pipeline endpoint so the client never touches the network.
    canned_reply = httpx.Response(200, json=pipeline_parse_chunk_embed_response)
    respx.post(f"{server_url}/api/xparse/pipeline").mock(return_value=canned_reply)

    api = XParseClient(app_id="test", secret_code="test", server_url=server_url)

    full_chain = [
        ParseStage(config=ParseConfig(provider="textin")),
        ChunkStage(config=ChunkConfig(strategy="basic")),
        EmbedStage(config=EmbedConfig(provider="qwen")),
    ]

    outcome = api.pipeline.execute(file=b"test", filename="test.pdf", stages=full_chain)

    # Element counters.
    assert outcome.stats is not None
    assert outcome.stats.original_elements == 15
    assert outcome.stats.chunked_elements == 3
    assert outcome.stats.embedded_elements == 3

    # Per the original author's note, per-stage info is kept on the stats
    # model through extra="allow", so it is inspected via the dumped dict.
    dumped_stats = outcome.stats.model_dump()
    assert "stages" in dumped_stats
    assert len(dumped_stats["stages"]) == 3
@@ -0,0 +1,108 @@
1
+ """测试 Workflows API"""
2
+
3
+ import pytest
4
+
5
+ from xparse_client import XParseClient
6
+ from xparse_client.models import (
7
+ ParseConfig,
8
+ ParseStage,
9
+ Schedule,
10
+ WorkflowState,
11
+ )
12
+
13
+
14
@pytest.fixture
def client():
    """Provide an ``XParseClient`` built with placeholder credentials."""
    credentials = {"app_id": "test-app-id", "secret_code": "test-secret"}
    return XParseClient(**credentials)
21
+
22
+
23
def test_workflows_api_exists(client):
    """Check that the client exposes a non-None ``workflows`` attribute."""
    assert hasattr(client, "workflows")
    assert client.workflows is not None
28
+
29
+
30
def test_workflows_api_has_create(client):
    """Check that ``client.workflows`` offers a callable ``create``."""
    workflows_api = client.workflows
    assert hasattr(workflows_api, "create")
    create_method = workflows_api.create
    assert callable(create_method)
34
+
35
+
36
def test_workflows_api_has_list(client):
    """Check that ``client.workflows`` offers a callable ``list``."""
    workflows_api = client.workflows
    assert hasattr(workflows_api, "list")
    list_method = workflows_api.list
    assert callable(list_method)
40
+
41
+
42
def test_workflows_api_has_get(client):
    """Check that ``client.workflows`` offers a callable ``get``."""
    workflows_api = client.workflows
    assert hasattr(workflows_api, "get")
    get_method = workflows_api.get
    assert callable(get_method)
46
+
47
+
48
def test_workflows_api_has_update(client):
    """Check that ``client.workflows`` offers a callable ``update``."""
    workflows_api = client.workflows
    assert hasattr(workflows_api, "update")
    update_method = workflows_api.update
    assert callable(update_method)
52
+
53
+
54
def test_workflows_api_has_delete(client):
    """Check that ``client.workflows`` offers a callable ``delete``."""
    workflows_api = client.workflows
    assert hasattr(workflows_api, "delete")
    delete_method = workflows_api.delete
    assert callable(delete_method)
58
+
59
+
60
def test_workflows_api_has_run(client):
    """Check that ``client.workflows`` offers a callable ``run``."""
    workflows_api = client.workflows
    assert hasattr(workflows_api, "run")
    run_method = workflows_api.run
    assert callable(run_method)
64
+
65
+
66
def test_create_raises_not_implemented(client):
    """Expect ``workflows.create`` to raise ``NotImplementedError``.

    ``pytest.raises(match=...)`` treats its argument as a regular
    expression, so the dot in the expected message is escaped to match a
    literal ``.`` rather than any character.
    """
    # Build the arguments outside the ``raises`` block so that only the call
    # under test can satisfy the expectation.
    stages = [ParseStage(config=ParseConfig())]
    schedule = Schedule(cron="0 0 * * *")

    with pytest.raises(NotImplementedError, match=r"Workflows\.create 尚未实现"):
        client.workflows.create(
            name="test",
            source_id="src_123",
            destination_id="dst_456",
            stages=stages,
            schedule=schedule,
        )
76
+
77
+
78
def test_list_raises_not_implemented(client):
    """Expect ``workflows.list`` to raise ``NotImplementedError``.

    ``pytest.raises(match=...)`` treats its argument as a regular
    expression, so the dot in the expected message is escaped to match a
    literal ``.`` rather than any character.
    """
    with pytest.raises(NotImplementedError, match=r"Workflows\.list 尚未实现"):
        client.workflows.list()
82
+
83
+
84
def test_get_raises_not_implemented(client):
    """Expect ``workflows.get`` to raise ``NotImplementedError``.

    ``pytest.raises(match=...)`` treats its argument as a regular
    expression, so the dot in the expected message is escaped to match a
    literal ``.`` rather than any character.
    """
    with pytest.raises(NotImplementedError, match=r"Workflows\.get 尚未实现"):
        client.workflows.get(workflow_id="wf_123")
88
+
89
+
90
def test_update_raises_not_implemented(client):
    """Expect ``workflows.update`` to raise ``NotImplementedError``.

    ``pytest.raises(match=...)`` treats its argument as a regular
    expression, so the dot in the expected message is escaped to match a
    literal ``.`` rather than any character.
    """
    with pytest.raises(NotImplementedError, match=r"Workflows\.update 尚未实现"):
        client.workflows.update(workflow_id="wf_123", state=WorkflowState.PAUSED)
97
+
98
+
99
def test_delete_raises_not_implemented(client):
    """Expect ``workflows.delete`` to raise ``NotImplementedError``.

    ``pytest.raises(match=...)`` treats its argument as a regular
    expression, so the dot in the expected message is escaped to match a
    literal ``.`` rather than any character.
    """
    with pytest.raises(NotImplementedError, match=r"Workflows\.delete 尚未实现"):
        client.workflows.delete(workflow_id="wf_123")
103
+
104
+
105
def test_run_raises_not_implemented(client):
    """Expect ``workflows.run`` to raise ``NotImplementedError``.

    ``pytest.raises(match=...)`` treats its argument as a regular
    expression, so the dot in the expected message is escaped to match a
    literal ``.`` rather than any character.
    """
    with pytest.raises(NotImplementedError, match=r"Workflows\.run 尚未实现"):
        client.workflows.run(workflow_id="wf_123")