xparse-client 0.2.19__py3-none-any.whl → 0.3.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +111 -20
  31. xparse_client/_base.py +179 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +350 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +215 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +134 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +136 -0
  62. xparse_client/models/pipeline.py +134 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b3.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b3.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.19.dist-info → xparse_client-0.3.0b3.dist-info}/top_level.txt +2 -0
  69. xparse_client/pipeline/__init__.py +0 -3
  70. xparse_client/pipeline/config.py +0 -129
  71. xparse_client/pipeline/destinations.py +0 -489
  72. xparse_client/pipeline/pipeline.py +0 -690
  73. xparse_client/pipeline/sources.py +0 -583
  74. xparse_client-0.2.19.dist-info/METADATA +0 -1050
  75. xparse_client-0.2.19.dist-info/RECORD +0 -11
@@ -0,0 +1,68 @@
1
+ example/1_basic_api_usage.py,sha256=x_ZHEWXz6z7qp-sMLBvq7Vpu6nA7YxVCizyQpDday6M,5802
2
+ example/2_async_job.py,sha256=MCjWii3ksmONiIyCWviBrKPZIwrlCPDvmUPP_F8qXPM,6353
3
+ example/3_local_workflow.py,sha256=ZxoT8N0Nz4_s-bkLF0l6UDqBJk_eZZi99WUtUhJseXs,8980
4
+ example/4_advanced_workflow.py,sha256=cgG0xke8-WNPxqx7iSTl9qgoJsVctXf5mjDkp-SaQho,10159
5
+ example/README.md,sha256=d-DumaZwbV96K6ZTEryAlDSk9HUhRBg0gG_Yekx9PqA,2336
6
+ example/config_example.json,sha256=mBaLmKbgxFBKUgA-FtCTiLMtq24z0QRwKbPeYDGwA9I,2517
7
+ tests/conftest.py,sha256=h6sy9RNQmXTQNTu86h7ATDAg-9wI4cKjzKXUbNNH5q8,10037
8
+ tests/unit/__init__.py,sha256=C1308ox8hFFtq3A1vBAVPzL6r8HGVbEbC42ULCpFkjs,25
9
+ tests/unit/test_base.py,sha256=HRW2irYNAX863PBCtJWvN9EJ_UttfOwRI7CRnDdcjEk,11599
10
+ tests/unit/test_client.py,sha256=92geEJ6jBht7nFr5suRbJ6SmkvzCu_HGB5MhSl4Qmr8,3534
11
+ tests/unit/test_config.py,sha256=OAUKvmEtpC-eAglnDzhp1bUyMlUpaeqLAa1QwyxCUdw,5498
12
+ tests/unit/test_exceptions.py,sha256=BEaGSs34_jmdjUuT4e3S3QONDqldamiXNId04PPdMt4,5155
13
+ tests/unit/test_http.py,sha256=3h-O6k5rcVP_I1sPoISOYLdHAKtSxoPrDOfC3EJyNi0,16490
14
+ tests/unit/api/__init__.py,sha256=5HiLverbqHC-yFxXClrHBjHq62KoONLZPLMMOkYR3eU,23
15
+ tests/unit/api/test_extract.py,sha256=QX4_BAQN6MkRTJQocEf9u6HwOA_XoY_a2djMcuLBCTo,6959
16
+ tests/unit/api/test_local.py,sha256=_DXC8PattJ-CH9inlwDu9s1RCocTGVeYpKFpTiwDwa4,7541
17
+ tests/unit/api/test_parse.py,sha256=EUA3emZFAFefop3CcUgqyLGC1lhd5ujx6x-dGE5h3pw,12639
18
+ tests/unit/api/test_pipeline.py,sha256=0TC_upZbSllQfuz7uGOCvx28BDNCnX1POR92JLnfrIE,11067
19
+ tests/unit/api/test_workflows.py,sha256=lYVbOWph8ZqxszFQTgcFHnxgdp9jjJM35F-wKO3IUuY,3286
20
+ tests/unit/connectors/test_ftp.py,sha256=7NbV7cL9tIy03L7_sWoALrfDmjda_xOgfAoBfUnF9ks,14605
21
+ tests/unit/connectors/test_local_connectors.py,sha256=QS2T_eLXJtHSaOrbtwYrRSgAVrL3jkO3QVPoMEpi9d8,9855
22
+ tests/unit/connectors/test_milvus.py,sha256=X2VRv639QSntpjx_7u1cOwJg0HSKPumuDmd26FWxMKA,11911
23
+ tests/unit/connectors/test_qdrant.py,sha256=wCIG36CjbDhF0Jq53jnNzMliq5eXMWgnCPiIHtVBbew,12682
24
+ tests/unit/connectors/test_s3.py,sha256=Xbo-S9khgS1S3LM3BUIuYp-28YLEmEW0SP1O-iJ8g5g,18444
25
+ tests/unit/connectors/test_smb.py,sha256=yrI_xNeOe84vONyxXNOGLbWa7tcyxjzo9G5Z_Tjzn-w,12378
26
+ tests/unit/connectors/test_utils.py,sha256=RfnzTW5-9w2f5YeLbNTB_z3vVnsUSL7DwXNobo7xZUA,10302
27
+ tests/unit/models/test_local.py,sha256=QnxzWKOivqeITd0CPzZLEzGbaMDtiwsPiwyEHiPpcPY,1325
28
+ tests/unit/models/test_pipeline_stages.py,sha256=vT7Pd0lRWdChTQJuCZ3iUBx8C0xFXQXTBVB6jtWtEKU,3972
29
+ tests/unit/models/test_workflows.py,sha256=Al4d3SQI1jTz585i1kD569TZx4IWNrhyf3_Le6-cq2k,1692
30
+ xparse_client/__init__.py,sha256=FyR7W0eteapDoPj2pU4VoTobM8XR2B7f_g4yVI-v2v8,3956
31
+ xparse_client/_base.py,sha256=Vx85kDns565zwv6Zl6_8jl6b43nNprfBHbqdCVPGGTc,4420
32
+ xparse_client/_client.py,sha256=ZGxZ9xhUitTP-eRhte57ekqUkrXxNubFDL5gojajA2s,6964
33
+ xparse_client/_config.py,sha256=BG3cEW5ywKhmtfHjfTFrdezs2Ugc9P2Y85tzu5OZZ4M,6480
34
+ xparse_client/_http.py,sha256=OrnrkTeuG5tEpuD-a1ixLIFPdiqnVIYtuZCi3uXAzJ4,11381
35
+ xparse_client/exceptions.py,sha256=FNozgCFaQSsdeyxE6P-3CZItkTSfFJcIQVrddHWKN0s,9521
36
+ xparse_client/api/__init__.py,sha256=MZ3wBTDU666efOBv81MKKtc1UkjUsjzIEOpsnDUAHD4,203
37
+ xparse_client/api/extract.py,sha256=87tH-y4sXPKW35l-d0GmZdK9RIXWq4tfmGZXYJhIN60,3011
38
+ xparse_client/api/local.py,sha256=LkwtGFCZifWuWbQlq2Y2dXPB3aXW1x1P89b-o0MJC7s,7662
39
+ xparse_client/api/parse.py,sha256=nvpu5kiN43QlyMW3N6vMruVADu6MdjS-TMbLFjBh-cc,5870
40
+ xparse_client/api/pipeline.py,sha256=bPCSpWjER-7c7L0iMQ_Xz1VTe5DARMsX5XIcrayGKzw,4530
41
+ xparse_client/api/workflows.py,sha256=29_Rf4bIH1fiJb_bWShkUm-et97ym5ymQFh5i3b5mDI,5945
42
+ xparse_client/connectors/__init__.py,sha256=BLGxk9i8BsVCr4RMrCIh8b9cywBqXXmcDHqfF7C_FX0,1113
43
+ xparse_client/connectors/_utils.py,sha256=Qb2OyUg8gvzyMBELx4h0GmjRSdT4d9C45v9m6NUNuMg,3515
44
+ xparse_client/connectors/destinations/__init__.py,sha256=hmvWAXj58yLmePMCEnqKbNhJ74UxErugSFxBxPhlb14,1401
45
+ xparse_client/connectors/destinations/base.py,sha256=DfwwH5S8VPwEUyvWqUKDV9Bq-7A59lqoZHruOjB7L50,3218
46
+ xparse_client/connectors/destinations/local.py,sha256=drZ5wkPMiYd8iOROmLJR3eoCXcYKUpAPPjSFpafeNW8,2542
47
+ xparse_client/connectors/destinations/milvus.py,sha256=MhElUsIMY1g6_c_bD-wjpWOM5PiuZ9bLsgXACW7EY1c,7339
48
+ xparse_client/connectors/destinations/qdrant.py,sha256=Mo1wCwtsM7OIMLZ0HsEBnvE8N9BGnyTGBGJ1B8oYIew,7661
49
+ xparse_client/connectors/destinations/s3.py,sha256=2aeTW0cNUVPLBL8up9InFfgXBcFDPtqFNGa5FbUphhk,4728
50
+ xparse_client/connectors/sources/__init__.py,sha256=830EDkdGJtPC2p1MhIP108uLibGeMoXcXAwPAEACPvQ,1143
51
+ xparse_client/connectors/sources/base.py,sha256=fbRWFWuIcw_XWrOPpPkBnQ0Pf2R9N7Ry4523rxWcOpc,1863
52
+ xparse_client/connectors/sources/ftp.py,sha256=bjOjnd-2l0MZ2EAcGM3jvuTP7FMWBiQaEbgHzhIl3og,8170
53
+ xparse_client/connectors/sources/local.py,sha256=ZYTGWdQmUpzdenn3JQ_p0u9-8x7xXk5Ani2GIAqTGyE,5156
54
+ xparse_client/connectors/sources/s3.py,sha256=RCzinmz9cCqp9cFcfEozBapa2nllFvJCw9P-aLeOJQA,7219
55
+ xparse_client/connectors/sources/smb.py,sha256=_F496mipGZJ0y17H-sYTlqAf2uJScxGyit7kirTyDKc,7786
56
+ xparse_client/models/__init__.py,sha256=zLBFirgXDisEeqKHqpsH1bN5p2zE7ZmCtVENZ69mlOg,1213
57
+ xparse_client/models/chunk.py,sha256=VXKrATgk7bhlPVbJTFOUvOWu9oEnNMgMF6AnQarJD7o,986
58
+ xparse_client/models/embed.py,sha256=Tg9iqOiLZy63m4T0q2z9cq3neuQbMnqDqT55-sVA7CE,1652
59
+ xparse_client/models/extract.py,sha256=ZwMy0MsTt775Bq3UsZ9WlXFNCLrSnN0j1RgXQ-vFNqo,915
60
+ xparse_client/models/local.py,sha256=hqKmyWTU_EhPk6qybtBzmbLybBBaiasTpqM4_zP-ipo,698
61
+ xparse_client/models/parse.py,sha256=K34Fodo3emOPZJJQeu3MLjvA7HkVMxAuXk20UvO4lfQ,3297
62
+ xparse_client/models/pipeline.py,sha256=24bDzhrVotQ8St6VLEJLvG2cZF0G2AMioKI34I_hJXI,3297
63
+ xparse_client/models/workflows.py,sha256=BivMdGOAmhP6oYLQSGAAN7yml2xb7vHHrpzwLgN_Afk,1754
64
+ xparse_client-0.3.0b3.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
65
+ xparse_client-0.3.0b3.dist-info/METADATA,sha256=6F2nf_y5odBzTzTLF8N4MA-aqW9F3_EnCCmnPtiqwvg,25453
66
+ xparse_client-0.3.0b3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
+ xparse_client-0.3.0b3.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
68
+ xparse_client-0.3.0b3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 ACG Xparse Authors
3
+ Copyright (c) 2025 INTSIG-TEXTIN
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1 +1,3 @@
1
+ example
2
+ tests
1
3
  xparse_client
@@ -1,3 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- encoding: utf-8 -*-
3
-
@@ -1,129 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- encoding: utf-8 -*-
3
-
4
- from dataclasses import dataclass, asdict
5
- from typing import Dict, Any, Optional, Literal, List, Union
6
- from .destinations import Destination
7
-
8
-
9
- class ParseConfig:
10
- """Parse 配置,支持动态字段"""
11
-
12
- def __init__(self, provider: Literal["textin", "mineru", "paddle", "textin-lite"] = "textin", **kwargs):
13
- self.provider = provider
14
- for key, value in kwargs.items():
15
- setattr(self, key, value)
16
-
17
- def to_dict(self) -> Dict[str, Any]:
18
- result = {'provider': self.provider}
19
- for key, value in self.__dict__.items():
20
- if not key.startswith('_') and key != 'provider':
21
- result[key] = value
22
- return result
23
-
24
- def __repr__(self) -> str:
25
- attrs = ', '.join(f'{k}={v!r}' for k, v in sorted(self.__dict__.items()) if not k.startswith('_'))
26
- return f'ParseConfig({attrs})'
27
-
28
-
29
- @dataclass
30
- class ChunkConfig:
31
- """Chunk 配置"""
32
- strategy: Literal["basic", "by_title", "by_page"] = "basic"
33
- include_orig_elements: bool = False
34
- new_after_n_chars: int = 512
35
- max_characters: int = 1024
36
- overlap: int = 0
37
- overlap_all: bool = False
38
-
39
- def to_dict(self) -> Dict[str, Any]:
40
- return asdict(self)
41
-
42
-
43
- @dataclass
44
- class EmbedConfig:
45
- """Embed 配置"""
46
- provider: Literal["qwen", "doubao"] = "qwen"
47
- model_name: Literal[
48
- "text-embedding-v3",
49
- "text-embedding-v4",
50
- "doubao-embedding-large-text-250515",
51
- "doubao-embedding-text-240715"
52
- ] = "text-embedding-v3"
53
-
54
- def to_dict(self) -> Dict[str, Any]:
55
- return asdict(self)
56
-
57
- def validate(self) -> None:
58
- provider_models = {
59
- "qwen": ["text-embedding-v3", "text-embedding-v4"],
60
- "doubao": ["doubao-embedding-large-text-250515", "doubao-embedding-text-240715"]
61
- }
62
- if self.provider not in provider_models:
63
- raise ValueError(f"不支持的 provider: {self.provider}, 支持的有: {list(provider_models.keys())}")
64
- if self.model_name not in provider_models[self.provider]:
65
- raise ValueError(
66
- f"provider '{self.provider}' 不支持模型 '{self.model_name}', 支持的模型: {provider_models[self.provider]}"
67
- )
68
-
69
-
70
- @dataclass
71
- class Stage:
72
- """Pipeline Stage 配置"""
73
- type: Literal["parse", "chunk", "embed"]
74
- config: Union[ParseConfig, ChunkConfig, EmbedConfig, Dict[str, Any]]
75
-
76
- def to_dict(self) -> Dict[str, Any]:
77
- """转换为字典格式,用于 API 请求"""
78
- if isinstance(self.config, (ParseConfig, ChunkConfig, EmbedConfig)):
79
- return {
80
- 'type': self.type,
81
- 'config': self.config.to_dict()
82
- }
83
- else:
84
- # 如果 config 已经是字典,直接使用
85
- return {
86
- 'type': self.type,
87
- 'config': self.config
88
- }
89
-
90
- def __repr__(self) -> str:
91
- return f"Stage(type={self.type!r}, config={self.config!r})"
92
-
93
-
94
- @dataclass
95
- class PipelineStats:
96
- """Pipeline 统计信息"""
97
- original_elements: int = 0
98
- chunked_elements: int = 0
99
- embedded_elements: int = 0
100
- stages: Optional[List[Stage]] = None # 存储实际执行的 stages
101
- record_id: Optional[str] = None # 记录 ID,用于标识需要写入 Milvus 的记录
102
-
103
-
104
- @dataclass
105
- class PipelineConfig:
106
- """Pipeline 配置"""
107
- include_intermediate_results: bool = False
108
- intermediate_results_destination: Optional[Destination] = None # 支持 Destination 类型,如 LocalDestination、S3Destination 等
109
-
110
- def __post_init__(self):
111
- """验证配置"""
112
- if self.include_intermediate_results and not self.intermediate_results_destination:
113
- raise ValueError("当 include_intermediate_results 为 True 时,必须设置 intermediate_results_destination")
114
-
115
- def to_dict(self) -> Dict[str, Any]:
116
- return {
117
- 'include_intermediate_results': self.include_intermediate_results,
118
- }
119
-
120
-
121
- __all__ = [
122
- 'ParseConfig',
123
- 'ChunkConfig',
124
- 'EmbedConfig',
125
- 'Stage',
126
- 'PipelineStats',
127
- 'PipelineConfig',
128
- ]
129
-