xparse-client 0.2.20__py3-none-any.whl → 0.3.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. example/1_basic_api_usage.py +198 -0
  2. example/2_async_job.py +210 -0
  3. example/3_local_workflow.py +300 -0
  4. example/4_advanced_workflow.py +327 -0
  5. example/README.md +128 -0
  6. example/config_example.json +95 -0
  7. tests/conftest.py +310 -0
  8. tests/unit/__init__.py +1 -0
  9. tests/unit/api/__init__.py +1 -0
  10. tests/unit/api/test_extract.py +232 -0
  11. tests/unit/api/test_local.py +231 -0
  12. tests/unit/api/test_parse.py +374 -0
  13. tests/unit/api/test_pipeline.py +369 -0
  14. tests/unit/api/test_workflows.py +108 -0
  15. tests/unit/connectors/test_ftp.py +525 -0
  16. tests/unit/connectors/test_local_connectors.py +324 -0
  17. tests/unit/connectors/test_milvus.py +368 -0
  18. tests/unit/connectors/test_qdrant.py +399 -0
  19. tests/unit/connectors/test_s3.py +598 -0
  20. tests/unit/connectors/test_smb.py +442 -0
  21. tests/unit/connectors/test_utils.py +335 -0
  22. tests/unit/models/test_local.py +54 -0
  23. tests/unit/models/test_pipeline_stages.py +144 -0
  24. tests/unit/models/test_workflows.py +55 -0
  25. tests/unit/test_base.py +437 -0
  26. tests/unit/test_client.py +110 -0
  27. tests/unit/test_config.py +160 -0
  28. tests/unit/test_exceptions.py +182 -0
  29. tests/unit/test_http.py +562 -0
  30. xparse_client/__init__.py +110 -20
  31. xparse_client/_base.py +179 -0
  32. xparse_client/_client.py +218 -0
  33. xparse_client/_config.py +221 -0
  34. xparse_client/_http.py +350 -0
  35. xparse_client/api/__init__.py +14 -0
  36. xparse_client/api/extract.py +109 -0
  37. xparse_client/api/local.py +188 -0
  38. xparse_client/api/parse.py +209 -0
  39. xparse_client/api/pipeline.py +132 -0
  40. xparse_client/api/workflows.py +204 -0
  41. xparse_client/connectors/__init__.py +45 -0
  42. xparse_client/connectors/_utils.py +138 -0
  43. xparse_client/connectors/destinations/__init__.py +45 -0
  44. xparse_client/connectors/destinations/base.py +116 -0
  45. xparse_client/connectors/destinations/local.py +91 -0
  46. xparse_client/connectors/destinations/milvus.py +229 -0
  47. xparse_client/connectors/destinations/qdrant.py +238 -0
  48. xparse_client/connectors/destinations/s3.py +163 -0
  49. xparse_client/connectors/sources/__init__.py +45 -0
  50. xparse_client/connectors/sources/base.py +74 -0
  51. xparse_client/connectors/sources/ftp.py +278 -0
  52. xparse_client/connectors/sources/local.py +176 -0
  53. xparse_client/connectors/sources/s3.py +232 -0
  54. xparse_client/connectors/sources/smb.py +259 -0
  55. xparse_client/exceptions.py +398 -0
  56. xparse_client/models/__init__.py +60 -0
  57. xparse_client/models/chunk.py +39 -0
  58. xparse_client/models/embed.py +62 -0
  59. xparse_client/models/extract.py +41 -0
  60. xparse_client/models/local.py +38 -0
  61. xparse_client/models/parse.py +136 -0
  62. xparse_client/models/pipeline.py +132 -0
  63. xparse_client/models/workflows.py +74 -0
  64. xparse_client-0.3.0b2.dist-info/METADATA +1075 -0
  65. xparse_client-0.3.0b2.dist-info/RECORD +68 -0
  66. {xparse_client-0.2.20.dist-info → xparse_client-0.3.0b2.dist-info}/WHEEL +1 -1
  67. {xparse_client-0.2.20.dist-info → xparse_client-0.3.0b2.dist-info}/licenses/LICENSE +1 -1
  68. {xparse_client-0.2.20.dist-info → xparse_client-0.3.0b2.dist-info}/top_level.txt +2 -0
  69. xparse_client/pipeline/__init__.py +0 -3
  70. xparse_client/pipeline/config.py +0 -163
  71. xparse_client/pipeline/destinations.py +0 -489
  72. xparse_client/pipeline/pipeline.py +0 -860
  73. xparse_client/pipeline/sources.py +0 -583
  74. xparse_client-0.2.20.dist-info/METADATA +0 -1050
  75. xparse_client-0.2.20.dist-info/RECORD +0 -11
@@ -0,0 +1,68 @@
1
+ example/1_basic_api_usage.py,sha256=x_ZHEWXz6z7qp-sMLBvq7Vpu6nA7YxVCizyQpDday6M,5802
2
+ example/2_async_job.py,sha256=MCjWii3ksmONiIyCWviBrKPZIwrlCPDvmUPP_F8qXPM,6353
3
+ example/3_local_workflow.py,sha256=ZxoT8N0Nz4_s-bkLF0l6UDqBJk_eZZi99WUtUhJseXs,8980
4
+ example/4_advanced_workflow.py,sha256=cgG0xke8-WNPxqx7iSTl9qgoJsVctXf5mjDkp-SaQho,10159
5
+ example/README.md,sha256=d-DumaZwbV96K6ZTEryAlDSk9HUhRBg0gG_Yekx9PqA,2336
6
+ example/config_example.json,sha256=mBaLmKbgxFBKUgA-FtCTiLMtq24z0QRwKbPeYDGwA9I,2517
7
+ tests/conftest.py,sha256=h6sy9RNQmXTQNTu86h7ATDAg-9wI4cKjzKXUbNNH5q8,10037
8
+ tests/unit/__init__.py,sha256=C1308ox8hFFtq3A1vBAVPzL6r8HGVbEbC42ULCpFkjs,25
9
+ tests/unit/test_base.py,sha256=HRW2irYNAX863PBCtJWvN9EJ_UttfOwRI7CRnDdcjEk,11599
10
+ tests/unit/test_client.py,sha256=92geEJ6jBht7nFr5suRbJ6SmkvzCu_HGB5MhSl4Qmr8,3534
11
+ tests/unit/test_config.py,sha256=OAUKvmEtpC-eAglnDzhp1bUyMlUpaeqLAa1QwyxCUdw,5498
12
+ tests/unit/test_exceptions.py,sha256=BEaGSs34_jmdjUuT4e3S3QONDqldamiXNId04PPdMt4,5155
13
+ tests/unit/test_http.py,sha256=3h-O6k5rcVP_I1sPoISOYLdHAKtSxoPrDOfC3EJyNi0,16490
14
+ tests/unit/api/__init__.py,sha256=5HiLverbqHC-yFxXClrHBjHq62KoONLZPLMMOkYR3eU,23
15
+ tests/unit/api/test_extract.py,sha256=QX4_BAQN6MkRTJQocEf9u6HwOA_XoY_a2djMcuLBCTo,6959
16
+ tests/unit/api/test_local.py,sha256=_DXC8PattJ-CH9inlwDu9s1RCocTGVeYpKFpTiwDwa4,7541
17
+ tests/unit/api/test_parse.py,sha256=EUA3emZFAFefop3CcUgqyLGC1lhd5ujx6x-dGE5h3pw,12639
18
+ tests/unit/api/test_pipeline.py,sha256=0TC_upZbSllQfuz7uGOCvx28BDNCnX1POR92JLnfrIE,11067
19
+ tests/unit/api/test_workflows.py,sha256=lYVbOWph8ZqxszFQTgcFHnxgdp9jjJM35F-wKO3IUuY,3286
20
+ tests/unit/connectors/test_ftp.py,sha256=7NbV7cL9tIy03L7_sWoALrfDmjda_xOgfAoBfUnF9ks,14605
21
+ tests/unit/connectors/test_local_connectors.py,sha256=QS2T_eLXJtHSaOrbtwYrRSgAVrL3jkO3QVPoMEpi9d8,9855
22
+ tests/unit/connectors/test_milvus.py,sha256=X2VRv639QSntpjx_7u1cOwJg0HSKPumuDmd26FWxMKA,11911
23
+ tests/unit/connectors/test_qdrant.py,sha256=wCIG36CjbDhF0Jq53jnNzMliq5eXMWgnCPiIHtVBbew,12682
24
+ tests/unit/connectors/test_s3.py,sha256=Xbo-S9khgS1S3LM3BUIuYp-28YLEmEW0SP1O-iJ8g5g,18444
25
+ tests/unit/connectors/test_smb.py,sha256=yrI_xNeOe84vONyxXNOGLbWa7tcyxjzo9G5Z_Tjzn-w,12378
26
+ tests/unit/connectors/test_utils.py,sha256=RfnzTW5-9w2f5YeLbNTB_z3vVnsUSL7DwXNobo7xZUA,10302
27
+ tests/unit/models/test_local.py,sha256=QnxzWKOivqeITd0CPzZLEzGbaMDtiwsPiwyEHiPpcPY,1325
28
+ tests/unit/models/test_pipeline_stages.py,sha256=vT7Pd0lRWdChTQJuCZ3iUBx8C0xFXQXTBVB6jtWtEKU,3972
29
+ tests/unit/models/test_workflows.py,sha256=Al4d3SQI1jTz585i1kD569TZx4IWNrhyf3_Le6-cq2k,1692
30
+ xparse_client/__init__.py,sha256=FyR7W0eteapDoPj2pU4VoTobM8XR2B7f_g4yVI-v2v8,3956
31
+ xparse_client/_base.py,sha256=Vx85kDns565zwv6Zl6_8jl6b43nNprfBHbqdCVPGGTc,4420
32
+ xparse_client/_client.py,sha256=ZGxZ9xhUitTP-eRhte57ekqUkrXxNubFDL5gojajA2s,6964
33
+ xparse_client/_config.py,sha256=BG3cEW5ywKhmtfHjfTFrdezs2Ugc9P2Y85tzu5OZZ4M,6480
34
+ xparse_client/_http.py,sha256=OrnrkTeuG5tEpuD-a1ixLIFPdiqnVIYtuZCi3uXAzJ4,11381
35
+ xparse_client/exceptions.py,sha256=FNozgCFaQSsdeyxE6P-3CZItkTSfFJcIQVrddHWKN0s,9521
36
+ xparse_client/api/__init__.py,sha256=MZ3wBTDU666efOBv81MKKtc1UkjUsjzIEOpsnDUAHD4,203
37
+ xparse_client/api/extract.py,sha256=87tH-y4sXPKW35l-d0GmZdK9RIXWq4tfmGZXYJhIN60,3011
38
+ xparse_client/api/local.py,sha256=vFfPfdVPMYtJQK76iPfGw1DdFxNqfFiMIEPWw4jkrwo,6246
39
+ xparse_client/api/parse.py,sha256=nvpu5kiN43QlyMW3N6vMruVADu6MdjS-TMbLFjBh-cc,5870
40
+ xparse_client/api/pipeline.py,sha256=HscuwRUOmcdUEjzqSyAMdNkcLyK_XbF_THjNH5DXsec,4356
41
+ xparse_client/api/workflows.py,sha256=29_Rf4bIH1fiJb_bWShkUm-et97ym5ymQFh5i3b5mDI,5945
42
+ xparse_client/connectors/__init__.py,sha256=BLGxk9i8BsVCr4RMrCIh8b9cywBqXXmcDHqfF7C_FX0,1113
43
+ xparse_client/connectors/_utils.py,sha256=Qb2OyUg8gvzyMBELx4h0GmjRSdT4d9C45v9m6NUNuMg,3515
44
+ xparse_client/connectors/destinations/__init__.py,sha256=hmvWAXj58yLmePMCEnqKbNhJ74UxErugSFxBxPhlb14,1401
45
+ xparse_client/connectors/destinations/base.py,sha256=DfwwH5S8VPwEUyvWqUKDV9Bq-7A59lqoZHruOjB7L50,3218
46
+ xparse_client/connectors/destinations/local.py,sha256=drZ5wkPMiYd8iOROmLJR3eoCXcYKUpAPPjSFpafeNW8,2542
47
+ xparse_client/connectors/destinations/milvus.py,sha256=MhElUsIMY1g6_c_bD-wjpWOM5PiuZ9bLsgXACW7EY1c,7339
48
+ xparse_client/connectors/destinations/qdrant.py,sha256=Mo1wCwtsM7OIMLZ0HsEBnvE8N9BGnyTGBGJ1B8oYIew,7661
49
+ xparse_client/connectors/destinations/s3.py,sha256=2aeTW0cNUVPLBL8up9InFfgXBcFDPtqFNGa5FbUphhk,4728
50
+ xparse_client/connectors/sources/__init__.py,sha256=830EDkdGJtPC2p1MhIP108uLibGeMoXcXAwPAEACPvQ,1143
51
+ xparse_client/connectors/sources/base.py,sha256=fbRWFWuIcw_XWrOPpPkBnQ0Pf2R9N7Ry4523rxWcOpc,1863
52
+ xparse_client/connectors/sources/ftp.py,sha256=bjOjnd-2l0MZ2EAcGM3jvuTP7FMWBiQaEbgHzhIl3og,8170
53
+ xparse_client/connectors/sources/local.py,sha256=ZYTGWdQmUpzdenn3JQ_p0u9-8x7xXk5Ani2GIAqTGyE,5156
54
+ xparse_client/connectors/sources/s3.py,sha256=RCzinmz9cCqp9cFcfEozBapa2nllFvJCw9P-aLeOJQA,7219
55
+ xparse_client/connectors/sources/smb.py,sha256=_F496mipGZJ0y17H-sYTlqAf2uJScxGyit7kirTyDKc,7786
56
+ xparse_client/models/__init__.py,sha256=zLBFirgXDisEeqKHqpsH1bN5p2zE7ZmCtVENZ69mlOg,1213
57
+ xparse_client/models/chunk.py,sha256=VXKrATgk7bhlPVbJTFOUvOWu9oEnNMgMF6AnQarJD7o,986
58
+ xparse_client/models/embed.py,sha256=Tg9iqOiLZy63m4T0q2z9cq3neuQbMnqDqT55-sVA7CE,1652
59
+ xparse_client/models/extract.py,sha256=ZwMy0MsTt775Bq3UsZ9WlXFNCLrSnN0j1RgXQ-vFNqo,915
60
+ xparse_client/models/local.py,sha256=hqKmyWTU_EhPk6qybtBzmbLybBBaiasTpqM4_zP-ipo,698
61
+ xparse_client/models/parse.py,sha256=K34Fodo3emOPZJJQeu3MLjvA7HkVMxAuXk20UvO4lfQ,3297
62
+ xparse_client/models/pipeline.py,sha256=KVmTYqyi6sbGMR9UQcEgMHFqhcSBPnoRgtGL4xp8QEA,3086
63
+ xparse_client/models/workflows.py,sha256=BivMdGOAmhP6oYLQSGAAN7yml2xb7vHHrpzwLgN_Afk,1754
64
+ xparse_client-0.3.0b2.dist-info/licenses/LICENSE,sha256=7iuki7DyWMGB8PBzsht7PUt0YjdIcPjrcXNyUFgMJsw,1070
65
+ xparse_client-0.3.0b2.dist-info/METADATA,sha256=QYHCW6pdcMpJKkK3GRKFNMs9comTFatvSG1jXs_5Zv4,25453
66
+ xparse_client-0.3.0b2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
+ xparse_client-0.3.0b2.dist-info/top_level.txt,sha256=oQGc_qysOmnSAaLjwB72wH8RBHRAmxB-_qb-Uj6u56o,28
68
+ xparse_client-0.3.0b2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 ACG Xparse Authors
3
+ Copyright (c) 2025 INTSIG-TEXTIN
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1 +1,3 @@
1
+ example
2
+ tests
1
3
  xparse_client
@@ -1,3 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- encoding: utf-8 -*-
3
-
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- encoding: utf-8 -*-
3
-
4
- from dataclasses import asdict, dataclass
5
- from typing import Any, Dict, List, Literal, Optional, Union
6
-
7
- from .destinations import Destination
8
-
9
-
10
- class ParseConfig:
11
- """Parse 配置,支持动态字段"""
12
-
13
- def __init__(
14
- self,
15
- provider: Literal["textin", "mineru", "paddle", "textin-lite"] = "textin",
16
- **kwargs,
17
- ):
18
- self.provider = provider
19
- for key, value in kwargs.items():
20
- setattr(self, key, value)
21
-
22
- def to_dict(self) -> Dict[str, Any]:
23
- result = {"provider": self.provider}
24
- for key, value in self.__dict__.items():
25
- if not key.startswith("_") and key != "provider":
26
- result[key] = value
27
- return result
28
-
29
- def __repr__(self) -> str:
30
- attrs = ", ".join(
31
- f"{k}={v!r}"
32
- for k, v in sorted(self.__dict__.items())
33
- if not k.startswith("_")
34
- )
35
- return f"ParseConfig({attrs})"
36
-
37
-
38
- @dataclass
39
- class ChunkConfig:
40
- """Chunk 配置"""
41
-
42
- strategy: Literal["basic", "by_title", "by_page"] = "basic"
43
- include_orig_elements: bool = False
44
- new_after_n_chars: int = 512
45
- max_characters: int = 1024
46
- overlap: int = 0
47
- overlap_all: bool = False
48
-
49
- def to_dict(self) -> Dict[str, Any]:
50
- return asdict(self)
51
-
52
-
53
- @dataclass
54
- class EmbedConfig:
55
- """Embed 配置"""
56
-
57
- provider: Literal["qwen", "doubao"] = "qwen"
58
- model_name: Literal[
59
- "text-embedding-v3",
60
- "text-embedding-v4",
61
- "doubao-embedding-large-text-250515",
62
- "doubao-embedding-text-240715",
63
- ] = "text-embedding-v3"
64
-
65
- def to_dict(self) -> Dict[str, Any]:
66
- return asdict(self)
67
-
68
- def validate(self) -> None:
69
- provider_models = {
70
- "qwen": ["text-embedding-v3", "text-embedding-v4"],
71
- "doubao": [
72
- "doubao-embedding-large-text-250515",
73
- "doubao-embedding-text-240715",
74
- ],
75
- }
76
- if self.provider not in provider_models:
77
- raise ValueError(
78
- f"不支持的 provider: {self.provider}, 支持的有: {list(provider_models.keys())}"
79
- )
80
- if self.model_name not in provider_models[self.provider]:
81
- raise ValueError(
82
- f"provider '{self.provider}' 不支持模型 '{self.model_name}', 支持的模型: {provider_models[self.provider]}"
83
- )
84
-
85
-
86
- @dataclass
87
- class ExtractConfig:
88
- """Extract 配置"""
89
-
90
- schema: Dict[str, Any] # 必填,JSON Schema 定义
91
- generate_citations: bool = False
92
- stamp: bool = False
93
-
94
- def to_dict(self) -> Dict[str, Any]:
95
- return asdict(self)
96
-
97
-
98
- @dataclass
99
- class Stage:
100
- """Pipeline Stage 配置"""
101
-
102
- type: Literal["parse", "chunk", "embed", "extract"]
103
- config: Union[ParseConfig, ChunkConfig, EmbedConfig, ExtractConfig, Dict[str, Any]]
104
-
105
- def to_dict(self) -> Dict[str, Any]:
106
- """转换为字典格式,用于 API 请求"""
107
- if isinstance(
108
- self.config, (ParseConfig, ChunkConfig, EmbedConfig, ExtractConfig)
109
- ):
110
- return {"type": self.type, "config": self.config.to_dict()}
111
- else:
112
- # 如果 config 已经是字典,直接使用
113
- return {"type": self.type, "config": self.config}
114
-
115
- def __repr__(self) -> str:
116
- return f"Stage(type={self.type!r}, config={self.config!r})"
117
-
118
-
119
- @dataclass
120
- class PipelineStats:
121
- """Pipeline 统计信息"""
122
-
123
- original_elements: int = 0
124
- chunked_elements: int = 0
125
- embedded_elements: int = 0
126
- stages: Optional[List[Stage]] = None # 存储实际执行的 stages
127
- record_id: Optional[str] = None # 记录 ID,用于标识需要写入 Milvus 的记录
128
-
129
-
130
- @dataclass
131
- class PipelineConfig:
132
- """Pipeline 配置"""
133
-
134
- include_intermediate_results: bool = False
135
- intermediate_results_destination: Optional[Destination] = (
136
- None # 支持 Destination 类型,如 LocalDestination、S3Destination 等
137
- )
138
-
139
- def __post_init__(self):
140
- """验证配置"""
141
- if (
142
- self.include_intermediate_results
143
- and not self.intermediate_results_destination
144
- ):
145
- raise ValueError(
146
- "当 include_intermediate_results 为 True 时,必须设置 intermediate_results_destination"
147
- )
148
-
149
- def to_dict(self) -> Dict[str, Any]:
150
- return {
151
- "include_intermediate_results": self.include_intermediate_results,
152
- }
153
-
154
-
155
- __all__ = [
156
- "ParseConfig",
157
- "ChunkConfig",
158
- "EmbedConfig",
159
- "ExtractConfig",
160
- "Stage",
161
- "PipelineStats",
162
- "PipelineConfig",
163
- ]