xparse-client 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
example/run_pipeline.py CHANGED
@@ -101,8 +101,15 @@ def run_with_manual_setup():
101
101
  # secret_key='JFIIaTGiXelv7DgBYNIBSStofF0S98',
102
102
  # bucket='textin',
103
103
  # prefix='',
104
- # region='cn-shanghai',
105
- # pattern='*.png'
104
+ # region='cn-shanghai'
105
+ # )
106
+ # source=S3Source(
107
+ # endpoint='https://S3.oss-cn-shanghai.aliyuncs.com',
108
+ # access_key='LTAI5t6ZnqTra8oLmJEfvcr7',
109
+ # secret_key='SEbz4oJ4KNJIOTMfphuVGOWmRpGGUG',
110
+ # bucket='textin-test-aliyun',
111
+ # prefix='',
112
+ # region='cn-shanghai'
106
113
  # )
107
114
  # source = S3Source(
108
115
  # endpoint='https://cos.ap-shanghai.myqcloud.com',
@@ -128,14 +135,14 @@ def run_with_manual_setup():
128
135
  # prefix='',
129
136
  # region='cn-east-3'
130
137
  # )
131
- source = S3Source(
132
- endpoint='https://s3.us-east-1.amazonaws.com',
133
- access_key='AKIA6QUE3TVZADUWA4PO',
134
- secret_key='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',
135
- bucket='textin-test',
136
- prefix='',
137
- region='us-east-1'
138
- )
138
+ # source = S3Source(
139
+ # endpoint='https://s3.us-east-1.amazonaws.com',
140
+ # access_key='AKIA6QUE3TVZADUWA4PO',
141
+ # secret_key='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',
142
+ # bucket='textin-test',
143
+ # prefix='',
144
+ # region='us-east-1'
145
+ # )
139
146
  # source = S3Source(
140
147
  # endpoint='http://127.0.0.1:9000',
141
148
  # access_key='',
@@ -158,11 +165,20 @@ def run_with_manual_setup():
158
165
  # username='', # 用户名,按照实际填写
159
166
  # password='' # 密码,按照实际填写
160
167
  # )
161
- # source = LocalSource(
162
- # directory='/Users/ke_wang/Documents/doc',
163
- # recursive=True,
164
- # pattern=['**/*.png'] # 支持通配符: *.pdf, *.docx, **/*.txt
165
- # )
168
+ source = LocalSource(
169
+ directory='/Users/ke_wang/Documents/doc',
170
+ pattern=['*.pdf'],
171
+ recursive=True,
172
+ )
173
+
174
+ # source=S3Source(
175
+ # endpoint='https://obs.cn-north-4.myhuaweicloud.com',
176
+ # access_key='HPUAFT3D1Q6O6UUN1RWQ',
177
+ # secret_key='4zIk8x37nZiDS9P585BTFCWsOSo5G7ok1yRWtEA1',
178
+ # bucket='textin-test-ywj',
179
+ # prefix='',
180
+ # region='cn-north-4'
181
+ # )# 华为云
166
182
 
167
183
  # 创建 Milvus 目的地
168
184
  # destination = MilvusDestination(
@@ -191,18 +207,27 @@ def run_with_manual_setup():
191
207
  # region='ap-shanghai'
192
208
  # )
193
209
 
194
- destination = QdrantDestination(
195
- url='https://1325db22-7dd8-4fc9-930b-f969d4963b3d.us-east-1-1.aws.cloud.qdrant.io:6333',
196
- collection_name='textin1',
197
- dimension=1024,
198
- api_key='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.TGnFB1pAD7c7IqSOvTpgCPpHXSnnoKhWEQ5pQ8DrBnI',
199
- )
210
+ # destination = QdrantDestination(
211
+ # url='https://1325db22-7dd8-4fc9-930b-f969d4963b3d.us-east-1-1.aws.cloud.qdrant.io:6333',
212
+ # collection_name='textin1',
213
+ # dimension=1024,
214
+ # api_key='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.TGnFB1pAD7c7IqSOvTpgCPpHXSnnoKhWEQ5pQ8DrBnI',
215
+ # )
216
+
217
+ destination = S3Destination(
218
+ endpoint='https://obs.cn-east-3.myhuaweicloud.com',
219
+ access_key='HPUAFT3D1Q6O6UUN1RWQ',
220
+ secret_key='4zIk8x37nZiDS9P585BTFCWsOSo5G7ok1yRWtEA1',
221
+ bucket='xparse',
222
+ prefix='json/',
223
+ region='cn-east-3'
224
+ ) # 华为云
200
225
 
201
226
  # 使用新的 stages 格式创建配置
202
227
  stages = [
203
228
  Stage(
204
229
  type='parse',
205
- config=ParseConfig(provider='paddle')
230
+ config=ParseConfig(provider='textin', page_ranges='3')
206
231
  ),
207
232
  Stage(
208
233
  type='chunk',
@@ -240,6 +240,8 @@ class S3Destination(Destination):
240
240
  config = Config(signature_version='s3v4')
241
241
  elif self.endpoint.endswith('aliyuncs.com'):
242
242
  config = Config(signature_version='s3', s3={'addressing_style': 'virtual'})
243
+ elif self.endpoint.endswith('myhuaweicloud.com'):
244
+ config = Config(signature_version='s3', s3={'addressing_style': 'virtual'})
243
245
  else:
244
246
  config = Config(signature_version='s3v4', s3={'addressing_style': 'virtual'})
245
247
 
@@ -121,6 +121,8 @@ class S3Source(Source):
121
121
 
122
122
  if self.endpoint == 'https://textin-minio-api.ai.intsig.net':
123
123
  config = Config(signature_version='s3v4')
124
+ elif self.endpoint.endswith('aliyuncs.com'):
125
+ config = Config(signature_version='s3', s3={'addressing_style': 'virtual'})
124
126
  else:
125
127
  config = Config(signature_version='s3v4', s3={'addressing_style': 'virtual'})
126
128
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.2.10
3
+ Version: 0.2.12
4
4
  Summary: 面向Agent和RAG的新一代文档处理 AI Infra
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://gitlab.intsig.net/xparse1/xparse-pipeline
@@ -10,7 +10,8 @@ Requires-Python: >=3.8
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE
12
12
  Requires-Dist: boto3
13
- Requires-Dist: pymilvus[milvus_lite]
13
+ Requires-Dist: pymilvus
14
+ Requires-Dist: milvus-lite
14
15
  Requires-Dist: requests
15
16
  Requires-Dist: pysmb
16
17
  Requires-Dist: qdrant-client
@@ -0,0 +1,13 @@
1
+ example/run_pipeline.py,sha256=AnNMUD0h6eoWdUQrejw6YZs7O_aM5rYajbtAAoAALrc,16671
2
+ example/run_pipeline_test.py,sha256=pxsNiq_LmP6M4R7tTuja0u-Lu7fW-wIBU1uBf0-agQI,14845
3
+ xparse_client/__init__.py,sha256=C2XLxkCoONl6_B1FmDhWRw84TqOL4pZF20br-K26SSY,1721
4
+ xparse_client/pipeline/__init__.py,sha256=TVlb2AGCNKP0jrv3p4ZLZCPKp68hTVMFi00DTdi6QAo,49
5
+ xparse_client/pipeline/config.py,sha256=FFYq2a0dBWBEj70s2aInXOiQ5MwwHimd6SI2_tkp52w,4138
6
+ xparse_client/pipeline/destinations.py,sha256=QKlNGcpXIqkZS3rlBlhLDoRqIWA21Jgn3GiGhhfE8Rc,20921
7
+ xparse_client/pipeline/pipeline.py,sha256=ZspagUjiL5wnzGJq6A7riOU8qGXJMtg1fqPm9H09mkk,27272
8
+ xparse_client/pipeline/sources.py,sha256=pzJ5FjP-kZi-6Cphhm9rOPXETmHw5Qpf7EaxrQPgSxs,22285
9
+ xparse_client-0.2.12.dist-info/licenses/LICENSE,sha256=ckIP-MbocsP9nqYnta5KgfAicYF196B5TNdHIR6kOO0,1075
10
+ xparse_client-0.2.12.dist-info/METADATA,sha256=wQIOLJAjxSbLGpWXNM_rZmKK2h_TSoNwHvvhQLYafaw,28799
11
+ xparse_client-0.2.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ xparse_client-0.2.12.dist-info/top_level.txt,sha256=bfX8BWo1sEEQVsI4Ql4Uu80vrfEh5zfajU9YqFTzxMo,22
13
+ xparse_client-0.2.12.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- example/run_pipeline.py,sha256=d4pPDqjiC9dPNh6nmArPOF7fPMY0a-jcvdgtNuV-_kM,15795
2
- example/run_pipeline_test.py,sha256=pxsNiq_LmP6M4R7tTuja0u-Lu7fW-wIBU1uBf0-agQI,14845
3
- xparse_client/__init__.py,sha256=C2XLxkCoONl6_B1FmDhWRw84TqOL4pZF20br-K26SSY,1721
4
- xparse_client/pipeline/__init__.py,sha256=TVlb2AGCNKP0jrv3p4ZLZCPKp68hTVMFi00DTdi6QAo,49
5
- xparse_client/pipeline/config.py,sha256=FFYq2a0dBWBEj70s2aInXOiQ5MwwHimd6SI2_tkp52w,4138
6
- xparse_client/pipeline/destinations.py,sha256=9UyZ8Ygjoe4yAq6-VZNZBoNYRbb3mahify3c1AdOHMY,20775
7
- xparse_client/pipeline/pipeline.py,sha256=ZspagUjiL5wnzGJq6A7riOU8qGXJMtg1fqPm9H09mkk,27272
8
- xparse_client/pipeline/sources.py,sha256=D-kLrSQ-qsFFFq7JC4sL3Y3Q3Q87Wcpv9R5K85YkDjE,22144
9
- xparse_client-0.2.10.dist-info/licenses/LICENSE,sha256=ckIP-MbocsP9nqYnta5KgfAicYF196B5TNdHIR6kOO0,1075
10
- xparse_client-0.2.10.dist-info/METADATA,sha256=gIY_PxB1pTxSlKJZjU7z1Iua6ZMtAfMfHFeztWp2zIw,28785
11
- xparse_client-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- xparse_client-0.2.10.dist-info/top_level.txt,sha256=bfX8BWo1sEEQVsI4Ql4Uu80vrfEh5zfajU9YqFTzxMo,22
13
- xparse_client-0.2.10.dist-info/RECORD,,