xparse-client 0.2.10__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xparse_client-0.2.10/xparse_client.egg-info → xparse_client-0.2.11}/PKG-INFO +3 -2
- {xparse_client-0.2.10 → xparse_client-0.2.11}/example/run_pipeline.py +41 -25
- {xparse_client-0.2.10 → xparse_client-0.2.11}/pyproject.toml +3 -2
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/sources.py +2 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11/xparse_client.egg-info}/PKG-INFO +3 -2
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/requires.txt +2 -1
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/top_level.txt +3 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/LICENSE +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/README.md +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/example/run_pipeline_test.py +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/setup.cfg +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/__init__.py +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/__init__.py +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/config.py +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/destinations.py +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/pipeline.py +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/SOURCES.txt +0 -0
- {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/dependency_links.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xparse-client
|
|
3
|
-
Version: 0.2.10
|
|
3
|
+
Version: 0.2.11
|
|
4
4
|
Summary: 面向Agent和RAG的新一代文档处理 AI Infra
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Project-URL: Homepage, https://gitlab.intsig.net/xparse1/xparse-pipeline
|
|
@@ -10,7 +10,8 @@ Requires-Python: >=3.8
|
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Requires-Dist: boto3
|
|
13
|
-
Requires-Dist: pymilvus
|
|
13
|
+
Requires-Dist: pymilvus
|
|
14
|
+
Requires-Dist: milvus-lite
|
|
14
15
|
Requires-Dist: requests
|
|
15
16
|
Requires-Dist: pysmb
|
|
16
17
|
Requires-Dist: qdrant-client
|
|
@@ -101,8 +101,15 @@ def run_with_manual_setup():
|
|
|
101
101
|
# secret_key='JFIIaTGiXelv7DgBYNIBSStofF0S98',
|
|
102
102
|
# bucket='textin',
|
|
103
103
|
# prefix='',
|
|
104
|
-
# region='cn-shanghai'
|
|
105
|
-
#
|
|
104
|
+
# region='cn-shanghai'
|
|
105
|
+
# )
|
|
106
|
+
# source=S3Source(
|
|
107
|
+
# endpoint='https://S3.oss-cn-shanghai.aliyuncs.com',
|
|
108
|
+
# access_key='LTAI5t6ZnqTra8oLmJEfvcr7',
|
|
109
|
+
# secret_key='SEbz4oJ4KNJIOTMfphuVGOWmRpGGUG',
|
|
110
|
+
# bucket='textin-test-aliyun',
|
|
111
|
+
# prefix='',
|
|
112
|
+
# region='cn-shanghai'
|
|
106
113
|
# )
|
|
107
114
|
# source = S3Source(
|
|
108
115
|
# endpoint='https://cos.ap-shanghai.myqcloud.com',
|
|
@@ -128,14 +135,14 @@ def run_with_manual_setup():
|
|
|
128
135
|
# prefix='',
|
|
129
136
|
# region='cn-east-3'
|
|
130
137
|
# )
|
|
131
|
-
source = S3Source(
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
)
|
|
138
|
+
# source = S3Source(
|
|
139
|
+
# endpoint='https://s3.us-east-1.amazonaws.com',
|
|
140
|
+
# access_key='AKIA6QUE3TVZADUWA4PO',
|
|
141
|
+
# secret_key='OfV4r9/u+CmlLxmiZDYwtiFSl0OsNdWLADKdPek7',
|
|
142
|
+
# bucket='textin-test',
|
|
143
|
+
# prefix='',
|
|
144
|
+
# region='us-east-1'
|
|
145
|
+
# )
|
|
139
146
|
# source = S3Source(
|
|
140
147
|
# endpoint='http://127.0.0.1:9000',
|
|
141
148
|
# access_key='',
|
|
@@ -158,11 +165,20 @@ def run_with_manual_setup():
|
|
|
158
165
|
# username='', # 用户名,按照实际填写
|
|
159
166
|
# password='' # 密码,按照实际填写
|
|
160
167
|
# )
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
168
|
+
source = LocalSource(
|
|
169
|
+
directory='/Users/ke_wang/Documents/doc',
|
|
170
|
+
pattern=['*.pdf'],
|
|
171
|
+
recursive=True,
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# source=S3Source(
|
|
175
|
+
# endpoint='https://obs.cn-north-4.myhuaweicloud.com',
|
|
176
|
+
# access_key='HPUAFT3D1Q6O6UUN1RWQ',
|
|
177
|
+
# secret_key='4zIk8x37nZiDS9P585BTFCWsOSo5G7ok1yRWtEA1',
|
|
178
|
+
# bucket='textin-test-ywj',
|
|
179
|
+
# prefix='',
|
|
180
|
+
# region='cn-north-4'
|
|
181
|
+
# )# 华为云
|
|
166
182
|
|
|
167
183
|
# 创建 Milvus 目的地
|
|
168
184
|
# destination = MilvusDestination(
|
|
@@ -171,9 +187,9 @@ def run_with_manual_setup():
|
|
|
171
187
|
# dimension=1024
|
|
172
188
|
# )
|
|
173
189
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
190
|
+
destination = LocalDestination(
|
|
191
|
+
output_dir='./result'
|
|
192
|
+
)
|
|
177
193
|
|
|
178
194
|
# destination = MilvusDestination(
|
|
179
195
|
# db_path='https://in03-5388093d0db1707.serverless.ali-cn-hangzhou.cloud.zilliz.com.cn', # zilliz连接地址
|
|
@@ -191,18 +207,18 @@ def run_with_manual_setup():
|
|
|
191
207
|
# region='ap-shanghai'
|
|
192
208
|
# )
|
|
193
209
|
|
|
194
|
-
destination = QdrantDestination(
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
)
|
|
210
|
+
# destination = QdrantDestination(
|
|
211
|
+
# url='https://1325db22-7dd8-4fc9-930b-f969d4963b3d.us-east-1-1.aws.cloud.qdrant.io:6333',
|
|
212
|
+
# collection_name='textin1',
|
|
213
|
+
# dimension=1024,
|
|
214
|
+
# api_key='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.TGnFB1pAD7c7IqSOvTpgCPpHXSnnoKhWEQ5pQ8DrBnI',
|
|
215
|
+
# )
|
|
200
216
|
|
|
201
217
|
# 使用新的 stages 格式创建配置
|
|
202
218
|
stages = [
|
|
203
219
|
Stage(
|
|
204
220
|
type='parse',
|
|
205
|
-
config=ParseConfig(provider='
|
|
221
|
+
config=ParseConfig(provider='textin', page_ranges='3')
|
|
206
222
|
),
|
|
207
223
|
Stage(
|
|
208
224
|
type='chunk',
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "xparse-client"
|
|
7
|
-
version = "0.2.10"
|
|
7
|
+
version = "0.2.11"
|
|
8
8
|
description = "面向Agent和RAG的新一代文档处理 AI Infra"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -14,7 +14,8 @@ keywords = ["xparse", "pipeline", "rag"]
|
|
|
14
14
|
requires-python = ">=3.8"
|
|
15
15
|
dependencies = [
|
|
16
16
|
"boto3",
|
|
17
|
-
"pymilvus
|
|
17
|
+
"pymilvus",
|
|
18
|
+
"milvus-lite",
|
|
18
19
|
"requests",
|
|
19
20
|
"pysmb",
|
|
20
21
|
"qdrant-client"
|
|
@@ -121,6 +121,8 @@ class S3Source(Source):
|
|
|
121
121
|
|
|
122
122
|
if self.endpoint == 'https://textin-minio-api.ai.intsig.net':
|
|
123
123
|
config = Config(signature_version='s3v4')
|
|
124
|
+
elif self.endpoint.endswith('aliyuncs.com'):
|
|
125
|
+
config = Config(signature_version='s3', s3={'addressing_style': 'virtual'})
|
|
124
126
|
else:
|
|
125
127
|
config = Config(signature_version='s3v4', s3={'addressing_style': 'virtual'})
|
|
126
128
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xparse-client
|
|
3
|
-
Version: 0.2.10
|
|
3
|
+
Version: 0.2.11
|
|
4
4
|
Summary: 面向Agent和RAG的新一代文档处理 AI Infra
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Project-URL: Homepage, https://gitlab.intsig.net/xparse1/xparse-pipeline
|
|
@@ -10,7 +10,8 @@ Requires-Python: >=3.8
|
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Requires-Dist: boto3
|
|
13
|
-
Requires-Dist: pymilvus
|
|
13
|
+
Requires-Dist: pymilvus
|
|
14
|
+
Requires-Dist: milvus-lite
|
|
14
15
|
Requires-Dist: requests
|
|
15
16
|
Requires-Dist: pysmb
|
|
16
17
|
Requires-Dist: qdrant-client
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|