xparse-client 0.2.10__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {xparse_client-0.2.10/xparse_client.egg-info → xparse_client-0.2.11}/PKG-INFO +3 -2
  2. {xparse_client-0.2.10 → xparse_client-0.2.11}/example/run_pipeline.py +41 -25
  3. {xparse_client-0.2.10 → xparse_client-0.2.11}/pyproject.toml +3 -2
  4. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/sources.py +2 -0
  5. {xparse_client-0.2.10 → xparse_client-0.2.11/xparse_client.egg-info}/PKG-INFO +3 -2
  6. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/requires.txt +2 -1
  7. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/top_level.txt +3 -0
  8. {xparse_client-0.2.10 → xparse_client-0.2.11}/LICENSE +0 -0
  9. {xparse_client-0.2.10 → xparse_client-0.2.11}/README.md +0 -0
  10. {xparse_client-0.2.10 → xparse_client-0.2.11}/example/run_pipeline_test.py +0 -0
  11. {xparse_client-0.2.10 → xparse_client-0.2.11}/setup.cfg +0 -0
  12. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/__init__.py +0 -0
  13. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/__init__.py +0 -0
  14. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/config.py +0 -0
  15. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/destinations.py +0 -0
  16. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client/pipeline/pipeline.py +0 -0
  17. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/SOURCES.txt +0 -0
  18. {xparse_client-0.2.10 → xparse_client-0.2.11}/xparse_client.egg-info/dependency_links.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.2.10
3
+ Version: 0.2.11
4
4
  Summary: 面向Agent和RAG的新一代文档处理 AI Infra
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://gitlab.intsig.net/xparse1/xparse-pipeline
@@ -10,7 +10,8 @@ Requires-Python: >=3.8
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE
12
12
  Requires-Dist: boto3
13
- Requires-Dist: pymilvus[milvus_lite]
13
+ Requires-Dist: pymilvus
14
+ Requires-Dist: milvus-lite
14
15
  Requires-Dist: requests
15
16
  Requires-Dist: pysmb
16
17
  Requires-Dist: qdrant-client
@@ -101,8 +101,15 @@ def run_with_manual_setup():
101
101
  # secret_key='JFIIaTGiXelv7DgBYNIBSStofF0S98',
102
102
  # bucket='textin',
103
103
  # prefix='',
104
- # region='cn-shanghai',
105
- # pattern='*.png'
104
+ # region='cn-shanghai'
105
+ # )
106
+ # source=S3Source(
107
+ # endpoint='https://S3.oss-cn-shanghai.aliyuncs.com',
108
+ # access_key='LTAI5t6ZnqTra8oLmJEfvcr7',
109
+ # secret_key='SEbz4oJ4KNJIOTMfphuVGOWmRpGGUG',
110
+ # bucket='textin-test-aliyun',
111
+ # prefix='',
112
+ # region='cn-shanghai'
106
113
  # )
107
114
  # source = S3Source(
108
115
  # endpoint='https://cos.ap-shanghai.myqcloud.com',
@@ -128,14 +135,14 @@ def run_with_manual_setup():
128
135
  # prefix='',
129
136
  # region='cn-east-3'
130
137
  # )
131
- source = S3Source(
132
- endpoint='https://s3.us-east-1.amazonaws.com',
133
- access_key='AKIA6QUE3TVZADUWA4PO',
134
- secret_key='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',
135
- bucket='textin-test',
136
- prefix='',
137
- region='us-east-1'
138
- )
138
+ # source = S3Source(
139
+ # endpoint='https://s3.us-east-1.amazonaws.com',
140
+ # access_key='AKIA6QUE3TVZADUWA4PO',
141
+ # secret_key='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',
142
+ # bucket='textin-test',
143
+ # prefix='',
144
+ # region='us-east-1'
145
+ # )
139
146
  # source = S3Source(
140
147
  # endpoint='http://127.0.0.1:9000',
141
148
  # access_key='',
@@ -158,11 +165,20 @@ def run_with_manual_setup():
158
165
  # username='', # 用户名,按照实际填写
159
166
  # password='' # 密码,按照实际填写
160
167
  # )
161
- # source = LocalSource(
162
- # directory='/Users/ke_wang/Documents/doc',
163
- # recursive=True,
164
- # pattern=['**/*.png'] # 支持通配符: *.pdf, *.docx, **/*.txt
165
- # )
168
+ source = LocalSource(
169
+ directory='/Users/ke_wang/Documents/doc',
170
+ pattern=['*.pdf'],
171
+ recursive=True,
172
+ )
173
+
174
+ # source=S3Source(
175
+ # endpoint='https://obs.cn-north-4.myhuaweicloud.com',
176
+ # access_key='HPUAFT3D1Q6O6UUN1RWQ',
177
+ # secret_key='4zIk8x37nZiDS9P585BTFCWsOSo5G7ok1yRWtEA1',
178
+ # bucket='textin-test-ywj',
179
+ # prefix='',
180
+ # region='cn-north-4'
181
+ # )# 华为云
166
182
 
167
183
  # 创建 Milvus 目的地
168
184
  # destination = MilvusDestination(
@@ -171,9 +187,9 @@ def run_with_manual_setup():
171
187
  # dimension=1024
172
188
  # )
173
189
 
174
- # destination = LocalDestination(
175
- # output_dir='./result'
176
- # )
190
+ destination = LocalDestination(
191
+ output_dir='./result'
192
+ )
177
193
 
178
194
  # destination = MilvusDestination(
179
195
  # db_path='https://in03-5388093d0db1707.serverless.ali-cn-hangzhou.cloud.zilliz.com.cn', # zilliz连接地址
@@ -191,18 +207,18 @@ def run_with_manual_setup():
191
207
  # region='ap-shanghai'
192
208
  # )
193
209
 
194
- destination = QdrantDestination(
195
- url='https://1325db22-7dd8-4fc9-930b-f969d4963b3d.us-east-1-1.aws.cloud.qdrant.io:6333',
196
- collection_name='textin1',
197
- dimension=1024,
198
- api_key='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.TGnFB1pAD7c7IqSOvTpgCPpHXSnnoKhWEQ5pQ8DrBnI',
199
- )
210
+ # destination = QdrantDestination(
211
+ # url='https://1325db22-7dd8-4fc9-930b-f969d4963b3d.us-east-1-1.aws.cloud.qdrant.io:6333',
212
+ # collection_name='textin1',
213
+ # dimension=1024,
214
+ # api_key='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.TGnFB1pAD7c7IqSOvTpgCPpHXSnnoKhWEQ5pQ8DrBnI',
215
+ # )
200
216
 
201
217
  # 使用新的 stages 格式创建配置
202
218
  stages = [
203
219
  Stage(
204
220
  type='parse',
205
- config=ParseConfig(provider='paddle')
221
+ config=ParseConfig(provider='textin', page_ranges='3')
206
222
  ),
207
223
  Stage(
208
224
  type='chunk',
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xparse-client"
7
- version = "0.2.10"
7
+ version = "0.2.11"
8
8
  description = "面向Agent和RAG的新一代文档处理 AI Infra"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -14,7 +14,8 @@ keywords = ["xparse", "pipeline", "rag"]
14
14
  requires-python = ">=3.8"
15
15
  dependencies = [
16
16
  "boto3",
17
- "pymilvus[milvus_lite]",
17
+ "pymilvus",
18
+ "milvus-lite",
18
19
  "requests",
19
20
  "pysmb",
20
21
  "qdrant-client"
@@ -121,6 +121,8 @@ class S3Source(Source):
121
121
 
122
122
  if self.endpoint == 'https://textin-minio-api.ai.intsig.net':
123
123
  config = Config(signature_version='s3v4')
124
+ elif self.endpoint.endswith('aliyuncs.com'):
125
+ config = Config(signature_version='s3', s3={'addressing_style': 'virtual'})
124
126
  else:
125
127
  config = Config(signature_version='s3v4', s3={'addressing_style': 'virtual'})
126
128
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xparse-client
3
- Version: 0.2.10
3
+ Version: 0.2.11
4
4
  Summary: 面向Agent和RAG的新一代文档处理 AI Infra
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://gitlab.intsig.net/xparse1/xparse-pipeline
@@ -10,7 +10,8 @@ Requires-Python: >=3.8
10
10
  Description-Content-Type: text/markdown
11
11
  License-File: LICENSE
12
12
  Requires-Dist: boto3
13
- Requires-Dist: pymilvus[milvus_lite]
13
+ Requires-Dist: pymilvus
14
+ Requires-Dist: milvus-lite
14
15
  Requires-Dist: requests
15
16
  Requires-Dist: pysmb
16
17
  Requires-Dist: qdrant-client
@@ -1,5 +1,6 @@
1
1
  boto3
2
- pymilvus[milvus_lite]
2
+ pymilvus
3
+ milvus-lite
3
4
  requests
4
5
  pysmb
5
6
  qdrant-client
@@ -1,6 +1,9 @@
1
+ agent_vectors
1
2
  dist
2
3
  doc
3
4
  example
4
5
  intermediate_results
6
+ parsed_documents
7
+ parsed_medical_documents
5
8
  result
6
9
  xparse_client
File without changes
File without changes
File without changes