xparse-client 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- example/run_pipeline.py +20 -12
- example/run_pipeline_test.py +8 -8
- xparse_client/__init__.py +2 -1
- xparse_client/pipeline/destinations.py +184 -2
- xparse_client/pipeline/pipeline.py +17 -1
- xparse_client/pipeline/sources.py +83 -24
- {xparse_client-0.2.8.dist-info → xparse_client-0.2.10.dist-info}/METADATA +42 -19
- xparse_client-0.2.10.dist-info/RECORD +13 -0
- xparse_client-0.2.8.dist-info/RECORD +0 -13
- {xparse_client-0.2.8.dist-info → xparse_client-0.2.10.dist-info}/WHEEL +0 -0
- {xparse_client-0.2.8.dist-info → xparse_client-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {xparse_client-0.2.8.dist-info → xparse_client-0.2.10.dist-info}/top_level.txt +0 -0
example/run_pipeline.py
CHANGED
|
@@ -84,7 +84,7 @@ def run_with_config():
|
|
|
84
84
|
|
|
85
85
|
def run_with_manual_setup():
|
|
86
86
|
"""手动创建 Source、Destination 和 Pipeline"""
|
|
87
|
-
from xparse_client import ChunkConfig, EmbedConfig, ParseConfig, Stage, PipelineConfig, LocalDestination
|
|
87
|
+
from xparse_client import ChunkConfig, EmbedConfig, ParseConfig, Stage, PipelineConfig, LocalDestination, QdrantDestination
|
|
88
88
|
|
|
89
89
|
# 创建 S3 数据源
|
|
90
90
|
# source = S3Source(
|
|
@@ -132,7 +132,7 @@ def run_with_manual_setup():
|
|
|
132
132
|
endpoint='https://s3.us-east-1.amazonaws.com',
|
|
133
133
|
access_key='AKIA6QUE3TVZADUWA4PO',
|
|
134
134
|
secret_key='OfV4r9/u+CmlLxmiZDYwtiFSl0OsNdWLADKdPek7',
|
|
135
|
-
bucket='textin-
|
|
135
|
+
bucket='textin-test',
|
|
136
136
|
prefix='',
|
|
137
137
|
region='us-east-1'
|
|
138
138
|
)
|
|
@@ -160,7 +160,8 @@ def run_with_manual_setup():
|
|
|
160
160
|
# )
|
|
161
161
|
# source = LocalSource(
|
|
162
162
|
# directory='/Users/ke_wang/Documents/doc',
|
|
163
|
-
#
|
|
163
|
+
# recursive=True,
|
|
164
|
+
# pattern=['**/*.png'] # 支持通配符: *.pdf, *.docx, **/*.txt
|
|
164
165
|
# )
|
|
165
166
|
|
|
166
167
|
# 创建 Milvus 目的地
|
|
@@ -174,12 +175,12 @@ def run_with_manual_setup():
|
|
|
174
175
|
# output_dir='./result'
|
|
175
176
|
# )
|
|
176
177
|
|
|
177
|
-
destination = MilvusDestination(
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
)
|
|
178
|
+
# destination = MilvusDestination(
|
|
179
|
+
# db_path='https://in03-5388093d0db1707.serverless.ali-cn-hangzhou.cloud.zilliz.com.cn', # zilliz连接地址
|
|
180
|
+
# collection_name='textin_test_3_copy', # 数据库collection名称
|
|
181
|
+
# dimension=1024, # 向量维度,需与 embed API 返回一致
|
|
182
|
+
# api_key=os.environ['ZILLIZ_API_KEY'] # Zilliz Cloud API Key (read from the environment; never commit a real key)
|
|
183
|
+
# )
|
|
183
184
|
|
|
184
185
|
# destination = S3Destination(
|
|
185
186
|
# endpoint='https://cos.ap-shanghai.myqcloud.com',
|
|
@@ -189,12 +190,19 @@ def run_with_manual_setup():
|
|
|
189
190
|
# prefix='result',
|
|
190
191
|
# region='ap-shanghai'
|
|
191
192
|
# )
|
|
193
|
+
|
|
194
|
+
# Credentials must never be committed with the example: the API key is read
# from the environment so the published package does not leak a live secret.
# Export QDRANT_API_KEY before running this example.
import os

destination = QdrantDestination(
    url='https://1325db22-7dd8-4fc9-930b-f969d4963b3d.us-east-1-1.aws.cloud.qdrant.io:6333',
    collection_name='textin1',
    dimension=1024,  # must equal the dimension returned by the embed stage
    api_key=os.environ['QDRANT_API_KEY'],  # KeyError here means the variable is not set
)
|
|
192
200
|
|
|
193
201
|
# 使用新的 stages 格式创建配置
|
|
194
202
|
stages = [
|
|
195
203
|
Stage(
|
|
196
204
|
type='parse',
|
|
197
|
-
config=ParseConfig(provider='
|
|
205
|
+
config=ParseConfig(provider='paddle')
|
|
198
206
|
),
|
|
199
207
|
Stage(
|
|
200
208
|
type='chunk',
|
|
@@ -236,8 +244,8 @@ def run_with_manual_setup():
|
|
|
236
244
|
)
|
|
237
245
|
|
|
238
246
|
# 运行
|
|
239
|
-
config = pipeline.get_config()
|
|
240
|
-
|
|
247
|
+
# config = pipeline.get_config()
|
|
248
|
+
pipeline.run()
|
|
241
249
|
|
|
242
250
|
|
|
243
251
|
# ============================================================================
|
example/run_pipeline_test.py
CHANGED
|
@@ -86,14 +86,14 @@ def run_with_manual_setup():
|
|
|
86
86
|
from xparse_client import ChunkConfig, EmbedConfig, ParseConfig, Stage
|
|
87
87
|
|
|
88
88
|
# 创建 S3 数据源
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
89
|
+
# The MinIO credentials are read from the environment instead of being
# hard-coded, so the published example does not ship a usable key pair.
# Export MINIO_ACCESS_KEY and MINIO_SECRET_KEY before running this example.
import os

source = S3Source(
    endpoint='https://textin-minio-api.ai.intsig.net',
    access_key=os.environ['MINIO_ACCESS_KEY'],
    secret_key=os.environ['MINIO_SECRET_KEY'],
    bucket='textin-test',
    prefix='',
    region='us-east-1'
)
|
|
97
97
|
source = S3Source(
|
|
98
98
|
endpoint='https://s3.oss-cn-shanghai.aliyuncs.com',
|
|
99
99
|
access_key='LTAI5tBgsaVfkbh9rbPyuB17',
|
xparse_client/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ logging.basicConfig(
|
|
|
10
10
|
|
|
11
11
|
from .pipeline.config import ParseConfig, ChunkConfig, EmbedConfig, Stage, PipelineStats, PipelineConfig
|
|
12
12
|
from .pipeline.sources import Source, S3Source, LocalSource, FtpSource, SmbSource
|
|
13
|
-
from .pipeline.destinations import Destination, MilvusDestination, LocalDestination, S3Destination
|
|
13
|
+
from .pipeline.destinations import Destination, MilvusDestination, QdrantDestination, LocalDestination, S3Destination
|
|
14
14
|
from .pipeline.pipeline import Pipeline, create_pipeline_from_config
|
|
15
15
|
|
|
16
16
|
__all__ = [
|
|
@@ -27,6 +27,7 @@ __all__ = [
|
|
|
27
27
|
'SmbSource',
|
|
28
28
|
'Destination',
|
|
29
29
|
'MilvusDestination',
|
|
30
|
+
'QdrantDestination',
|
|
30
31
|
'LocalDestination',
|
|
31
32
|
'S3Destination',
|
|
32
33
|
'Pipeline',
|
|
@@ -13,7 +13,8 @@ from typing import List, Dict, Any
|
|
|
13
13
|
|
|
14
14
|
from botocore.config import Config
|
|
15
15
|
from pymilvus import MilvusClient
|
|
16
|
-
|
|
16
|
+
from qdrant_client import QdrantClient
|
|
17
|
+
from qdrant_client.models import Distance, VectorParams, PointStruct, PayloadSchemaType
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
@@ -127,7 +128,7 @@ class MilvusDestination(Destination):
|
|
|
127
128
|
print(f" ✓ 删除现有记录: record_id={record_id}, 删除 {deleted_count} 条")
|
|
128
129
|
logger.info(f"删除 Milvus 现有记录: record_id={record_id}, 删除 {deleted_count} 条")
|
|
129
130
|
else:
|
|
130
|
-
print(f" →
|
|
131
|
+
print(f" → 准备写入记录: record_id={record_id}")
|
|
131
132
|
except Exception as e:
|
|
132
133
|
print(f" ! 删除现有记录失败: {str(e)}")
|
|
133
134
|
logger.warning(f"删除 Milvus 现有记录失败: record_id={record_id}, {str(e)}")
|
|
@@ -296,9 +297,190 @@ class S3Destination(Destination):
|
|
|
296
297
|
return False
|
|
297
298
|
|
|
298
299
|
|
|
300
|
+
class QdrantDestination(Destination):
    """Destination that stores embedded elements in a Qdrant collection.

    On construction the collection is created if it is missing (cosine
    distance, ``dimension``-sized vectors) and a keyword payload index on
    ``record_id`` is requested. ``write`` replaces every point that carries
    the same ``record_id`` and then upserts the new points.
    """

    # Payload keys that are set explicitly (or deliberately left out); metadata
    # entries with these names are never copied into the payload.
    _FIXED_FIELDS = {'embeddings', 'text', 'element_id', 'record_id', 'created_at', 'metadata'}

    # Number of point ids fetched per scroll request / removed per delete request.
    _SCROLL_PAGE_SIZE = 10000

    def __init__(self, url: str, collection_name: str, dimension: int, api_key: str = None, prefer_grpc: bool = False):
        """Connect to Qdrant and make sure the target collection exists.

        Args:
            url: Qdrant service address, e.g. 'http://localhost:6333' or
                'https://xxx.qdrant.io'.
            collection_name: Name of the collection to write to.
            dimension: Vector size used when the collection has to be created.
            api_key: Optional API key (used for Qdrant Cloud).
            prefer_grpc: Whether the client should prefer gRPC over HTTP.

        Raises:
            Exception: Any error raised while listing or creating the
                collection is logged and re-raised unchanged.
        """
        self.url = url
        self.collection_name = collection_name
        self.dimension = dimension
        # Kept on the instance so Pipeline.get_config() can report the real
        # value instead of always falling back to False.
        self.prefer_grpc = prefer_grpc

        # Only pass optional settings when they were actually provided.
        client_kwargs = {'url': url}
        if api_key:
            client_kwargs['api_key'] = api_key
        if prefer_grpc:
            client_kwargs['prefer_grpc'] = True

        self.client = QdrantClient(**client_kwargs)

        try:
            existing = self.client.get_collections().collections
            collection_exists = any(col.name == collection_name for col in existing)

            if not collection_exists:
                self.client.create_collection(
                    collection_name=collection_name,
                    vectors_config=VectorParams(
                        size=dimension,
                        distance=Distance.COSINE
                    )
                )
                # The index is an optimisation for record_id filtering; a
                # failure here must not prevent the destination from working.
                try:
                    self._create_record_id_index()
                except Exception as e:
                    logger.warning(f"创建 record_id 索引失败(可能已存在): {str(e)}")
                print(f"✓ Qdrant Collection 创建: {collection_name} (维度: {dimension})")
            else:
                print(f"✓ Qdrant Collection 存在: {collection_name}")
                # Make sure the index exists on pre-existing collections too;
                # an error is only logged at debug level because the index may
                # already be present.
                try:
                    self._create_record_id_index()
                except Exception as e:
                    logger.debug(f"record_id 索引可能已存在: {str(e)}")

            logger.info(f"Qdrant 连接成功: {url}/{collection_name}")
        except Exception as e:
            print(f"✗ Qdrant 连接失败: {str(e)}")
            logger.error(f"Qdrant 连接失败: {str(e)}")
            raise

    def _create_record_id_index(self) -> None:
        """Request a keyword payload index on ``record_id`` for this collection."""
        self.client.create_payload_index(
            collection_name=self.collection_name,
            field_name="record_id",
            field_schema=PayloadSchemaType.KEYWORD
        )

    def _delete_by_record_id(self, record_id) -> int:
        """Delete every point whose payload ``record_id`` equals *record_id*.

        The matching ids are collected page by page (following the offset that
        ``scroll`` returns) so that records with more points than one page are
        removed completely, then deleted in page-sized batches.

        Returns:
            The number of point ids that were sent for deletion.
        """
        record_filter = {
            "must": [
                {
                    "key": "record_id",
                    "match": {"value": record_id}
                }
            ]
        }

        point_ids = []
        offset = None
        while True:
            page, offset = self.client.scroll(
                collection_name=self.collection_name,
                scroll_filter=record_filter,
                limit=self._SCROLL_PAGE_SIZE,
                offset=offset,
                with_payload=False,  # only the ids are needed
                with_vectors=False
            )
            point_ids.extend(point.id for point in page)
            # scroll reports None as the next offset once the last page is reached.
            if offset is None or not page:
                break

        for start in range(0, len(point_ids), self._SCROLL_PAGE_SIZE):
            self.client.delete(
                collection_name=self.collection_name,
                points_selector=point_ids[start:start + self._SCROLL_PAGE_SIZE]
            )
        return len(point_ids)

    @staticmethod
    def _to_point_id(element_id) -> str:
        """Return a UUID string usable as a Qdrant point id.

        A value that already is a UUID string is normalised and reused; any
        other value is mapped to a stable UUID5 derived from its string form.
        """
        try:
            # str() first: uuid.UUID() raises AttributeError (not ValueError)
            # for non-string input such as an integer id.
            return str(uuid.UUID(str(element_id)))
        except (ValueError, TypeError):
            return str(uuid.uuid5(uuid.NAMESPACE_URL, str(element_id)))

    def _build_payload(self, item: Dict[str, Any], metadata: Dict[str, Any], record_id) -> Dict[str, Any]:
        """Build the point payload for one element.

        Element-level metadata and file-level metadata are merged, with the
        file-level values taking precedence. A dict-valued ``data_source`` is
        flattened with ``_flatten_dict``; every other value (including nested
        dicts and lists) is stored as-is.
        """
        payload = {
            'text': item.get('text', ''),
            'record_id': record_id,
        }

        merged_metadata = {**item.get('metadata', {}), **metadata}
        for key, value in merged_metadata.items():
            if key in self._FIXED_FIELDS:
                continue
            if key == 'data_source' and isinstance(value, dict):
                payload.update(_flatten_dict(value, 'data_source', self._FIXED_FIELDS))
            else:
                payload[key] = value
        return payload

    def write(self, data: List[Dict[str, Any]], metadata: Dict[str, Any]) -> bool:
        """Replace the points of one record with the elements in *data*.

        Args:
            data: Elements to store; elements without a non-empty
                ``embeddings`` entry are skipped.
            metadata: File-level metadata. Must contain ``record_id``.

        Returns:
            True when the points were upserted; False when ``record_id`` is
            missing, no element carries embeddings, or the write failed.
        """
        try:
            record_id = metadata.get('record_id')
            if not record_id:
                print(" → 没有 record_id")
                logger.warning("没有 record_id")
                return False

            # Remove the previous version of this record first. This step is
            # best effort: a failure is reported but does not stop the write.
            try:
                deleted_count = self._delete_by_record_id(record_id)
                if deleted_count:
                    print(f" ✓ 删除现有记录: record_id={record_id}, 删除 {deleted_count} 条")
                    logger.info(f"删除 Qdrant 现有记录: record_id={record_id}, 删除 {deleted_count} 条")
                else:
                    print(f" → 准备写入记录: record_id={record_id}")
            except Exception as e:
                print(f" ! 删除现有记录失败: {str(e)}")
                logger.warning(f"删除 Qdrant 现有记录失败: record_id={record_id}, {str(e)}")

            points = []
            for item in data:
                if not item.get('embeddings'):
                    continue
                element_id = item.get('element_id') or item.get('id') or str(uuid.uuid4())
                points.append(PointStruct(
                    id=self._to_point_id(element_id),
                    vector=item['embeddings'],
                    payload=self._build_payload(item, metadata, record_id)
                ))

            if not points:
                print(" ! 警告: 没有有效的向量数据")
                return False

            self.client.upsert(
                collection_name=self.collection_name,
                points=points
            )
            print(f" ✓ 写入 Qdrant: {len(points)} 条")
            logger.info(f"写入 Qdrant 成功: {len(points)} 条")
            return True
        except Exception as e:
            print(f" ✗ 写入 Qdrant 失败: {str(e)}")
            logger.error(f"写入 Qdrant 失败: {str(e)}")
            return False
|
|
478
|
+
|
|
479
|
+
|
|
299
480
|
__all__ = [
|
|
300
481
|
'Destination',
|
|
301
482
|
'MilvusDestination',
|
|
483
|
+
'QdrantDestination',
|
|
302
484
|
'LocalDestination',
|
|
303
485
|
'S3Destination',
|
|
304
486
|
]
|
|
@@ -12,7 +12,7 @@ import requests
|
|
|
12
12
|
|
|
13
13
|
from .config import ParseConfig, ChunkConfig, EmbedConfig, Stage, PipelineStats, PipelineConfig
|
|
14
14
|
from .sources import Source, S3Source, LocalSource, FtpSource, SmbSource
|
|
15
|
-
from .destinations import Destination, MilvusDestination, LocalDestination, S3Destination
|
|
15
|
+
from .destinations import Destination, MilvusDestination, QdrantDestination, LocalDestination, S3Destination
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
@@ -145,6 +145,14 @@ class Pipeline:
|
|
|
145
145
|
'dimension': self.destination.dimension
|
|
146
146
|
})
|
|
147
147
|
# api_key 和 token 不在对象中保存,无法恢复
|
|
148
|
+
elif isinstance(self.destination, QdrantDestination):
|
|
149
|
+
config['destination'].update({
|
|
150
|
+
'url': self.destination.url,
|
|
151
|
+
'collection_name': self.destination.collection_name,
|
|
152
|
+
'dimension': self.destination.dimension,
|
|
153
|
+
'prefer_grpc': getattr(self.destination, 'prefer_grpc', False)
|
|
154
|
+
})
|
|
155
|
+
# api_key 不在对象中保存,无法恢复
|
|
148
156
|
elif isinstance(self.destination, LocalDestination):
|
|
149
157
|
config['destination'].update({
|
|
150
158
|
'output_dir': str(self.destination.output_dir)
|
|
@@ -503,6 +511,14 @@ def create_pipeline_from_config(config: Dict[str, Any]) -> Pipeline:
|
|
|
503
511
|
api_key=dest_config.get('api_key'),
|
|
504
512
|
token=dest_config.get('token')
|
|
505
513
|
)
|
|
514
|
+
elif dest_config['type'] == 'qdrant':
|
|
515
|
+
destination = QdrantDestination(
|
|
516
|
+
url=dest_config['url'],
|
|
517
|
+
collection_name=dest_config['collection_name'],
|
|
518
|
+
dimension=dest_config['dimension'],
|
|
519
|
+
api_key=dest_config.get('api_key'),
|
|
520
|
+
prefer_grpc=dest_config.get('prefer_grpc', False)
|
|
521
|
+
)
|
|
506
522
|
elif dest_config['type'] == 'local':
|
|
507
523
|
destination = LocalDestination(
|
|
508
524
|
output_dir=dest_config['output_dir']
|
|
@@ -11,7 +11,7 @@ from datetime import datetime, timezone
|
|
|
11
11
|
from email.utils import parsedate_to_datetime
|
|
12
12
|
from fnmatch import fnmatch
|
|
13
13
|
from pathlib import Path
|
|
14
|
-
from typing import List, Dict, Any, Tuple
|
|
14
|
+
from typing import List, Dict, Any, Tuple, Optional
|
|
15
15
|
|
|
16
16
|
from smb.SMBConnection import SMBConnection
|
|
17
17
|
from botocore.config import Config
|
|
@@ -20,6 +20,56 @@ from botocore.config import Config
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
def _normalize_wildcard_patterns(pattern: Optional[List[str]]) -> Optional[List[str]]:
|
|
24
|
+
"""规范化通配符模式列表
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
pattern: 通配符模式列表,如果为 None 或空列表则返回 None(表示匹配所有文件)
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
通配符模式列表,如果 pattern 是 None、空列表或包含 "*" 则返回 None(表示匹配所有文件)
|
|
31
|
+
"""
|
|
32
|
+
if pattern is None or not pattern:
|
|
33
|
+
return None # None 表示匹配所有文件
|
|
34
|
+
|
|
35
|
+
if not isinstance(pattern, list):
|
|
36
|
+
raise ValueError(f"pattern 类型错误: {type(pattern)}")
|
|
37
|
+
|
|
38
|
+
# 过滤空字符串并去除空格
|
|
39
|
+
normalized = [p.strip() for p in pattern if p and p.strip()]
|
|
40
|
+
|
|
41
|
+
if not normalized:
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
# 如果包含 "*",直接返回 None(匹配所有文件,减少后续开销)
|
|
45
|
+
if '*' in normalized:
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
return normalized
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _match_file_extension(file_path: str, wildcard_patterns: Optional[List[str]]) -> bool:
|
|
52
|
+
"""检查文件路径是否匹配通配符模式
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
file_path: 文件路径
|
|
56
|
+
wildcard_patterns: 已规范化的通配符模式列表(如 ['*.pdf', '*.docx'])
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
如果匹配返回 True,否则返回 False
|
|
60
|
+
"""
|
|
61
|
+
# 如果 wildcard_patterns 是 None 或空列表,匹配所有文件
|
|
62
|
+
if wildcard_patterns is None:
|
|
63
|
+
return True
|
|
64
|
+
|
|
65
|
+
# 检查是否匹配任何一个通配符模式
|
|
66
|
+
for wildcard_pattern in wildcard_patterns:
|
|
67
|
+
if fnmatch(file_path, wildcard_pattern):
|
|
68
|
+
return True
|
|
69
|
+
|
|
70
|
+
return False
|
|
71
|
+
|
|
72
|
+
|
|
23
73
|
def _to_millis_timestamp_string(timestamp):
|
|
24
74
|
"""将时间戳转换为毫秒时间戳字符串
|
|
25
75
|
|
|
@@ -62,11 +112,11 @@ class S3Source(Source):
|
|
|
62
112
|
"""S3/MinIO 数据源"""
|
|
63
113
|
|
|
64
114
|
def __init__(self, endpoint: str, access_key: str, secret_key: str,
|
|
65
|
-
bucket: str, prefix: str = '', region: str = 'us-east-1', pattern: str =
|
|
115
|
+
bucket: str, prefix: str = '', region: str = 'us-east-1', pattern: Optional[List[str]] = None, recursive: bool = False):
|
|
66
116
|
self.endpoint = endpoint
|
|
67
117
|
self.bucket = bucket
|
|
68
118
|
self.prefix = prefix
|
|
69
|
-
self.pattern = pattern
|
|
119
|
+
self.pattern = _normalize_wildcard_patterns(pattern) # 在初始化时规范化
|
|
70
120
|
self.recursive = recursive
|
|
71
121
|
|
|
72
122
|
if self.endpoint == 'https://textin-minio-api.ai.intsig.net':
|
|
@@ -108,7 +158,7 @@ class S3Source(Source):
|
|
|
108
158
|
key = obj['Key']
|
|
109
159
|
if key.endswith('/') or key.endswith('empty.tmp'):
|
|
110
160
|
continue
|
|
111
|
-
if
|
|
161
|
+
if _match_file_extension(key, self.pattern):
|
|
112
162
|
files.append(key)
|
|
113
163
|
|
|
114
164
|
# 非递归模式下,CommonPrefixes 包含子目录,我们忽略它们
|
|
@@ -156,9 +206,9 @@ class S3Source(Source):
|
|
|
156
206
|
class LocalSource(Source):
|
|
157
207
|
"""本地文件系统数据源"""
|
|
158
208
|
|
|
159
|
-
def __init__(self, directory: str, pattern: str =
|
|
209
|
+
def __init__(self, directory: str, pattern: Optional[List[str]] = None, recursive: bool = False):
|
|
160
210
|
self.directory = Path(directory)
|
|
161
|
-
self.pattern = pattern
|
|
211
|
+
self.pattern = _normalize_wildcard_patterns(pattern) # 在初始化时规范化
|
|
162
212
|
self.recursive = recursive
|
|
163
213
|
|
|
164
214
|
if not self.directory.exists():
|
|
@@ -168,20 +218,29 @@ class LocalSource(Source):
|
|
|
168
218
|
logger.info(f"本地目录: {self.directory}")
|
|
169
219
|
|
|
170
220
|
def list_files(self) -> List[str]:
    """List the files under ``self.directory`` that pass the pattern filter.

    Sub-directories are walked only when ``self.recursive`` is set. Paths are
    returned relative to ``self.directory``.
    """
    # rglob walks the whole tree, glob only the directory itself.
    walk = self.directory.rglob if self.recursive else self.directory.glob
    candidates = [
        str(entry.relative_to(self.directory))
        for entry in walk('*')
        if entry.is_file()
    ]

    if self.pattern is None:
        # No filter configured: keep everything.
        files = list(candidates)
    else:
        files = [
            name for name in candidates
            if _match_file_extension(name, self.pattern)
        ]

    print(f"✓ 本地找到 {len(files)} 个文件")
    return files
|
|
187
246
|
|
|
@@ -217,12 +276,12 @@ class LocalSource(Source):
|
|
|
217
276
|
class FtpSource(Source):
|
|
218
277
|
"""FTP 数据源"""
|
|
219
278
|
|
|
220
|
-
def __init__(self, host: str, port: int, username: str, password: str, pattern: str =
|
|
279
|
+
def __init__(self, host: str, port: int, username: str, password: str, pattern: Optional[List[str]] = None, recursive: bool = False):
|
|
221
280
|
self.host = host
|
|
222
281
|
self.port = port
|
|
223
282
|
self.username = username
|
|
224
283
|
self.password = password
|
|
225
|
-
self.pattern = pattern
|
|
284
|
+
self.pattern = _normalize_wildcard_patterns(pattern) # 在初始化时规范化
|
|
226
285
|
self.recursive = recursive
|
|
227
286
|
|
|
228
287
|
self.client = ftplib.FTP()
|
|
@@ -288,12 +347,12 @@ class FtpSource(Source):
|
|
|
288
347
|
except:
|
|
289
348
|
# 不是目录,是文件
|
|
290
349
|
relative_path = full_path.lstrip('/')
|
|
291
|
-
if
|
|
350
|
+
if _match_file_extension(relative_path, self.pattern):
|
|
292
351
|
files.append(relative_path)
|
|
293
352
|
else:
|
|
294
353
|
# 是文件
|
|
295
354
|
relative_path = full_path.lstrip('/')
|
|
296
|
-
if
|
|
355
|
+
if _match_file_extension(relative_path, self.pattern):
|
|
297
356
|
files.append(relative_path)
|
|
298
357
|
|
|
299
358
|
# 恢复原始目录
|
|
@@ -325,7 +384,7 @@ class FtpSource(Source):
|
|
|
325
384
|
item_type = item_info.get('type', 'unknown')
|
|
326
385
|
# 只添加文件,排除目录
|
|
327
386
|
if item_type == 'file' or (item_type == 'unknown' and not item_info.get('type', '').startswith('dir')):
|
|
328
|
-
if
|
|
387
|
+
if _match_file_extension(item_name, self.pattern):
|
|
329
388
|
files.append(item_name)
|
|
330
389
|
except:
|
|
331
390
|
# 如果不支持 MLSD,使用 LIST 命令
|
|
@@ -341,7 +400,7 @@ class FtpSource(Source):
|
|
|
341
400
|
continue
|
|
342
401
|
is_dir = parts[0].startswith('d')
|
|
343
402
|
# 只添加文件,排除目录
|
|
344
|
-
if not is_dir and
|
|
403
|
+
if not is_dir and _match_file_extension(item_name, self.pattern):
|
|
345
404
|
files.append(item_name)
|
|
346
405
|
except:
|
|
347
406
|
# 最后回退到 nlst,通过尝试切换目录来判断是否为目录
|
|
@@ -357,7 +416,7 @@ class FtpSource(Source):
|
|
|
357
416
|
continue
|
|
358
417
|
except:
|
|
359
418
|
# 不能切换,说明是文件
|
|
360
|
-
if
|
|
419
|
+
if _match_file_extension(item_name, self.pattern):
|
|
361
420
|
files.append(item_name)
|
|
362
421
|
|
|
363
422
|
# 确保回到原始目录
|
|
@@ -405,7 +464,7 @@ class SmbSource(Source):
|
|
|
405
464
|
"""SMB/CIFS 数据源"""
|
|
406
465
|
|
|
407
466
|
def __init__(self, host: str, share_name: str, username: str, password: str,
|
|
408
|
-
domain: str = '', port: int = 445, path: str = '', pattern: str =
|
|
467
|
+
domain: str = '', port: int = 445, path: str = '', pattern: Optional[List[str]] = None, recursive: bool = False):
|
|
409
468
|
self.host = host
|
|
410
469
|
self.share_name = share_name
|
|
411
470
|
self.username = username
|
|
@@ -413,7 +472,7 @@ class SmbSource(Source):
|
|
|
413
472
|
self.domain = domain
|
|
414
473
|
self.port = port
|
|
415
474
|
self.path = path.strip('/').strip('\\') if path else ''
|
|
416
|
-
self.pattern = pattern
|
|
475
|
+
self.pattern = _normalize_wildcard_patterns(pattern) # 在初始化时规范化
|
|
417
476
|
self.recursive = recursive
|
|
418
477
|
|
|
419
478
|
self.conn = SMBConnection(
|
|
@@ -451,7 +510,7 @@ class SmbSource(Source):
|
|
|
451
510
|
_list_recursive(conn, share, item_path)
|
|
452
511
|
# 非递归模式:忽略子目录
|
|
453
512
|
else:
|
|
454
|
-
if
|
|
513
|
+
if _match_file_extension(relative_path, self.pattern):
|
|
455
514
|
files.append(relative_path)
|
|
456
515
|
except Exception as e:
|
|
457
516
|
logger.warning(f"列出路径失败 {current_path}: {str(e)}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xparse-client
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.10
|
|
4
4
|
Summary: 面向Agent和RAG的新一代文档处理 AI Infra
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Project-URL: Homepage, https://gitlab.intsig.net/xparse1/xparse-pipeline
|
|
@@ -13,6 +13,7 @@ Requires-Dist: boto3
|
|
|
13
13
|
Requires-Dist: pymilvus[milvus_lite]
|
|
14
14
|
Requires-Dist: requests
|
|
15
15
|
Requires-Dist: pysmb
|
|
16
|
+
Requires-Dist: qdrant-client
|
|
16
17
|
Dynamic: license-file
|
|
17
18
|
|
|
18
19
|
# xParse
|
|
@@ -24,7 +25,7 @@ xParse的同步pipeline实现,支持多种数据源与输出。
|
|
|
24
25
|
## 🌟 特点
|
|
25
26
|
|
|
26
27
|
- **灵活的数据源**:支持兼容 S3 协议的对象存储、本地文件系统以及 FTP/SMB 协议文件系统
|
|
27
|
-
- **灵活的输出**:支持 Milvus/Zilliz 向量数据库、兼容 S3 协议的对象存储以及本地文件系统
|
|
28
|
+
- **灵活的输出**:支持 Milvus/Zilliz/Qdrant 向量数据库、兼容 S3 协议的对象存储以及本地文件系统
|
|
28
29
|
- **统一 Pipeline API**:使用 `/api/xparse/pipeline` 一次性完成 parse → chunk → embed 全流程
|
|
29
30
|
- **配置化处理**:支持灵活配置 parse、chunk、embed 参数
|
|
30
31
|
- **详细统计信息**:返回每个阶段的处理统计数据
|
|
@@ -51,7 +52,7 @@ xParse的同步pipeline实现,支持多种数据源与输出。
|
|
|
51
52
|
│ [embeddings + stats]
|
|
52
53
|
▼
|
|
53
54
|
┌──────────────┐
|
|
54
|
-
│ Destination │ 目的地(Milvus/Zilliz/本地)
|
|
55
|
+
│ Destination │ 目的地(Milvus/Zilliz/Qdrant/本地)
|
|
55
56
|
└──────────────┘
|
|
56
57
|
```
|
|
57
58
|
|
|
@@ -69,7 +70,7 @@ pip install --upgrade xparse-client
|
|
|
69
70
|
|
|
70
71
|
#### 代码配置
|
|
71
72
|
```python
|
|
72
|
-
from xparse_client import ParseConfig, ChunkConfig, EmbedConfig, Stage, Pipeline, S3Source, MilvusDestination
|
|
73
|
+
from xparse_client import ParseConfig, ChunkConfig, EmbedConfig, Stage, Pipeline, S3Source, MilvusDestination, QdrantDestination
|
|
73
74
|
|
|
74
75
|
# 使用新的 stages 格式创建配置
|
|
75
76
|
stages = [
|
|
@@ -173,7 +174,7 @@ source = S3Source(
|
|
|
173
174
|
bucket='textin',
|
|
174
175
|
prefix='',
|
|
175
176
|
region='us-east-1',
|
|
176
|
-
pattern='*.pdf' #
|
|
177
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,支持多个扩展名
|
|
177
178
|
)
|
|
178
179
|
```
|
|
179
180
|
请确保配置的访问凭证至少包括以下几项权限:
|
|
@@ -193,7 +194,7 @@ source = S3Source(
|
|
|
193
194
|
bucket='textin',
|
|
194
195
|
prefix='',
|
|
195
196
|
region='cn-shanghai',
|
|
196
|
-
pattern='*.pdf' #
|
|
197
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,支持多个扩展名
|
|
197
198
|
)
|
|
198
199
|
```
|
|
199
200
|
请确保配置的访问凭证至少包括以下几项权限:
|
|
@@ -214,7 +215,7 @@ source = S3Source(
|
|
|
214
215
|
bucket='textin',
|
|
215
216
|
prefix='',
|
|
216
217
|
region='ap-shanghai',
|
|
217
|
-
pattern='*.pdf' #
|
|
218
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,支持多个扩展名
|
|
218
219
|
)
|
|
219
220
|
```
|
|
220
221
|
|
|
@@ -235,7 +236,7 @@ source = S3Source(
|
|
|
235
236
|
bucket='textin',
|
|
236
237
|
prefix='',
|
|
237
238
|
region='cn-shanghai',
|
|
238
|
-
pattern='*.pdf' #
|
|
239
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,支持多个扩展名
|
|
239
240
|
)
|
|
240
241
|
```
|
|
241
242
|
|
|
@@ -257,7 +258,7 @@ source = S3Source(
|
|
|
257
258
|
bucket='textin',
|
|
258
259
|
prefix='',
|
|
259
260
|
region='cn-east-3',
|
|
260
|
-
pattern='*.pdf' #
|
|
261
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,支持多个扩展名
|
|
261
262
|
)
|
|
262
263
|
```
|
|
263
264
|
|
|
@@ -279,7 +280,7 @@ source = S3Source(
|
|
|
279
280
|
bucket='textin-xparse',
|
|
280
281
|
prefix='',
|
|
281
282
|
region='us-east-1',
|
|
282
|
-
pattern='*.pdf' #
|
|
283
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,支持多个扩展名
|
|
283
284
|
)
|
|
284
285
|
```
|
|
285
286
|
请确保配置的访问凭证至少包括以下几项权限:
|
|
@@ -294,7 +295,7 @@ s3:GetObject
|
|
|
294
295
|
```python
|
|
295
296
|
source = LocalSource(
|
|
296
297
|
directory='./input',
|
|
297
|
-
pattern='*.pdf'
|
|
298
|
+
pattern=['*.pdf', '*.docx'] # 支持多个通配符模式列表
|
|
298
299
|
)
|
|
299
300
|
```
|
|
300
301
|
|
|
@@ -306,7 +307,7 @@ source = FtpSource(
|
|
|
306
307
|
port=21,
|
|
307
308
|
username='', # 用户名,按照实际填写
|
|
308
309
|
password='', # 密码,按照实际填写
|
|
309
|
-
pattern='*.pdf' #
|
|
310
|
+
pattern=['*.pdf'] # 可选,通配符模式列表,过滤指定类型文件
|
|
310
311
|
)
|
|
311
312
|
```
|
|
312
313
|
|
|
@@ -319,11 +320,11 @@ source = SmbSource(
|
|
|
319
320
|
username='', # 用户名,按照实际填写
|
|
320
321
|
password='', # 密码,按照实际填写
|
|
321
322
|
domain='your-smb-domain',
|
|
322
|
-
pattern='**/*.pdf' #
|
|
323
|
+
pattern=['**/*.pdf'] # 可选,通配符模式列表,支持多级匹配
|
|
323
324
|
)
|
|
324
325
|
```
|
|
325
326
|
|
|
326
|
-
> 注 1:所有 Source 均支持 `pattern`
|
|
327
|
+
> 注 1:所有 Source 均支持 `pattern` 参数,使用通配符模式列表(如 `['*.pdf', '*.docx']`)来过滤需要处理的文件。支持多个通配符模式,如果列表中包含 `'*'` 则匹配所有文件。默认为 `None`,即处理全部文件。
|
|
327
328
|
|
|
328
329
|
> 注 2:所有 Source 均支持 `recursive` 参数,表示是否递归遍历,默认为 `False`。
|
|
329
330
|
|
|
@@ -354,6 +355,28 @@ destination = MilvusDestination(
|
|
|
354
355
|
)
|
|
355
356
|
```
|
|
356
357
|
|
|
358
|
+
#### Qdrant 向量存储
|
|
359
|
+
|
|
360
|
+
```python
|
|
361
|
+
destination = QdrantDestination(
|
|
362
|
+
url='http://localhost:6333', # Qdrant 服务地址(本地或云端)
|
|
363
|
+
collection_name='my_collection', # Collection 名称
|
|
364
|
+
dimension=1024, # 向量维度,需与 embed API 返回一致
|
|
365
|
+
api_key='your-api-key', # 可选,Qdrant Cloud API Key
|
|
366
|
+
prefer_grpc=False # 可选,是否优先使用 gRPC(默认 False)
|
|
367
|
+
)
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
**Qdrant Cloud 示例:**
|
|
371
|
+
```python
|
|
372
|
+
destination = QdrantDestination(
|
|
373
|
+
url='https://xxxxxxx.us-east-1-0.aws.cloud.qdrant.io',
|
|
374
|
+
collection_name='my_collection',
|
|
375
|
+
dimension=1024,
|
|
376
|
+
api_key='your-api-key'
|
|
377
|
+
)
|
|
378
|
+
```
|
|
379
|
+
|
|
357
380
|
#### 本地文件系统目的地
|
|
358
381
|
|
|
359
382
|
将在配置的本地文件地址中写入`json`文件。
|
|
@@ -533,7 +556,7 @@ source = S3Source(
|
|
|
533
556
|
bucket='documents',
|
|
534
557
|
prefix='pdfs/',
|
|
535
558
|
region='us-east-1',
|
|
536
|
-
pattern='*.pdf', # 仅处理匹配的文件
|
|
559
|
+
pattern=['*.pdf'], # 仅处理匹配的文件
|
|
537
560
|
recursive=False # 不递归子目录
|
|
538
561
|
)
|
|
539
562
|
|
|
@@ -597,7 +620,7 @@ from xparse_client import (
|
|
|
597
620
|
# 手动创建 Pipeline
|
|
598
621
|
source = LocalSource(
|
|
599
622
|
directory='./test_files',
|
|
600
|
-
pattern='*.pdf',
|
|
623
|
+
pattern=['*.pdf'],
|
|
601
624
|
recursive=False
|
|
602
625
|
)
|
|
603
626
|
|
|
@@ -644,7 +667,7 @@ from xparse_client import (
|
|
|
644
667
|
# 创建本地数据源
|
|
645
668
|
source = LocalSource(
|
|
646
669
|
directory='./test_files',
|
|
647
|
-
pattern='*.pdf',
|
|
670
|
+
pattern=['*.pdf'],
|
|
648
671
|
recursive=False
|
|
649
672
|
)
|
|
650
673
|
|
|
@@ -771,7 +794,7 @@ source = FtpSource(
|
|
|
771
794
|
port=21,
|
|
772
795
|
username='user',
|
|
773
796
|
password='pass',
|
|
774
|
-
pattern='*.pdf',
|
|
797
|
+
pattern=['*.pdf'],
|
|
775
798
|
recursive=False
|
|
776
799
|
)
|
|
777
800
|
|
|
@@ -831,7 +854,7 @@ from xparse_client import (
|
|
|
831
854
|
# 创建 Pipeline
|
|
832
855
|
source = LocalSource(
|
|
833
856
|
directory='./docs',
|
|
834
|
-
pattern='*.pdf',
|
|
857
|
+
pattern=['*.pdf'],
|
|
835
858
|
recursive=False
|
|
836
859
|
)
|
|
837
860
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
example/run_pipeline.py,sha256=d4pPDqjiC9dPNh6nmArPOF7fPMY0a-jcvdgtNuV-_kM,15795
|
|
2
|
+
example/run_pipeline_test.py,sha256=pxsNiq_LmP6M4R7tTuja0u-Lu7fW-wIBU1uBf0-agQI,14845
|
|
3
|
+
xparse_client/__init__.py,sha256=C2XLxkCoONl6_B1FmDhWRw84TqOL4pZF20br-K26SSY,1721
|
|
4
|
+
xparse_client/pipeline/__init__.py,sha256=TVlb2AGCNKP0jrv3p4ZLZCPKp68hTVMFi00DTdi6QAo,49
|
|
5
|
+
xparse_client/pipeline/config.py,sha256=FFYq2a0dBWBEj70s2aInXOiQ5MwwHimd6SI2_tkp52w,4138
|
|
6
|
+
xparse_client/pipeline/destinations.py,sha256=9UyZ8Ygjoe4yAq6-VZNZBoNYRbb3mahify3c1AdOHMY,20775
|
|
7
|
+
xparse_client/pipeline/pipeline.py,sha256=ZspagUjiL5wnzGJq6A7riOU8qGXJMtg1fqPm9H09mkk,27272
|
|
8
|
+
xparse_client/pipeline/sources.py,sha256=D-kLrSQ-qsFFFq7JC4sL3Y3Q3Q87Wcpv9R5K85YkDjE,22144
|
|
9
|
+
xparse_client-0.2.10.dist-info/licenses/LICENSE,sha256=ckIP-MbocsP9nqYnta5KgfAicYF196B5TNdHIR6kOO0,1075
|
|
10
|
+
xparse_client-0.2.10.dist-info/METADATA,sha256=gIY_PxB1pTxSlKJZjU7z1Iua6ZMtAfMfHFeztWp2zIw,28785
|
|
11
|
+
xparse_client-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
12
|
+
xparse_client-0.2.10.dist-info/top_level.txt,sha256=bfX8BWo1sEEQVsI4Ql4Uu80vrfEh5zfajU9YqFTzxMo,22
|
|
13
|
+
xparse_client-0.2.10.dist-info/RECORD,,
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
example/run_pipeline.py,sha256=ybAWBPXcQClRk1HOMySLi9IUPIs1Qn-S5HXNLbNJHjs,15459
|
|
2
|
-
example/run_pipeline_test.py,sha256=uIU09FTv_VnTQS1Lc94ydc3kaD86eHkaHQbVXpsGEcA,14861
|
|
3
|
-
xparse_client/__init__.py,sha256=je1ena3HwLL4CRtLU4r6EAzoOIJthlPjTwshxZnzQDM,1677
|
|
4
|
-
xparse_client/pipeline/__init__.py,sha256=TVlb2AGCNKP0jrv3p4ZLZCPKp68hTVMFi00DTdi6QAo,49
|
|
5
|
-
xparse_client/pipeline/config.py,sha256=FFYq2a0dBWBEj70s2aInXOiQ5MwwHimd6SI2_tkp52w,4138
|
|
6
|
-
xparse_client/pipeline/destinations.py,sha256=F0z1AgVIBOn0m32i4l7LCMkJE0IbBdlpykO_at_wLaE,11931
|
|
7
|
-
xparse_client/pipeline/pipeline.py,sha256=IRTxN4YUJi9Wrm1G1ysGvcwsPsGh0inbquBH3nWYmAA,26477
|
|
8
|
-
xparse_client/pipeline/sources.py,sha256=UeVbWv6n0wQkIZIBBhrFCiyydQX7cvwmkoMgcf12p9g,19940
|
|
9
|
-
xparse_client-0.2.8.dist-info/licenses/LICENSE,sha256=ckIP-MbocsP9nqYnta5KgfAicYF196B5TNdHIR6kOO0,1075
|
|
10
|
-
xparse_client-0.2.8.dist-info/METADATA,sha256=LX8TfLSbFZerGPhh16x5QK1lwrPh55CYKNLhr2kdBcY,27850
|
|
11
|
-
xparse_client-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
12
|
-
xparse_client-0.2.8.dist-info/top_level.txt,sha256=bfX8BWo1sEEQVsI4Ql4Uu80vrfEh5zfajU9YqFTzxMo,22
|
|
13
|
-
xparse_client-0.2.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|