unstructured-ingest 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/{databricks_tests → databricks}/test_volumes_native.py +75 -19
- test/integration/connectors/sql/test_postgres.py +6 -2
- test/integration/connectors/sql/test_singlestore.py +6 -2
- test/integration/connectors/sql/test_snowflake.py +6 -2
- test/integration/connectors/sql/test_sqlite.py +6 -2
- test/integration/connectors/test_milvus.py +13 -0
- test/integration/connectors/test_onedrive.py +6 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/embedders/test_bedrock.py +28 -0
- test/integration/embedders/test_octoai.py +14 -0
- test/integration/embedders/test_openai.py +13 -0
- test/integration/embedders/test_togetherai.py +10 -0
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/embed/test_octoai.py +8 -1
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/bedrock.py +39 -11
- unstructured_ingest/embed/interfaces.py +5 -0
- unstructured_ingest/embed/octoai.py +44 -3
- unstructured_ingest/embed/openai.py +37 -1
- unstructured_ingest/embed/togetherai.py +28 -1
- unstructured_ingest/embed/voyageai.py +33 -1
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/file_data.py +11 -1
- unstructured_ingest/v2/processes/connectors/__init__.py +7 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +2 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +0 -1
- unstructured_ingest/v2/processes/connectors/couchbase.py +2 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +2 -2
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -1
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +5 -2
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +14 -3
- unstructured_ingest/v2/processes/connectors/milvus.py +15 -6
- unstructured_ingest/v2/processes/connectors/mongodb.py +3 -4
- unstructured_ingest/v2/processes/connectors/neo4j.py +2 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +79 -25
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +0 -1
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +5 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/unstructured_api.py +25 -2
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/METADATA +20 -16
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/RECORD +52 -48
- test/integration/connectors/test_kafka.py +0 -304
- /test/integration/connectors/{databricks_tests → databricks}/__init__.py +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/top_level.txt +0 -0
|
@@ -10,17 +10,18 @@ test/integration/connectors/test_azure_ai_search.py,sha256=EGV-G_Lq3h6pHhhmmQGWj
|
|
|
10
10
|
test/integration/connectors/test_chroma.py,sha256=KQCzBJsOHAOtg0Ehp0tNtuYchFtiSmhHDKyOju33kJg,3686
|
|
11
11
|
test/integration/connectors/test_confluence.py,sha256=adJxIggjuO-jgMimBZdv_AqWeBFlQoodELucIYwWC98,3546
|
|
12
12
|
test/integration/connectors/test_delta_table.py,sha256=xsnJmwlWVQrccYeAtpt2lm0DYm2jGxiKXeERQXqCDCM,6884
|
|
13
|
-
test/integration/connectors/test_kafka.py,sha256=FtHLptvS9V3Br7wCm2Xyh_ulz8_wWvCOKKEd0xD9LyM,10758
|
|
14
13
|
test/integration/connectors/test_lancedb.py,sha256=U2HfIrf6iJ7lYMn-vz0j-LesVyDY-jc9QrQhlJVhG9Q,9183
|
|
15
|
-
test/integration/connectors/test_milvus.py,sha256=
|
|
14
|
+
test/integration/connectors/test_milvus.py,sha256=aRT5SpJHY4NA8pG_LcVTJwYwvLw2W_OOE-NIfDq03SE,7015
|
|
16
15
|
test/integration/connectors/test_mongodb.py,sha256=UZ4eo61MisCw4s0p7HWaediN7M-lSddMDs71RFgdmJs,12347
|
|
17
16
|
test/integration/connectors/test_neo4j.py,sha256=Esiq_Z9k1JLrWNXPmLBsX3LLwyEozwKoxX7iwMEJjRM,8252
|
|
18
|
-
test/integration/connectors/test_onedrive.py,sha256=
|
|
17
|
+
test/integration/connectors/test_onedrive.py,sha256=Bp9Ayv59JnfsjSwqbQ-zYvg-XAPGgZfKJ45Asc0y1bM,3808
|
|
19
18
|
test/integration/connectors/test_pinecone.py,sha256=suPFi40d6rHXurQQLIpCzW5XRTdgzlP-f-KLPhGCUHo,10208
|
|
20
19
|
test/integration/connectors/test_qdrant.py,sha256=hyuqSJDaylkQVxWh7byD8jo8bwPuBxSa8MWRD3sBu-Y,7906
|
|
20
|
+
test/integration/connectors/test_redis.py,sha256=Q_KAZPNE9NIoRN2UsbXtc1fe_aJg66RbSQtS3OKNpc0,4327
|
|
21
21
|
test/integration/connectors/test_s3.py,sha256=PJaAwFRF2lXMQlkbv9JHpngPc6706ML7zowOlXT3TcY,7033
|
|
22
|
-
test/integration/connectors/
|
|
23
|
-
test/integration/connectors/
|
|
22
|
+
test/integration/connectors/test_vectara.py,sha256=_FQHbhxL3f1rLV9MrHOvcljm_4qTVf5xl-Q7MplE_xs,8688
|
|
23
|
+
test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
test/integration/connectors/databricks/test_volumes_native.py,sha256=ig60-nCdLF0GsgJowG9eRaG28iuoYHtuf12HdK6OE1I,7764
|
|
24
25
|
test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
26
|
test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
|
|
26
27
|
test/integration/connectors/duckdb/test_duckdb.py,sha256=tZfHJYNILVqwT20XD-aJUFZ67TnJvHLpfAxNvNiE51o,2891
|
|
@@ -30,10 +31,10 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
|
|
|
30
31
|
test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=Lm8topVssTsqcI8H2Tzohuxb9j-CFHv9orM6WfAqCZw,11933
|
|
31
32
|
test/integration/connectors/elasticsearch/test_opensearch.py,sha256=fWpZrhzRiVpm9AOlZvgZRCjyXSYvWG7-8j06x-HR3PY,11311
|
|
32
33
|
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
-
test/integration/connectors/sql/test_postgres.py,sha256=
|
|
34
|
-
test/integration/connectors/sql/test_singlestore.py,sha256=
|
|
35
|
-
test/integration/connectors/sql/test_snowflake.py,sha256
|
|
36
|
-
test/integration/connectors/sql/test_sqlite.py,sha256=
|
|
34
|
+
test/integration/connectors/sql/test_postgres.py,sha256=DXyHMZBQgrV2HyVflkoBpT1mewSnvw3ugoHtGR5o8OM,6876
|
|
35
|
+
test/integration/connectors/sql/test_singlestore.py,sha256=pzCPo8IW3c9VH-f3UdJS5MjPjkHarJPSepAxV0ZVajo,6059
|
|
36
|
+
test/integration/connectors/sql/test_snowflake.py,sha256=MiTzepeeJlv147CyzCGyd16MRk5QeUw4g4L3TTi5gVY,7400
|
|
37
|
+
test/integration/connectors/sql/test_sqlite.py,sha256=rSkjv3KpslAvt_8LQecJUT0lOLtuZSvhtlW2deJovLI,5862
|
|
37
38
|
test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
39
|
test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
|
|
39
40
|
test/integration/connectors/utils/docker.py,sha256=8uOTJ3AVG1dxK4OiLvOLfRxL_TsYQX2KKCID9TZ7-Ac,4995
|
|
@@ -50,17 +51,17 @@ test/integration/connectors/weaviate/test_local.py,sha256=SK6iEwQUKiCd0X99BEk8Gl
|
|
|
50
51
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
52
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
52
53
|
test/integration/embedders/test_azure_openai.py,sha256=6tFpKFBFRXD49imhhRzsvy3MPtuZ4L1PtnKyMVBRAqc,1808
|
|
53
|
-
test/integration/embedders/test_bedrock.py,sha256=
|
|
54
|
+
test/integration/embedders/test_bedrock.py,sha256=ZyS17PLaHOeh-ykrd71Jkgg_ext7aOadxvxVJ_4IvFE,2852
|
|
54
55
|
test/integration/embedders/test_huggingface.py,sha256=0mMTOO-Nh7KB70AGs_7LLQIxMYrnSPqyihriUeqACbM,1007
|
|
55
56
|
test/integration/embedders/test_mixedbread.py,sha256=RrLv8SByMNXsgrlh94RbaT-VyxZ4-DILO-OPpmOwvSI,1441
|
|
56
|
-
test/integration/embedders/test_octoai.py,sha256=
|
|
57
|
-
test/integration/embedders/test_openai.py,sha256=
|
|
58
|
-
test/integration/embedders/test_togetherai.py,sha256=
|
|
57
|
+
test/integration/embedders/test_octoai.py,sha256=oQYpYh2XaKhiqtnOSpH0rP9TQrzykZ1-3C3jZRurPu8,1734
|
|
58
|
+
test/integration/embedders/test_openai.py,sha256=s4_XGQfVpsTb4hKh2QZkXdOG_MnF5OQgL98kzNjTFCg,1664
|
|
59
|
+
test/integration/embedders/test_togetherai.py,sha256=3otyr6i9smJMyXbhKCcaC2gx813rqGaZTKi2sEM7GIQ,1707
|
|
59
60
|
test/integration/embedders/test_vertexai.py,sha256=OtoFzmrWWhGIO5Bbl5zt_4sp6qRHZxtaDQKpGcfzNLM,1345
|
|
60
61
|
test/integration/embedders/test_voyageai.py,sha256=Zqf7nn1AxfBDBr5A9Jr-5pxes4QNvfKiyeGexCCm4nY,1346
|
|
61
62
|
test/integration/embedders/utils.py,sha256=3AMKMBpgBep_0jFqrqMHH8BJo6w60kpouSZ5JPJTwIA,1850
|
|
62
63
|
test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
test/integration/partitioners/test_partitioner.py,sha256=
|
|
64
|
+
test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
|
|
64
65
|
test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
65
66
|
test/unit/test_chunking_utils.py,sha256=0iPwfnMPpyTm-yOE0BXMnEQQP4iguS6NhOqgMQU5nhk,1390
|
|
66
67
|
test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
|
|
@@ -69,7 +70,7 @@ test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
|
|
|
69
70
|
test/unit/test_utils.py,sha256=Q6mp9YZPah8z3-2lreyRbmAc7m2Y_w26_N9vocSInoA,5421
|
|
70
71
|
test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
72
|
test/unit/embed/test_mixedbreadai.py,sha256=XFNJDP5pIgF3eQYwBiuEWmH3zZWx72Wpwyv-Q4m0DJg,1332
|
|
72
|
-
test/unit/embed/test_octoai.py,sha256=
|
|
73
|
+
test/unit/embed/test_octoai.py,sha256=pouR4J6B_mrlu4TsA5yr2Ln_LCYL2pGBojXY5KEqvKI,1053
|
|
73
74
|
test/unit/embed/test_openai.py,sha256=0O1yshDcE0BMKv1yJqrNuiNLSdPhLpKqJ-D_wmnidsM,831
|
|
74
75
|
test/unit/embed/test_vertexai.py,sha256=Pl7COc9E3tf_yGidkTEmTizNGyZF1F5zuL2TgPTMnfI,1048
|
|
75
76
|
test/unit/embed/test_voyageai.py,sha256=DviCOJFhe5H4e26-kNyX3JNe8h3qB5Yl0KOe8rQEMrc,981
|
|
@@ -94,7 +95,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
94
95
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
96
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
96
97
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
97
|
-
unstructured_ingest/__version__.py,sha256=
|
|
98
|
+
unstructured_ingest/__version__.py,sha256=R522TM0FvpKddIRo55tqz-j1ENS8k4uXjk60bKhQ50M,43
|
|
98
99
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
99
100
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
100
101
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -263,15 +264,15 @@ unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ
|
|
|
263
264
|
unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
|
|
264
265
|
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
265
266
|
unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
|
|
266
|
-
unstructured_ingest/embed/bedrock.py,sha256
|
|
267
|
+
unstructured_ingest/embed/bedrock.py,sha256=qb1eo1Uooz2JxhHcqvJDTYGbuwaqyRTD0ZepQzlL9_o,4455
|
|
267
268
|
unstructured_ingest/embed/huggingface.py,sha256=2cBiQhOhfWHX3hS-eKjocysOkUaRlyRfUj9Kxjrp6cE,1934
|
|
268
|
-
unstructured_ingest/embed/interfaces.py,sha256=
|
|
269
|
+
unstructured_ingest/embed/interfaces.py,sha256=XsPtb53367KCkH-ItwWQ_EQ-sYWHaekhxkF4PwHCNXc,2210
|
|
269
270
|
unstructured_ingest/embed/mixedbreadai.py,sha256=OwFWWukvkQaXhjgs6b6N6D4w7sYrtcHNhsHAj-Bocj4,4268
|
|
270
|
-
unstructured_ingest/embed/octoai.py,sha256=
|
|
271
|
-
unstructured_ingest/embed/openai.py,sha256=
|
|
272
|
-
unstructured_ingest/embed/togetherai.py,sha256=
|
|
271
|
+
unstructured_ingest/embed/octoai.py,sha256=0LVZlbOMUuxwZV0QHhGWUlneWDX3fCklPRTuc4huze0,3007
|
|
272
|
+
unstructured_ingest/embed/openai.py,sha256=5M2idJ7Ynx_3-FXwm9mTjGnNiww0DSuZmbuvi2YAUqk,2543
|
|
273
|
+
unstructured_ingest/embed/togetherai.py,sha256=2jXYFB9QTDUlSKc_j32bMrwKu7YQA0oF893rGSmlXr8,2374
|
|
273
274
|
unstructured_ingest/embed/vertexai.py,sha256=X5bGJdXyR5nAFH_ocAVgEowmd60nOBykyfclYo3VfBM,2808
|
|
274
|
-
unstructured_ingest/embed/voyageai.py,sha256=
|
|
275
|
+
unstructured_ingest/embed/voyageai.py,sha256=BfYa-oedkq-56j5_0rDjOLy18b9zC0zagaoPHJry5xA,2958
|
|
275
276
|
unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
|
|
276
277
|
unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
|
|
277
278
|
unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
|
|
@@ -360,10 +361,11 @@ unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_
|
|
|
360
361
|
unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
|
|
361
362
|
unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
362
363
|
unstructured_ingest/v2/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
|
|
364
|
+
unstructured_ingest/v2/errors.py,sha256=y1tGvobuhQdcR9vw5APuFigiQSfsQKrAYGDr4biGDdw,207
|
|
363
365
|
unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
|
|
364
366
|
unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
|
|
365
367
|
unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
|
|
366
|
-
unstructured_ingest/v2/unstructured_api.py,sha256=
|
|
368
|
+
unstructured_ingest/v2/unstructured_api.py,sha256=g6AO2Vy0lpy6-ooOvdgfJvIRhearPKArp3ggIdApG8I,4514
|
|
367
369
|
unstructured_ingest/v2/utils.py,sha256=HHli5rHDBm6flUeQ_ovVDvtOdnzzL4FvNyw6jsHIJfw,2041
|
|
368
370
|
unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
369
371
|
unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
|
|
@@ -379,7 +381,7 @@ unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1K
|
|
|
379
381
|
unstructured_ingest/v2/interfaces/__init__.py,sha256=9VO09XuTvyOcFF8ZDKN169fNb_uA5TAYzPsiPHOyxhQ,963
|
|
380
382
|
unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
381
383
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
|
|
382
|
-
unstructured_ingest/v2/interfaces/file_data.py,sha256=
|
|
384
|
+
unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
|
|
383
385
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
|
|
384
386
|
unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
|
|
385
387
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
@@ -406,40 +408,42 @@ unstructured_ingest/v2/processes/embedder.py,sha256=xCBpaL07WnVUOUW8SHktaf1vwBGZ
|
|
|
406
408
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
407
409
|
unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
|
|
408
410
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
409
|
-
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=
|
|
411
|
+
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=bmogp1sPbRS-RndN0R8V8gY4uaTkpmNJv-035-Y5SGU,5835
|
|
410
412
|
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
|
|
411
|
-
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
|
|
413
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
|
|
412
414
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
|
|
413
|
-
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=
|
|
415
|
+
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
414
416
|
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=-Y1OU_ZXhZQNj5NH3EN01CP8QKKZJaJ9xkXoAlSgnIk,7604
|
|
415
|
-
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=
|
|
417
|
+
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
|
|
416
418
|
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
|
|
417
419
|
unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
|
|
418
420
|
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=5k7pdAzJGXSdyPCzW9vu2OaAjGVTo2JevDyGaXM1Hvk,13370
|
|
419
421
|
unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOGQWxudzQEDopXM8XkfkQ2j6g,5004
|
|
420
422
|
unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
|
|
421
|
-
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=
|
|
422
|
-
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=
|
|
423
|
-
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=
|
|
424
|
-
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=
|
|
423
|
+
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
|
|
424
|
+
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
|
|
425
|
+
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=QTw_Kq1_kHMdqsaBST6yW8vl-SYXVQFlIofDP1W_IuI,14250
|
|
426
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=d6gC40YmfqBNXxizAt4MO4OOu5BoCZ7SAe1AbNwTP0E,18322
|
|
425
427
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
426
428
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=cohF7gBj0opSGKXlENSdGfTtyIKMHd1pwu4ydeb7JAY,10605
|
|
429
|
+
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
|
|
427
430
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
428
431
|
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
|
|
429
432
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
430
433
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
434
|
+
unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
|
|
431
435
|
unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
|
|
432
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=
|
|
433
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=
|
|
434
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=
|
|
435
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=
|
|
436
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=
|
|
436
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=kI_ThB5e-DS8-GiQP5TQ8cP3fiGRm-V2AuNlGoSjH6I,6613
|
|
437
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
|
|
438
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=cb-EUW0T-linZMkbU6AcKEGWnFHQvhpO5Abtps4P2X0,3532
|
|
439
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8NubkyHw49IpW_42g6w1Koxlm56EPiPf1lB-eoRSI,2783
|
|
440
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
|
|
437
441
|
unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
|
|
438
442
|
unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=FVblIddorGCh9D9GZ8zLVUm8n39PJA5JLoJeWd-tSy8,2610
|
|
439
443
|
unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
|
|
440
444
|
unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=mU5x6SnbFgRsVicNGh4y4gtR6ek7eQFinI0dQQmzMds,4481
|
|
441
445
|
unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
|
|
442
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=
|
|
446
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
|
|
443
447
|
unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
|
|
444
448
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
445
449
|
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=NWsouhaxeyxrS_WlZJ70X2YIdioFH5LSaRLhnCPYAH0,6034
|
|
@@ -451,8 +455,8 @@ unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=l3TRKPEb0AJ7e0VS
|
|
|
451
455
|
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=JsxXKXNI20mdgwR_A6Rnf4u8fsFwLe3AkJmIe_3NEKY,6150
|
|
452
456
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
453
457
|
unstructured_ingest/v2/processes/connectors/kafka/__init__.py,sha256=mQJ9Ex-QCfhz-BB5YWTfbPf7xGLd1i7FpjRr0ukbhNw,754
|
|
454
|
-
unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=
|
|
455
|
-
unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=
|
|
458
|
+
unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=GdAeQ8Uz-6v1C5byBHtjfevVfbzW3obScBFFLRTb0ps,3441
|
|
459
|
+
unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=UfS41jzV9VxekS6AwWHhURJmJ7RUAw5iiIrj75BWrXQ,10255
|
|
456
460
|
unstructured_ingest/v2/processes/connectors/kafka/local.py,sha256=lUkmfbTxyQW87CXxbJaijIT6foV09Gi-IG9o08OgiEs,2581
|
|
457
461
|
unstructured_ingest/v2/processes/connectors/lancedb/__init__.py,sha256=LW37xZrn48JeHluRNulLTreUPdaF-ZU81F7MCUHcCv8,1253
|
|
458
462
|
unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur53C4MEnpLplfO8U91KYgk--0kk5pE,1413
|
|
@@ -464,22 +468,22 @@ unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gA
|
|
|
464
468
|
unstructured_ingest/v2/processes/connectors/qdrant/__init__.py,sha256=xM19uYzAuGizVoZIM_hnVZ5AcBN69aOBGpqZcpWPtuE,760
|
|
465
469
|
unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-KiVBDBDBYYx5A9CUoikP5NCErRmfik,1624
|
|
466
470
|
unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ4DU8yhJWMpL82QYwBVdPTxxNuV127U,1588
|
|
467
|
-
unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=
|
|
471
|
+
unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
|
|
468
472
|
unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
|
|
469
473
|
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=E16CXRBw8fZKTuXIECns5wif_I07oncBHskVxHC4p7w,1448
|
|
470
474
|
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
|
|
471
475
|
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=-2E9dsdNhjAiuzeSBytBbAhljOhvQ8kN8wvlUESvLo8,5465
|
|
472
476
|
unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=TApGi5G0W0TktJFmo4QWDR3X3R-MUQTKbIxjAX_M8ZI,7402
|
|
473
|
-
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=
|
|
477
|
+
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=HC-qnhdpuyScKoGh50pPjkQLGSac_mOAnuB2FZwSVl0,15265
|
|
474
478
|
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=Q5RAqn5Ccw-pbeKZLkiMn5IVw6EemCMukXzLlS7pDhc,5162
|
|
475
479
|
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
|
|
476
480
|
unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
|
|
477
481
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
478
482
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
479
483
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
|
|
480
|
-
unstructured_ingest-0.3.
|
|
481
|
-
unstructured_ingest-0.3.
|
|
482
|
-
unstructured_ingest-0.3.
|
|
483
|
-
unstructured_ingest-0.3.
|
|
484
|
-
unstructured_ingest-0.3.
|
|
485
|
-
unstructured_ingest-0.3.
|
|
484
|
+
unstructured_ingest-0.3.12.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
485
|
+
unstructured_ingest-0.3.12.dist-info/METADATA,sha256=nNPregI5d4D8fHqXxTPkKmn7bqmfUX5RB-AcMsgj0J4,7769
|
|
486
|
+
unstructured_ingest-0.3.12.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
487
|
+
unstructured_ingest-0.3.12.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
488
|
+
unstructured_ingest-0.3.12.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
489
|
+
unstructured_ingest-0.3.12.dist-info/RECORD,,
|
|
@@ -1,304 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
import tempfile
|
|
4
|
-
import time
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import pytest
|
|
8
|
-
from confluent_kafka import Consumer, KafkaError, KafkaException, Producer
|
|
9
|
-
from confluent_kafka.admin import AdminClient, NewTopic
|
|
10
|
-
|
|
11
|
-
from test.integration.connectors.utils.constants import (
|
|
12
|
-
DESTINATION_TAG,
|
|
13
|
-
SOURCE_TAG,
|
|
14
|
-
env_setup_path,
|
|
15
|
-
)
|
|
16
|
-
from test.integration.connectors.utils.docker_compose import docker_compose_context
|
|
17
|
-
from test.integration.connectors.utils.validation.source import (
|
|
18
|
-
SourceValidationConfigs,
|
|
19
|
-
source_connector_validation,
|
|
20
|
-
)
|
|
21
|
-
from test.integration.utils import requires_env
|
|
22
|
-
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
23
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
24
|
-
from unstructured_ingest.v2.processes.connectors.kafka.cloud import (
|
|
25
|
-
CloudKafkaAccessConfig,
|
|
26
|
-
CloudKafkaConnectionConfig,
|
|
27
|
-
CloudKafkaDownloader,
|
|
28
|
-
CloudKafkaDownloaderConfig,
|
|
29
|
-
CloudKafkaIndexer,
|
|
30
|
-
CloudKafkaIndexerConfig,
|
|
31
|
-
)
|
|
32
|
-
from unstructured_ingest.v2.processes.connectors.kafka.local import (
|
|
33
|
-
CONNECTOR_TYPE,
|
|
34
|
-
LocalKafkaConnectionConfig,
|
|
35
|
-
LocalKafkaDownloader,
|
|
36
|
-
LocalKafkaDownloaderConfig,
|
|
37
|
-
LocalKafkaIndexer,
|
|
38
|
-
LocalKafkaIndexerConfig,
|
|
39
|
-
LocalKafkaUploader,
|
|
40
|
-
LocalKafkaUploaderConfig,
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
SEED_MESSAGES = 10
|
|
44
|
-
TOPIC = "fake-topic"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def get_admin_client() -> AdminClient:
|
|
48
|
-
conf = {
|
|
49
|
-
"bootstrap.servers": "localhost:29092",
|
|
50
|
-
}
|
|
51
|
-
return AdminClient(conf)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
@pytest.fixture
|
|
55
|
-
def docker_compose_ctx():
|
|
56
|
-
with docker_compose_context(docker_compose_path=env_setup_path / "kafka") as ctx:
|
|
57
|
-
yield ctx
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def wait_for_topic(
|
|
61
|
-
topic: str,
|
|
62
|
-
retries: int = 10,
|
|
63
|
-
interval: int = 1,
|
|
64
|
-
exists: bool = True,
|
|
65
|
-
admin_client=None,
|
|
66
|
-
):
|
|
67
|
-
if admin_client is None:
|
|
68
|
-
admin_client = get_admin_client()
|
|
69
|
-
current_topics = admin_client.list_topics().topics
|
|
70
|
-
attempts = 0
|
|
71
|
-
while (topic not in current_topics) == exists and attempts < retries:
|
|
72
|
-
attempts += 1
|
|
73
|
-
print(
|
|
74
|
-
"Attempt {}: Waiting for topic {} to {} exist. Current topics: [{}]".format(
|
|
75
|
-
attempts, topic, "" if exists else "not", ", ".join(current_topics)
|
|
76
|
-
)
|
|
77
|
-
)
|
|
78
|
-
time.sleep(interval)
|
|
79
|
-
current_topics = admin_client.list_topics().topics
|
|
80
|
-
if (topic not in current_topics) == exists:
|
|
81
|
-
raise TimeoutError(f"Timeout out waiting for topic {topic} to exist")
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
@pytest.fixture
|
|
85
|
-
def kafka_seed_topic(docker_compose_ctx) -> str:
|
|
86
|
-
conf = {
|
|
87
|
-
"bootstrap.servers": "localhost:29092",
|
|
88
|
-
}
|
|
89
|
-
producer = Producer(conf)
|
|
90
|
-
for i in range(SEED_MESSAGES):
|
|
91
|
-
message = f"This is some text for message {i}"
|
|
92
|
-
producer.produce(topic=TOPIC, value=message)
|
|
93
|
-
producer.flush(timeout=10)
|
|
94
|
-
print(f"kafka topic {TOPIC} seeded with {SEED_MESSAGES} messages")
|
|
95
|
-
wait_for_topic(topic=TOPIC)
|
|
96
|
-
return TOPIC
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
@pytest.fixture
|
|
100
|
-
def kafka_upload_topic(docker_compose_ctx) -> str:
|
|
101
|
-
admin_client = get_admin_client()
|
|
102
|
-
admin_client.create_topics([NewTopic(TOPIC, 1, 1)])
|
|
103
|
-
return TOPIC
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
@pytest.mark.asyncio
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
async def test_kafka_source_local(kafka_seed_topic: str):
    """Index and download five messages from the seeded local topic and validate them."""
    local_connection = LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092)
    with tempfile.TemporaryDirectory() as scratch_dir:
        download_config = LocalKafkaDownloaderConfig(download_dir=Path(scratch_dir))
        index_config = LocalKafkaIndexerConfig(topic=kafka_seed_topic, num_messages_to_consume=5)
        indexer = LocalKafkaIndexer(
            connection_config=local_connection,
            index_config=index_config,
        )
        downloader = LocalKafkaDownloader(
            connection_config=local_connection,
            download_config=download_config,
        )
        # Verify connectivity before running the shared source validation.
        indexer.precheck()
        validation_configs = SourceValidationConfigs(
            test_id="kafka-local",
            expected_num_files=5,
            validate_downloaded_files=True,
        )
        await source_connector_validation(
            indexer=indexer,
            downloader=downloader,
            configs=validation_configs,
        )
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
@pytest.fixture
def kafka_seed_topic_cloud(expected_messages: int = 5) -> int:
    """Recreate TOPIC on the cloud cluster and seed it with text messages.

    Connection settings are read from the KAFKA_BOOTSTRAP_SERVER, KAFKA_API_KEY
    and KAFKA_SECRET environment variables.

    Args:
        expected_messages: Number of messages to produce to the topic.

    Returns:
        The number of messages that were produced.
    """
    conf = {
        "bootstrap.servers": os.environ["KAFKA_BOOTSTRAP_SERVER"],
        "sasl.username": os.environ["KAFKA_API_KEY"],
        "sasl.password": os.environ["KAFKA_SECRET"],
        "sasl.mechanism": "PLAIN",
        "security.protocol": "SASL_SSL",
    }
    admin_client = AdminClient(conf)
    try:
        res = admin_client.delete_topics([TOPIC], operation_timeout=10)
        for topic, f in res.items():
            f.result()
            print(f"Topic {topic} removed")
            wait_for_topic(TOPIC, 5, 1, False, admin_client)
    except Exception as exc:
        # Removal stays best-effort, but report the reason instead of hiding
        # it so a later failure of the assertion below can be diagnosed.
        print(f"Topic {TOPIC} was not removed: {exc}")

    cluster_meta = admin_client.list_topics()
    current_topics = [topic for topic in cluster_meta.topics if topic != "__consumer_offsets"]

    assert TOPIC not in current_topics, f"Topic {TOPIC} shouldn't exist"

    # Kafka Cloud allows replication_factor=1 only for Dedicated clusters.
    topic_obj = NewTopic(TOPIC, num_partitions=1, replication_factor=3)

    res = admin_client.create_topics([topic_obj], operation_timeout=10, validate_only=False)
    # Block on each returned future so creation errors surface here.
    for f in res.values():
        f.result()

    producer = Producer(conf)
    for i in range(expected_messages):
        message = f"This is some text for message {i}"
        producer.produce(topic=TOPIC, value=message)
    producer.flush(timeout=10)
    return expected_messages
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
@pytest.mark.asyncio
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
@requires_env("KAFKA_API_KEY", "KAFKA_SECRET", "KAFKA_BOOTSTRAP_SERVER")
async def test_kafka_source_cloud(kafka_seed_topic_cloud: int):
    """
    Validate the cloud Kafka source connector against the seeded topic.

    For this test to succeed, a cluster must exist on Confluent Cloud and the
    API key must have admin privileges. By default, user account keys have them.
    """
    expected_messages = kafka_seed_topic_cloud

    access_config = CloudKafkaAccessConfig(
        kafka_api_key=os.environ["KAFKA_API_KEY"],
        secret=os.environ["KAFKA_SECRET"],
    )
    cloud_connection = CloudKafkaConnectionConfig(
        bootstrap_server=os.environ["KAFKA_BOOTSTRAP_SERVER"],
        port=9092,
        access_config=access_config,
    )

    with tempfile.TemporaryDirectory() as scratch_dir:
        download_config = CloudKafkaDownloaderConfig(download_dir=Path(scratch_dir))
        index_config = CloudKafkaIndexerConfig(
            topic=TOPIC,
            num_messages_to_consume=expected_messages,
        )
        indexer = CloudKafkaIndexer(
            connection_config=cloud_connection,
            index_config=index_config,
        )
        downloader = CloudKafkaDownloader(
            connection_config=cloud_connection,
            download_config=download_config,
        )
        indexer.precheck()
        validation_configs = SourceValidationConfigs(
            test_id="kafka-cloud",
            exclude_fields_extend=["connector_type"],
            expected_num_files=expected_messages,
            validate_downloaded_files=True,
            validate_file_data=True,
        )
        await source_connector_validation(
            indexer=indexer,
            downloader=downloader,
            configs=validation_configs,
        )
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
def test_kafka_source_local_precheck_fail_no_cluster():
    """Expect the local indexer precheck to raise SourceConnectionError.

    This test requests no Kafka fixture.
    """
    index_config = LocalKafkaIndexerConfig(topic=TOPIC, num_messages_to_consume=5)
    indexer = LocalKafkaIndexer(
        connection_config=LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092),
        index_config=index_config,
    )
    with pytest.raises(SourceConnectionError):
        indexer.precheck()
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
def test_kafka_source_local_precheck_fail_no_topic(kafka_seed_topic: str):
    """Expect precheck to raise SourceConnectionError for an indexer pointed at "topic".

    The indexer is configured with the literal topic name "topic" rather than
    the name provided by the seeding fixture.
    """
    index_config = LocalKafkaIndexerConfig(topic="topic", num_messages_to_consume=5)
    indexer = LocalKafkaIndexer(
        connection_config=LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092),
        index_config=index_config,
    )
    with pytest.raises(SourceConnectionError):
        indexer.precheck()
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def get_all_messages(topic: str, max_empty_messages: int = 5) -> list[dict]:
    """Read and JSON-decode the messages available on ``topic`` at localhost:29092.

    Polling stops once ``max_empty_messages`` polls in total have returned
    nothing, or when a partition-EOF error is reported. Any other message error
    is raised as ``KafkaException``. The consumer commits synchronously after
    each message it attempts to decode and is always closed before returning.
    """
    consumer = Consumer(
        {
            "bootstrap.servers": "localhost:29092",
            "group.id": "default_group_id",
            "enable.auto.commit": "false",
            "auto.offset.reset": "earliest",
        }
    )
    consumer.subscribe([topic])
    collected = []
    try:
        idle_polls = 0
        while idle_polls < max_empty_messages:
            record = consumer.poll(timeout=1)
            if record is None:
                idle_polls += 1
                continue
            failure = record.error()
            if failure:
                if failure.code() == KafkaError._PARTITION_EOF:
                    break
                raise KafkaException(failure)
            try:
                payload = record.value().decode("utf8")
                collected.append(json.loads(payload))
            finally:
                # Commit even when decoding fails, as the offset was consumed.
                consumer.commit(asynchronous=False)
    finally:
        print("closing consumer")
        consumer.close()
    return collected
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
@pytest.mark.asyncio
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
async def test_kafka_destination_local(upload_file: Path, kafka_upload_topic: str):
    """Upload a file to the local topic and check the message count matches its entries."""
    local_connection = LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092)
    uploader = LocalKafkaUploader(
        connection_config=local_connection,
        upload_config=LocalKafkaUploaderConfig(topic=TOPIC, batch_size=10),
    )
    identifiers = SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name)
    file_data = FileData(
        source_identifiers=identifiers,
        connector_type=CONNECTOR_TYPE,
        identifier="mock file data",
    )
    uploader.precheck()
    # Dispatch to whichever execution mode the uploader reports.
    if not uploader.is_async():
        uploader.run(path=upload_file, file_data=file_data)
    else:
        await uploader.run_async(path=upload_file, file_data=file_data)
    all_messages = get_all_messages(topic=kafka_upload_topic)
    with upload_file.open("r") as upload_fs:
        content_to_upload = json.load(upload_fs)
    expected_count = len(content_to_upload)
    received_count = len(all_messages)
    assert received_count == expected_count, (
        f"expected number of messages ({expected_count}) doesn't "
        f"match how many messages read off of kakfa topic {kafka_upload_topic}: {received_count}"
    )
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
def test_kafka_destination_local_precheck_fail_no_cluster():
    """Expect the local uploader precheck to raise DestinationConnectionError.

    This test requests no Kafka fixture.
    """
    local_connection = LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092)
    upload_config = LocalKafkaUploaderConfig(topic=TOPIC, batch_size=10)
    uploader = LocalKafkaUploader(
        connection_config=local_connection,
        upload_config=upload_config,
    )
    with pytest.raises(DestinationConnectionError):
        uploader.precheck()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|