unstructured-ingest 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (53)
  1. test/integration/connectors/{databricks_tests → databricks}/test_volumes_native.py +75 -19
  2. test/integration/connectors/sql/test_postgres.py +6 -2
  3. test/integration/connectors/sql/test_singlestore.py +6 -2
  4. test/integration/connectors/sql/test_snowflake.py +6 -2
  5. test/integration/connectors/sql/test_sqlite.py +6 -2
  6. test/integration/connectors/test_milvus.py +13 -0
  7. test/integration/connectors/test_onedrive.py +6 -0
  8. test/integration/connectors/test_redis.py +119 -0
  9. test/integration/connectors/test_vectara.py +270 -0
  10. test/integration/embedders/test_bedrock.py +28 -0
  11. test/integration/embedders/test_octoai.py +14 -0
  12. test/integration/embedders/test_openai.py +13 -0
  13. test/integration/embedders/test_togetherai.py +10 -0
  14. test/integration/partitioners/test_partitioner.py +2 -2
  15. test/unit/embed/test_octoai.py +8 -1
  16. unstructured_ingest/__version__.py +1 -1
  17. unstructured_ingest/embed/bedrock.py +39 -11
  18. unstructured_ingest/embed/interfaces.py +5 -0
  19. unstructured_ingest/embed/octoai.py +44 -3
  20. unstructured_ingest/embed/openai.py +37 -1
  21. unstructured_ingest/embed/togetherai.py +28 -1
  22. unstructured_ingest/embed/voyageai.py +33 -1
  23. unstructured_ingest/v2/errors.py +18 -0
  24. unstructured_ingest/v2/interfaces/file_data.py +11 -1
  25. unstructured_ingest/v2/processes/connectors/__init__.py +7 -0
  26. unstructured_ingest/v2/processes/connectors/astradb.py +2 -0
  27. unstructured_ingest/v2/processes/connectors/chroma.py +0 -1
  28. unstructured_ingest/v2/processes/connectors/couchbase.py +2 -0
  29. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -0
  30. unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +2 -2
  31. unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +2 -2
  32. unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +2 -2
  33. unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +2 -2
  34. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -1
  35. unstructured_ingest/v2/processes/connectors/kafka/cloud.py +5 -2
  36. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +14 -3
  37. unstructured_ingest/v2/processes/connectors/milvus.py +15 -6
  38. unstructured_ingest/v2/processes/connectors/mongodb.py +3 -4
  39. unstructured_ingest/v2/processes/connectors/neo4j.py +2 -0
  40. unstructured_ingest/v2/processes/connectors/onedrive.py +79 -25
  41. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +0 -1
  42. unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
  43. unstructured_ingest/v2/processes/connectors/sql/sql.py +5 -0
  44. unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
  45. unstructured_ingest/v2/unstructured_api.py +25 -2
  46. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/METADATA +20 -16
  47. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/RECORD +52 -48
  48. test/integration/connectors/test_kafka.py +0 -304
  49. test/integration/connectors/{databricks_tests → databricks}/__init__.py +0 -0
  50. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/LICENSE.md +0 -0
  51. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/WHEEL +0 -0
  52. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/entry_points.txt +0 -0
  53. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/top_level.txt +0 -0
@@ -10,17 +10,18 @@ test/integration/connectors/test_azure_ai_search.py,sha256=EGV-G_Lq3h6pHhhmmQGWj
10
10
  test/integration/connectors/test_chroma.py,sha256=KQCzBJsOHAOtg0Ehp0tNtuYchFtiSmhHDKyOju33kJg,3686
11
11
  test/integration/connectors/test_confluence.py,sha256=adJxIggjuO-jgMimBZdv_AqWeBFlQoodELucIYwWC98,3546
12
12
  test/integration/connectors/test_delta_table.py,sha256=xsnJmwlWVQrccYeAtpt2lm0DYm2jGxiKXeERQXqCDCM,6884
13
- test/integration/connectors/test_kafka.py,sha256=FtHLptvS9V3Br7wCm2Xyh_ulz8_wWvCOKKEd0xD9LyM,10758
14
13
  test/integration/connectors/test_lancedb.py,sha256=U2HfIrf6iJ7lYMn-vz0j-LesVyDY-jc9QrQhlJVhG9Q,9183
15
- test/integration/connectors/test_milvus.py,sha256=abYQOjF8grEFj3FB1_wQgFSbWPFWfZ2pEsgKarfKJE4,6574
14
+ test/integration/connectors/test_milvus.py,sha256=aRT5SpJHY4NA8pG_LcVTJwYwvLw2W_OOE-NIfDq03SE,7015
16
15
  test/integration/connectors/test_mongodb.py,sha256=UZ4eo61MisCw4s0p7HWaediN7M-lSddMDs71RFgdmJs,12347
17
16
  test/integration/connectors/test_neo4j.py,sha256=Esiq_Z9k1JLrWNXPmLBsX3LLwyEozwKoxX7iwMEJjRM,8252
18
- test/integration/connectors/test_onedrive.py,sha256=KIkBwKh1hnv203VCL2UABnDkS_bP4NxOFm1AL8EPGLA,3554
17
+ test/integration/connectors/test_onedrive.py,sha256=Bp9Ayv59JnfsjSwqbQ-zYvg-XAPGgZfKJ45Asc0y1bM,3808
19
18
  test/integration/connectors/test_pinecone.py,sha256=suPFi40d6rHXurQQLIpCzW5XRTdgzlP-f-KLPhGCUHo,10208
20
19
  test/integration/connectors/test_qdrant.py,sha256=hyuqSJDaylkQVxWh7byD8jo8bwPuBxSa8MWRD3sBu-Y,7906
20
+ test/integration/connectors/test_redis.py,sha256=Q_KAZPNE9NIoRN2UsbXtc1fe_aJg66RbSQtS3OKNpc0,4327
21
21
  test/integration/connectors/test_s3.py,sha256=PJaAwFRF2lXMQlkbv9JHpngPc6706ML7zowOlXT3TcY,7033
22
- test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=9Ndo0Q8uzBGGOYYjmTV6EdZtaTRy97lHAXvJczTOJe4,5859
22
+ test/integration/connectors/test_vectara.py,sha256=_FQHbhxL3f1rLV9MrHOvcljm_4qTVf5xl-Q7MplE_xs,8688
23
+ test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ test/integration/connectors/databricks/test_volumes_native.py,sha256=ig60-nCdLF0GsgJowG9eRaG28iuoYHtuf12HdK6OE1I,7764
24
25
  test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
26
  test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
26
27
  test/integration/connectors/duckdb/test_duckdb.py,sha256=tZfHJYNILVqwT20XD-aJUFZ67TnJvHLpfAxNvNiE51o,2891
@@ -30,10 +31,10 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
30
31
  test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=Lm8topVssTsqcI8H2Tzohuxb9j-CFHv9orM6WfAqCZw,11933
31
32
  test/integration/connectors/elasticsearch/test_opensearch.py,sha256=fWpZrhzRiVpm9AOlZvgZRCjyXSYvWG7-8j06x-HR3PY,11311
32
33
  test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- test/integration/connectors/sql/test_postgres.py,sha256=BNoH1Ha3hdpxk--DqE5cg8zwnx14rN-1LzQ_YJ1U5us,6734
34
- test/integration/connectors/sql/test_singlestore.py,sha256=W4bNc7HoVrtfHfDTdQF8rcwKdX-AdIDHV-phJeViSmU,5917
35
- test/integration/connectors/sql/test_snowflake.py,sha256=-fzpaB1LlFOW2bZmJHQMbH6T0zU30dMypAHLRAbT4d0,7258
36
- test/integration/connectors/sql/test_sqlite.py,sha256=NjIcAvujSiuqjTaitjjyLEsS3oazPyQzTnzCZ6QjIkM,5720
34
+ test/integration/connectors/sql/test_postgres.py,sha256=DXyHMZBQgrV2HyVflkoBpT1mewSnvw3ugoHtGR5o8OM,6876
35
+ test/integration/connectors/sql/test_singlestore.py,sha256=pzCPo8IW3c9VH-f3UdJS5MjPjkHarJPSepAxV0ZVajo,6059
36
+ test/integration/connectors/sql/test_snowflake.py,sha256=MiTzepeeJlv147CyzCGyd16MRk5QeUw4g4L3TTi5gVY,7400
37
+ test/integration/connectors/sql/test_sqlite.py,sha256=rSkjv3KpslAvt_8LQecJUT0lOLtuZSvhtlW2deJovLI,5862
37
38
  test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
39
  test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
39
40
  test/integration/connectors/utils/docker.py,sha256=8uOTJ3AVG1dxK4OiLvOLfRxL_TsYQX2KKCID9TZ7-Ac,4995
@@ -50,17 +51,17 @@ test/integration/connectors/weaviate/test_local.py,sha256=SK6iEwQUKiCd0X99BEk8Gl
50
51
  test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
52
  test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
52
53
  test/integration/embedders/test_azure_openai.py,sha256=6tFpKFBFRXD49imhhRzsvy3MPtuZ4L1PtnKyMVBRAqc,1808
53
- test/integration/embedders/test_bedrock.py,sha256=0oBRNS_DtFDGQ22Z1T3t6VOJ31PrItgvnJpqcLe9Fg4,1903
54
+ test/integration/embedders/test_bedrock.py,sha256=ZyS17PLaHOeh-ykrd71Jkgg_ext7aOadxvxVJ_4IvFE,2852
54
55
  test/integration/embedders/test_huggingface.py,sha256=0mMTOO-Nh7KB70AGs_7LLQIxMYrnSPqyihriUeqACbM,1007
55
56
  test/integration/embedders/test_mixedbread.py,sha256=RrLv8SByMNXsgrlh94RbaT-VyxZ4-DILO-OPpmOwvSI,1441
56
- test/integration/embedders/test_octoai.py,sha256=LnR0BLttamW5PGid6jFxATDAi0x7hq5iWMXurbHP6TI,1328
57
- test/integration/embedders/test_openai.py,sha256=0jlFqEeeCneIWX9tGyC3TXeUNqsMXR7u5n7uEIaAQKo,1328
58
- test/integration/embedders/test_togetherai.py,sha256=0W1ScD5yb1D9hPC2ewUsuCHLUOpCuM083YMBhqAI9fw,1395
57
+ test/integration/embedders/test_octoai.py,sha256=oQYpYh2XaKhiqtnOSpH0rP9TQrzykZ1-3C3jZRurPu8,1734
58
+ test/integration/embedders/test_openai.py,sha256=s4_XGQfVpsTb4hKh2QZkXdOG_MnF5OQgL98kzNjTFCg,1664
59
+ test/integration/embedders/test_togetherai.py,sha256=3otyr6i9smJMyXbhKCcaC2gx813rqGaZTKi2sEM7GIQ,1707
59
60
  test/integration/embedders/test_vertexai.py,sha256=OtoFzmrWWhGIO5Bbl5zt_4sp6qRHZxtaDQKpGcfzNLM,1345
60
61
  test/integration/embedders/test_voyageai.py,sha256=Zqf7nn1AxfBDBr5A9Jr-5pxes4QNvfKiyeGexCCm4nY,1346
61
62
  test/integration/embedders/utils.py,sha256=3AMKMBpgBep_0jFqrqMHH8BJo6w60kpouSZ5JPJTwIA,1850
62
63
  test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- test/integration/partitioners/test_partitioner.py,sha256=KEpnhsz2YNAoQ2UZGOTsi1_uk1h4Vg-gGTsy5Fe9OCw,2846
64
+ test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
64
65
  test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
66
  test/unit/test_chunking_utils.py,sha256=0iPwfnMPpyTm-yOE0BXMnEQQP4iguS6NhOqgMQU5nhk,1390
66
67
  test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
@@ -69,7 +70,7 @@ test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
69
70
  test/unit/test_utils.py,sha256=Q6mp9YZPah8z3-2lreyRbmAc7m2Y_w26_N9vocSInoA,5421
70
71
  test/unit/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
72
  test/unit/embed/test_mixedbreadai.py,sha256=XFNJDP5pIgF3eQYwBiuEWmH3zZWx72Wpwyv-Q4m0DJg,1332
72
- test/unit/embed/test_octoai.py,sha256=Ha9EgAW64Q45hFj51tToe8RyKXWXwqAkdDqSFDMu37Q,831
73
+ test/unit/embed/test_octoai.py,sha256=pouR4J6B_mrlu4TsA5yr2Ln_LCYL2pGBojXY5KEqvKI,1053
73
74
  test/unit/embed/test_openai.py,sha256=0O1yshDcE0BMKv1yJqrNuiNLSdPhLpKqJ-D_wmnidsM,831
74
75
  test/unit/embed/test_vertexai.py,sha256=Pl7COc9E3tf_yGidkTEmTizNGyZF1F5zuL2TgPTMnfI,1048
75
76
  test/unit/embed/test_voyageai.py,sha256=DviCOJFhe5H4e26-kNyX3JNe8h3qB5Yl0KOe8rQEMrc,981
@@ -94,7 +95,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
94
95
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
96
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
96
97
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
97
- unstructured_ingest/__version__.py,sha256=agVWTd6LRRiz8p3CAvwS6-Qb_jT60tZHXAK83ewMwL4,43
98
+ unstructured_ingest/__version__.py,sha256=R522TM0FvpKddIRo55tqz-j1ENS8k4uXjk60bKhQ50M,43
98
99
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
99
100
  unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
100
101
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -263,15 +264,15 @@ unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ
263
264
  unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
264
265
  unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
265
266
  unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
266
- unstructured_ingest/embed/bedrock.py,sha256=-PRdZsF44vwi6G4G75gdO31AJKfZWClOXkJQAk7rEO8,3096
267
+ unstructured_ingest/embed/bedrock.py,sha256=qb1eo1Uooz2JxhHcqvJDTYGbuwaqyRTD0ZepQzlL9_o,4455
267
268
  unstructured_ingest/embed/huggingface.py,sha256=2cBiQhOhfWHX3hS-eKjocysOkUaRlyRfUj9Kxjrp6cE,1934
268
- unstructured_ingest/embed/interfaces.py,sha256=au4Xp8ciDvo4bidlUbazFW2aC7NZW5-UDLKXBFVzAX4,2025
269
+ unstructured_ingest/embed/interfaces.py,sha256=XsPtb53367KCkH-ItwWQ_EQ-sYWHaekhxkF4PwHCNXc,2210
269
270
  unstructured_ingest/embed/mixedbreadai.py,sha256=OwFWWukvkQaXhjgs6b6N6D4w7sYrtcHNhsHAj-Bocj4,4268
270
- unstructured_ingest/embed/octoai.py,sha256=jHytDfQgup0v1PBcmlMv1nIh9Obg8WGO5qtLmN-Ot5g,1473
271
- unstructured_ingest/embed/openai.py,sha256=JXo4boivNoo2lBzHuS4Z0FZ1zlgUGAPVt0X3HY540ZU,1282
272
- unstructured_ingest/embed/togetherai.py,sha256=BL7NzExSE-laQqrp4ybUgoZ9JG_eop4hk-s2yCO_d5c,1451
271
+ unstructured_ingest/embed/octoai.py,sha256=0LVZlbOMUuxwZV0QHhGWUlneWDX3fCklPRTuc4huze0,3007
272
+ unstructured_ingest/embed/openai.py,sha256=5M2idJ7Ynx_3-FXwm9mTjGnNiww0DSuZmbuvi2YAUqk,2543
273
+ unstructured_ingest/embed/togetherai.py,sha256=2jXYFB9QTDUlSKc_j32bMrwKu7YQA0oF893rGSmlXr8,2374
273
274
  unstructured_ingest/embed/vertexai.py,sha256=X5bGJdXyR5nAFH_ocAVgEowmd60nOBykyfclYo3VfBM,2808
274
- unstructured_ingest/embed/voyageai.py,sha256=bjom9QqWmH1Mv08ewg8ZG7gO3rQPMVS0_ztm2KBAOjI,1821
275
+ unstructured_ingest/embed/voyageai.py,sha256=BfYa-oedkq-56j5_0rDjOLy18b9zC0zagaoPHJry5xA,2958
275
276
  unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
276
277
  unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
277
278
  unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
@@ -360,10 +361,11 @@ unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_
360
361
  unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
361
362
  unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
362
363
  unstructured_ingest/v2/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
364
+ unstructured_ingest/v2/errors.py,sha256=y1tGvobuhQdcR9vw5APuFigiQSfsQKrAYGDr4biGDdw,207
363
365
  unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
364
366
  unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
365
367
  unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
366
- unstructured_ingest/v2/unstructured_api.py,sha256=f_6NK0QOVwjAFJvlyvzu0IaXb6QQgRNJleYxB1KvzKE,3856
368
+ unstructured_ingest/v2/unstructured_api.py,sha256=g6AO2Vy0lpy6-ooOvdgfJvIRhearPKArp3ggIdApG8I,4514
367
369
  unstructured_ingest/v2/utils.py,sha256=HHli5rHDBm6flUeQ_ovVDvtOdnzzL4FvNyw6jsHIJfw,2041
368
370
  unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
369
371
  unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
@@ -379,7 +381,7 @@ unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1K
379
381
  unstructured_ingest/v2/interfaces/__init__.py,sha256=9VO09XuTvyOcFF8ZDKN169fNb_uA5TAYzPsiPHOyxhQ,963
380
382
  unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
381
383
  unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
382
- unstructured_ingest/v2/interfaces/file_data.py,sha256=weeCEp14QGEGjCBtJBByrAcAqA87Jx62qw3Ed7x-crU,3550
384
+ unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
383
385
  unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
384
386
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
385
387
  unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
@@ -406,40 +408,42 @@ unstructured_ingest/v2/processes/embedder.py,sha256=xCBpaL07WnVUOUW8SHktaf1vwBGZ
406
408
  unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
407
409
  unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
408
410
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
409
- unstructured_ingest/v2/processes/connectors/__init__.py,sha256=CTWLEmaKLTjbqeUQGI0fxJobsqDOc1d2ZKJoXh98Lww,5432
411
+ unstructured_ingest/v2/processes/connectors/__init__.py,sha256=bmogp1sPbRS-RndN0R8V8gY4uaTkpmNJv-035-Y5SGU,5835
410
412
  unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
411
- unstructured_ingest/v2/processes/connectors/astradb.py,sha256=Y4rgEZZRMOLCVeC5jHbPWxnVtl1A-fPeU8BEibMZIlQ,14696
413
+ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=xhUMoUdnrfAY1isZGqsV4lZUsnZNpbvgLyQWQbR4hVo,14814
412
414
  unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
413
- unstructured_ingest/v2/processes/connectors/chroma.py,sha256=G1DQHhhFQCS2RLF0cVvoUH9QO8KkVjIyNZ9nKh__aHw,7220
415
+ unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
414
416
  unstructured_ingest/v2/processes/connectors/confluence.py,sha256=-Y1OU_ZXhZQNj5NH3EN01CP8QKKZJaJ9xkXoAlSgnIk,7604
415
- unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=5Da-JWHXHcVcmJ_rqj4hZwURh_z7sJRcYpyx83Bc7zM,12132
417
+ unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
416
418
  unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
417
419
  unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
418
420
  unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=5k7pdAzJGXSdyPCzW9vu2OaAjGVTo2JevDyGaXM1Hvk,13370
419
421
  unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOGQWxudzQEDopXM8XkfkQ2j6g,5004
420
422
  unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
421
- unstructured_ingest/v2/processes/connectors/milvus.py,sha256=I57hyH5nz_p7utmUOkvt_6vCPxNIVQMoukplUgIyYi8,8503
422
- unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=yzcsHTJXVyuASeLr56OSratDQRiw5v68JSHPYpMEzzY,14309
423
- unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=lRtWe6xWYogT-y_r_o7HWvlFMf_OIPGQq_Z-5v7IOq0,14163
424
- unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=H8qk53YJXAPrPyISze0dybZdDFv5B7dVO3fIr10dVU8,15982
423
+ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
424
+ unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
425
+ unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=QTw_Kq1_kHMdqsaBST6yW8vl-SYXVQFlIofDP1W_IuI,14250
426
+ unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=d6gC40YmfqBNXxizAt4MO4OOu5BoCZ7SAe1AbNwTP0E,18322
425
427
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
426
428
  unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=cohF7gBj0opSGKXlENSdGfTtyIKMHd1pwu4ydeb7JAY,10605
429
+ unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
427
430
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
428
431
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
429
432
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
430
433
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
434
+ unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
431
435
  unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
432
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=p7sjCYZb7JmY3v3Xy1gm-q0O7oamLTsSFf2EWXYfXYQ,6447
433
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=I1MJwe5LOxoPLjwo00H0XbXO6u_SJHWYgsj4s6ePoyI,2754
434
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P4rfcE3td7WyuuguRgUnGQytCMDpfeYrrpshBZuVynY,3539
435
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
436
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=Wk7s2_u5G0BOV5slvGc8IlUf7ivznY9PrgPqe6nlJKM,2897
436
+ unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=kI_ThB5e-DS8-GiQP5TQ8cP3fiGRm-V2AuNlGoSjH6I,6613
437
+ unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
438
+ unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=cb-EUW0T-linZMkbU6AcKEGWnFHQvhpO5Abtps4P2X0,3532
439
+ unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8NubkyHw49IpW_42g6w1Koxlm56EPiPf1lB-eoRSI,2783
440
+ unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
437
441
  unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
438
442
  unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=FVblIddorGCh9D9GZ8zLVUm8n39PJA5JLoJeWd-tSy8,2610
439
443
  unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
440
444
  unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=mU5x6SnbFgRsVicNGh4y4gtR6ek7eQFinI0dQQmzMds,4481
441
445
  unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
442
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=Ws6NoHvbXkxizbcvr1bKGkMMB7l_e-sfkQQGU6u7e0Y,18818
446
+ unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
443
447
  unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
444
448
  unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
445
449
  unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=NWsouhaxeyxrS_WlZJ70X2YIdioFH5LSaRLhnCPYAH0,6034
@@ -451,8 +455,8 @@ unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=l3TRKPEb0AJ7e0VS
451
455
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=JsxXKXNI20mdgwR_A6Rnf4u8fsFwLe3AkJmIe_3NEKY,6150
452
456
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
453
457
  unstructured_ingest/v2/processes/connectors/kafka/__init__.py,sha256=mQJ9Ex-QCfhz-BB5YWTfbPf7xGLd1i7FpjRr0ukbhNw,754
454
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=1SqNdY8Q8JwwB57wk9efxKv_BCeUkxZJ2HJ526wuCMw,3294
455
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=a-LWqYeJAK-g32UPgvvDt6W7dJp85N66aR_EKSR66RU,9685
458
+ unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=GdAeQ8Uz-6v1C5byBHtjfevVfbzW3obScBFFLRTb0ps,3441
459
+ unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=UfS41jzV9VxekS6AwWHhURJmJ7RUAw5iiIrj75BWrXQ,10255
456
460
  unstructured_ingest/v2/processes/connectors/kafka/local.py,sha256=lUkmfbTxyQW87CXxbJaijIT6foV09Gi-IG9o08OgiEs,2581
457
461
  unstructured_ingest/v2/processes/connectors/lancedb/__init__.py,sha256=LW37xZrn48JeHluRNulLTreUPdaF-ZU81F7MCUHcCv8,1253
458
462
  unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur53C4MEnpLplfO8U91KYgk--0kk5pE,1413
@@ -464,22 +468,22 @@ unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gA
464
468
  unstructured_ingest/v2/processes/connectors/qdrant/__init__.py,sha256=xM19uYzAuGizVoZIM_hnVZ5AcBN69aOBGpqZcpWPtuE,760
465
469
  unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-KiVBDBDBYYx5A9CUoikP5NCErRmfik,1624
466
470
  unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ4DU8yhJWMpL82QYwBVdPTxxNuV127U,1588
467
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=ITRYXKYEFhlagSe-AKKGRvC8jzyWmhQLfHbFb0ax8o8,5438
471
+ unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
468
472
  unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
469
473
  unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=E16CXRBw8fZKTuXIECns5wif_I07oncBHskVxHC4p7w,1448
470
474
  unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
471
475
  unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=-2E9dsdNhjAiuzeSBytBbAhljOhvQ8kN8wvlUESvLo8,5465
472
476
  unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=TApGi5G0W0TktJFmo4QWDR3X3R-MUQTKbIxjAX_M8ZI,7402
473
- unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=LlhdBL_yPsPwNzDPwAblUDLBOsvorD6bB3OUrxPk_PQ,15112
477
+ unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=HC-qnhdpuyScKoGh50pPjkQLGSac_mOAnuB2FZwSVl0,15265
474
478
  unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=Q5RAqn5Ccw-pbeKZLkiMn5IVw6EemCMukXzLlS7pDhc,5162
475
479
  unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
476
480
  unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
477
481
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
478
482
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
479
483
  unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
480
- unstructured_ingest-0.3.10.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
481
- unstructured_ingest-0.3.10.dist-info/METADATA,sha256=emdZRN3DNrVAwB6fUYQonLRpymZzh4ktHT73x2_vdxg,7623
482
- unstructured_ingest-0.3.10.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
483
- unstructured_ingest-0.3.10.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
484
- unstructured_ingest-0.3.10.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
485
- unstructured_ingest-0.3.10.dist-info/RECORD,,
484
+ unstructured_ingest-0.3.12.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
485
+ unstructured_ingest-0.3.12.dist-info/METADATA,sha256=nNPregI5d4D8fHqXxTPkKmn7bqmfUX5RB-AcMsgj0J4,7769
486
+ unstructured_ingest-0.3.12.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
487
+ unstructured_ingest-0.3.12.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
488
+ unstructured_ingest-0.3.12.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
489
+ unstructured_ingest-0.3.12.dist-info/RECORD,,
@@ -1,304 +0,0 @@
1
- import json
2
- import os
3
- import tempfile
4
- import time
5
- from pathlib import Path
6
-
7
- import pytest
8
- from confluent_kafka import Consumer, KafkaError, KafkaException, Producer
9
- from confluent_kafka.admin import AdminClient, NewTopic
10
-
11
- from test.integration.connectors.utils.constants import (
12
- DESTINATION_TAG,
13
- SOURCE_TAG,
14
- env_setup_path,
15
- )
16
- from test.integration.connectors.utils.docker_compose import docker_compose_context
17
- from test.integration.connectors.utils.validation.source import (
18
- SourceValidationConfigs,
19
- source_connector_validation,
20
- )
21
- from test.integration.utils import requires_env
22
- from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
23
- from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
24
- from unstructured_ingest.v2.processes.connectors.kafka.cloud import (
25
- CloudKafkaAccessConfig,
26
- CloudKafkaConnectionConfig,
27
- CloudKafkaDownloader,
28
- CloudKafkaDownloaderConfig,
29
- CloudKafkaIndexer,
30
- CloudKafkaIndexerConfig,
31
- )
32
- from unstructured_ingest.v2.processes.connectors.kafka.local import (
33
- CONNECTOR_TYPE,
34
- LocalKafkaConnectionConfig,
35
- LocalKafkaDownloader,
36
- LocalKafkaDownloaderConfig,
37
- LocalKafkaIndexer,
38
- LocalKafkaIndexerConfig,
39
- LocalKafkaUploader,
40
- LocalKafkaUploaderConfig,
41
- )
42
-
43
- SEED_MESSAGES = 10
44
- TOPIC = "fake-topic"
45
-
46
-
47
- def get_admin_client() -> AdminClient:
48
- conf = {
49
- "bootstrap.servers": "localhost:29092",
50
- }
51
- return AdminClient(conf)
52
-
53
-
54
- @pytest.fixture
55
- def docker_compose_ctx():
56
- with docker_compose_context(docker_compose_path=env_setup_path / "kafka") as ctx:
57
- yield ctx
58
-
59
-
60
- def wait_for_topic(
61
- topic: str,
62
- retries: int = 10,
63
- interval: int = 1,
64
- exists: bool = True,
65
- admin_client=None,
66
- ):
67
- if admin_client is None:
68
- admin_client = get_admin_client()
69
- current_topics = admin_client.list_topics().topics
70
- attempts = 0
71
- while (topic not in current_topics) == exists and attempts < retries:
72
- attempts += 1
73
- print(
74
- "Attempt {}: Waiting for topic {} to {} exist. Current topics: [{}]".format(
75
- attempts, topic, "" if exists else "not", ", ".join(current_topics)
76
- )
77
- )
78
- time.sleep(interval)
79
- current_topics = admin_client.list_topics().topics
80
- if (topic not in current_topics) == exists:
81
- raise TimeoutError(f"Timeout out waiting for topic {topic} to exist")
82
-
83
-
84
- @pytest.fixture
85
- def kafka_seed_topic(docker_compose_ctx) -> str:
86
- conf = {
87
- "bootstrap.servers": "localhost:29092",
88
- }
89
- producer = Producer(conf)
90
- for i in range(SEED_MESSAGES):
91
- message = f"This is some text for message {i}"
92
- producer.produce(topic=TOPIC, value=message)
93
- producer.flush(timeout=10)
94
- print(f"kafka topic {TOPIC} seeded with {SEED_MESSAGES} messages")
95
- wait_for_topic(topic=TOPIC)
96
- return TOPIC
97
-
98
-
99
- @pytest.fixture
100
- def kafka_upload_topic(docker_compose_ctx) -> str:
101
- admin_client = get_admin_client()
102
- admin_client.create_topics([NewTopic(TOPIC, 1, 1)])
103
- return TOPIC
104
-
105
-
106
- @pytest.mark.asyncio
107
- @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
108
- async def test_kafka_source_local(kafka_seed_topic: str):
109
- connection_config = LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092)
110
- with tempfile.TemporaryDirectory() as tempdir:
111
- tempdir_path = Path(tempdir)
112
- download_config = LocalKafkaDownloaderConfig(download_dir=tempdir_path)
113
- indexer = LocalKafkaIndexer(
114
- connection_config=connection_config,
115
- index_config=LocalKafkaIndexerConfig(topic=kafka_seed_topic, num_messages_to_consume=5),
116
- )
117
- downloader = LocalKafkaDownloader(
118
- connection_config=connection_config, download_config=download_config
119
- )
120
- indexer.precheck()
121
- await source_connector_validation(
122
- indexer=indexer,
123
- downloader=downloader,
124
- configs=SourceValidationConfigs(
125
- test_id="kafka-local", expected_num_files=5, validate_downloaded_files=True
126
- ),
127
- )
128
-
129
-
130
- @pytest.fixture
131
- def kafka_seed_topic_cloud(expected_messages: int = 5) -> int:
132
- conf = {
133
- "bootstrap.servers": os.environ["KAFKA_BOOTSTRAP_SERVER"],
134
- "sasl.username": os.environ["KAFKA_API_KEY"],
135
- "sasl.password": os.environ["KAFKA_SECRET"],
136
- "sasl.mechanism": "PLAIN",
137
- "security.protocol": "SASL_SSL",
138
- }
139
- admin_client = AdminClient(conf)
140
- try:
141
- res = admin_client.delete_topics([TOPIC], operation_timeout=10)
142
- for topic, f in res.items():
143
- f.result()
144
- print(f"Topic {topic} removed")
145
- wait_for_topic(TOPIC, 5, 1, False, admin_client)
146
- except Exception:
147
- pass
148
-
149
- cluster_meta = admin_client.list_topics()
150
- current_topics = [topic for topic in cluster_meta.topics if topic != "__consumer_offsets"]
151
-
152
- assert TOPIC not in current_topics, f"Topic {TOPIC} shouldn't exist"
153
-
154
- # Kafka Cloud allows to use replication_factor=1 only for Dedicated clusters.
155
- topic_obj = NewTopic(TOPIC, num_partitions=1, replication_factor=3)
156
-
157
- res = admin_client.create_topics([topic_obj], operation_timeout=10, validate_only=False)
158
- for topic, f in res.items():
159
- f.result()
160
-
161
- producer = Producer(conf)
162
- for i in range(expected_messages):
163
- message = f"This is some text for message {i}"
164
- producer.produce(topic=TOPIC, value=message)
165
- producer.flush(timeout=10)
166
- return expected_messages
167
-
168
-
169
- @pytest.mark.asyncio
170
- @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
171
- @requires_env("KAFKA_API_KEY", "KAFKA_SECRET", "KAFKA_BOOTSTRAP_SERVER")
172
- async def test_kafka_source_cloud(kafka_seed_topic_cloud: int):
173
- """
174
- In order to have this test succeed, you need to create cluster on Confluent Cloud,
175
- and create the API key with admin privileges. By default, user account keys have it.
176
- """
177
-
178
- expected_messages = kafka_seed_topic_cloud
179
-
180
- connection_config = CloudKafkaConnectionConfig(
181
- bootstrap_server=os.environ["KAFKA_BOOTSTRAP_SERVER"],
182
- port=9092,
183
- access_config=CloudKafkaAccessConfig(
184
- kafka_api_key=os.environ["KAFKA_API_KEY"],
185
- secret=os.environ["KAFKA_SECRET"],
186
- ),
187
- )
188
-
189
- with tempfile.TemporaryDirectory() as tempdir:
190
- tempdir_path = Path(tempdir)
191
- download_config = CloudKafkaDownloaderConfig(download_dir=tempdir_path)
192
- indexer = CloudKafkaIndexer(
193
- connection_config=connection_config,
194
- index_config=CloudKafkaIndexerConfig(
195
- topic=TOPIC,
196
- num_messages_to_consume=expected_messages,
197
- ),
198
- )
199
- downloader = CloudKafkaDownloader(
200
- connection_config=connection_config, download_config=download_config
201
- )
202
- indexer.precheck()
203
- await source_connector_validation(
204
- indexer=indexer,
205
- downloader=downloader,
206
- configs=SourceValidationConfigs(
207
- test_id="kafka-cloud",
208
- exclude_fields_extend=["connector_type"],
209
- expected_num_files=expected_messages,
210
- validate_downloaded_files=True,
211
- validate_file_data=True,
212
- ),
213
- )
214
-
215
-
216
- @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
217
- def test_kafka_source_local_precheck_fail_no_cluster():
218
- connection_config = LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092)
219
- indexer = LocalKafkaIndexer(
220
- connection_config=connection_config,
221
- index_config=LocalKafkaIndexerConfig(topic=TOPIC, num_messages_to_consume=5),
222
- )
223
- with pytest.raises(SourceConnectionError):
224
- indexer.precheck()
225
-
226
-
227
- @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
228
- def test_kafka_source_local_precheck_fail_no_topic(kafka_seed_topic: str):
229
- connection_config = LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092)
230
- indexer = LocalKafkaIndexer(
231
- connection_config=connection_config,
232
- index_config=LocalKafkaIndexerConfig(topic="topic", num_messages_to_consume=5),
233
- )
234
- with pytest.raises(SourceConnectionError):
235
- indexer.precheck()
236
-
237
-
238
- def get_all_messages(topic: str, max_empty_messages: int = 5) -> list[dict]:
239
- conf = {
240
- "bootstrap.servers": "localhost:29092",
241
- "group.id": "default_group_id",
242
- "enable.auto.commit": "false",
243
- "auto.offset.reset": "earliest",
244
- }
245
- consumer = Consumer(conf)
246
- consumer.subscribe([topic])
247
- messages = []
248
- try:
249
- empty_count = 0
250
- while empty_count < max_empty_messages:
251
- msg = consumer.poll(timeout=1)
252
- if msg is None:
253
- empty_count += 1
254
- continue
255
- if msg.error():
256
- if msg.error().code() == KafkaError._PARTITION_EOF:
257
- break
258
- else:
259
- raise KafkaException(msg.error())
260
- try:
261
- message = json.loads(msg.value().decode("utf8"))
262
- messages.append(message)
263
- finally:
264
- consumer.commit(asynchronous=False)
265
- finally:
266
- print("closing consumer")
267
- consumer.close()
268
- return messages
269
-
270
-
271
- @pytest.mark.asyncio
272
- @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
273
- async def test_kafka_destination_local(upload_file: Path, kafka_upload_topic: str):
274
- uploader = LocalKafkaUploader(
275
- connection_config=LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092),
276
- upload_config=LocalKafkaUploaderConfig(topic=TOPIC, batch_size=10),
277
- )
278
- file_data = FileData(
279
- source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
280
- connector_type=CONNECTOR_TYPE,
281
- identifier="mock file data",
282
- )
283
- uploader.precheck()
284
- if uploader.is_async():
285
- await uploader.run_async(path=upload_file, file_data=file_data)
286
- else:
287
- uploader.run(path=upload_file, file_data=file_data)
288
- all_messages = get_all_messages(topic=kafka_upload_topic)
289
- with upload_file.open("r") as upload_fs:
290
- content_to_upload = json.load(upload_fs)
291
- assert len(all_messages) == len(content_to_upload), (
292
- f"expected number of messages ({len(content_to_upload)}) doesn't "
293
- f"match how many messages read off of kakfa topic {kafka_upload_topic}: {len(all_messages)}"
294
- )
295
-
296
-
297
- @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
298
- def test_kafka_destination_local_precheck_fail_no_cluster():
299
- uploader = LocalKafkaUploader(
300
- connection_config=LocalKafkaConnectionConfig(bootstrap_server="localhost", port=29092),
301
- upload_config=LocalKafkaUploaderConfig(topic=TOPIC, batch_size=10),
302
- )
303
- with pytest.raises(DestinationConnectionError):
304
- uploader.precheck()