unstructured-ingest 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (477)
  1. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/PKG-INFO +12 -10
  2. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/setup.py +2 -1
  3. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_lancedb.py +7 -7
  4. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_milvus.py +34 -6
  5. unstructured_ingest-0.3.4/test/integration/connectors/weaviate/test_cloud.py +34 -0
  6. unstructured_ingest-0.3.4/test/integration/embedders/test_azure_openai.py +59 -0
  7. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/test_utils.py +21 -1
  8. unstructured_ingest-0.3.4/unstructured_ingest/__version__.py +1 -0
  9. unstructured_ingest-0.3.4/unstructured_ingest/embed/azure_openai.py +31 -0
  10. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/string_and_date_utils.py +10 -0
  11. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/astradb.py +16 -0
  12. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/couchbase.py +4 -1
  13. unstructured_ingest-0.3.4/unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
  14. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/lancedb/aws.py +7 -7
  15. unstructured_ingest-0.3.4/unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
  16. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/milvus.py +9 -3
  17. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +4 -3
  18. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/embedder.py +30 -0
  19. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest.egg-info/PKG-INFO +12 -10
  20. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest.egg-info/SOURCES.txt +4 -0
  21. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest.egg-info/requires.txt +12 -9
  22. unstructured_ingest-0.3.2/unstructured_ingest/__version__.py +0 -1
  23. unstructured_ingest-0.3.2/unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +0 -17
  24. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/LICENSE.md +0 -0
  25. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/README.md +0 -0
  26. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/pyproject.toml +0 -0
  27. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/setup.cfg +0 -0
  28. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/__init__.py +0 -0
  29. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/__init__.py +0 -0
  30. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/chunkers/__init__.py +0 -0
  31. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/chunkers/test_chunkers.py +0 -0
  32. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/__init__.py +0 -0
  33. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/conftest.py +0 -0
  34. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/databricks_tests/__init__.py +0 -0
  35. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/databricks_tests/test_volumes_native.py +0 -0
  36. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/elasticsearch/__init__.py +0 -0
  37. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/elasticsearch/conftest.py +0 -0
  38. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -0
  39. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/elasticsearch/test_opensearch.py +0 -0
  40. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/sql/__init__.py +0 -0
  41. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/sql/test_postgres.py +0 -0
  42. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/sql/test_singlestore.py +0 -0
  43. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/sql/test_snowflake.py +0 -0
  44. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/sql/test_sqlite.py +0 -0
  45. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_astradb.py +0 -0
  46. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_azure_ai_search.py +0 -0
  47. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_confluence.py +0 -0
  48. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_delta_table.py +0 -0
  49. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_kafka.py +0 -0
  50. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_mongodb.py +0 -0
  51. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_onedrive.py +0 -0
  52. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_pinecone.py +0 -0
  53. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_qdrant.py +0 -0
  54. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/test_s3.py +0 -0
  55. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/utils/__init__.py +0 -0
  56. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/utils/constants.py +0 -0
  57. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/utils/docker.py +0 -0
  58. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/utils/docker_compose.py +0 -0
  59. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/utils/validation.py +0 -0
  60. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/weaviate/__init__.py +0 -0
  61. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/weaviate/conftest.py +0 -0
  62. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/connectors/weaviate/test_local.py +0 -0
  63. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/__init__.py +0 -0
  64. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/conftest.py +0 -0
  65. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_bedrock.py +0 -0
  66. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_huggingface.py +0 -0
  67. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_mixedbread.py +0 -0
  68. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_octoai.py +0 -0
  69. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_openai.py +0 -0
  70. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_togetherai.py +0 -0
  71. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_vertexai.py +0 -0
  72. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/test_voyageai.py +0 -0
  73. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/embedders/utils.py +0 -0
  74. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/partitioners/__init__.py +0 -0
  75. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/partitioners/test_partitioner.py +0 -0
  76. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/integration/utils.py +0 -0
  77. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/__init__.py +0 -0
  78. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/embed/__init__.py +0 -0
  79. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/embed/test_mixedbreadai.py +0 -0
  80. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/embed/test_octoai.py +0 -0
  81. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/embed/test_openai.py +0 -0
  82. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/embed/test_vertexai.py +0 -0
  83. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/embed/test_voyageai.py +0 -0
  84. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/test_chunking_utils.py +0 -0
  85. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/test_error.py +0 -0
  86. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/test_interfaces.py +0 -0
  87. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/test_logger.py +0 -0
  88. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/__init__.py +0 -0
  89. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/chunkers/__init__.py +0 -0
  90. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/chunkers/test_chunkers.py +0 -0
  91. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/connectors/__init__.py +0 -0
  92. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/__init__.py +0 -0
  93. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_bedrock.py +0 -0
  94. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_huggingface.py +0 -0
  95. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_mixedbread.py +0 -0
  96. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_octoai.py +0 -0
  97. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_openai.py +0 -0
  98. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_togetherai.py +0 -0
  99. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_vertexai.py +0 -0
  100. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/embedders/test_voyageai.py +0 -0
  101. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/partitioners/__init__.py +0 -0
  102. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/partitioners/test_partitioner.py +0 -0
  103. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/test_interfaces.py +0 -0
  104. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/test_utils.py +0 -0
  105. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/utils/__init__.py +0 -0
  106. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/test/unit/v2/utils/data_generator.py +0 -0
  107. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/__init__.py +0 -0
  108. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/__init__.py +0 -0
  109. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/base/__init__.py +0 -0
  110. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/base/cmd.py +0 -0
  111. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/base/dest.py +0 -0
  112. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/base/src.py +0 -0
  113. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cli.py +0 -0
  114. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmd_factory.py +0 -0
  115. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/__init__.py +0 -0
  116. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  117. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/astradb.py +0 -0
  118. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/azure_ai_search.py +0 -0
  119. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  120. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  121. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  122. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  123. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  124. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  125. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/discord.py +0 -0
  126. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  127. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  128. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  129. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  130. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  131. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  132. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  133. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  134. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  135. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/github.py +0 -0
  136. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  137. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  138. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  139. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/jira.py +0 -0
  140. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  141. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/local.py +0 -0
  142. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  143. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/notion.py +0 -0
  144. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  145. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  146. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  147. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  148. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  149. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  150. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  151. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  152. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/slack.py +0 -0
  153. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/sql.py +0 -0
  154. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  155. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  156. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  157. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/common.py +0 -0
  158. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/interfaces.py +0 -0
  159. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/cli/utils.py +0 -0
  160. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/__init__.py +0 -0
  161. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/airtable.py +0 -0
  162. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/astradb.py +0 -0
  163. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/azure_ai_search.py +0 -0
  164. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/biomed.py +0 -0
  165. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/chroma.py +0 -0
  166. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/clarifai.py +0 -0
  167. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/confluence.py +0 -0
  168. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/databricks_volumes.py +0 -0
  169. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/delta_table.py +0 -0
  170. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/discord.py +0 -0
  171. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/elasticsearch.py +0 -0
  172. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  173. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  174. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/box.py +0 -0
  175. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  176. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
  177. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  178. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  179. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  180. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/git.py +0 -0
  181. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/github.py +0 -0
  182. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/gitlab.py +0 -0
  183. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/google_drive.py +0 -0
  184. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/hubspot.py +0 -0
  185. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/jira.py +0 -0
  186. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/kafka.py +0 -0
  187. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/local.py +0 -0
  188. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/mongodb.py +0 -0
  189. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/__init__.py +0 -0
  190. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/client.py +0 -0
  191. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/connector.py +0 -0
  192. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/helpers.py +0 -0
  193. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  194. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  195. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/block.py +0 -0
  196. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  197. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  198. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  199. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  200. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  201. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  202. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  203. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  204. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  205. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  206. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  207. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  208. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  209. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  210. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  211. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  212. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  213. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  214. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  215. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  216. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  217. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  218. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  219. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  220. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  221. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  222. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  223. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  224. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  225. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database.py +0 -0
  226. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  227. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  228. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  229. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  230. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  231. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  232. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  233. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  234. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  235. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  236. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  237. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  238. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  239. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  240. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  241. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  242. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  243. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  244. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  245. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  246. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  247. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  248. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  249. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/date.py +0 -0
  250. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/file.py +0 -0
  251. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/page.py +0 -0
  252. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  253. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  254. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/notion/types/user.py +0 -0
  255. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/onedrive.py +0 -0
  256. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/opensearch.py +0 -0
  257. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/outlook.py +0 -0
  258. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/pinecone.py +0 -0
  259. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/qdrant.py +0 -0
  260. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/reddit.py +0 -0
  261. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/registry.py +0 -0
  262. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/salesforce.py +0 -0
  263. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/sharepoint.py +0 -0
  264. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/slack.py +0 -0
  265. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/sql.py +0 -0
  266. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/vectara.py +0 -0
  267. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/weaviate.py +0 -0
  268. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/connector/wikipedia.py +0 -0
  269. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/__init__.py +0 -0
  270. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/bedrock.py +0 -0
  271. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/huggingface.py +0 -0
  272. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/interfaces.py +0 -0
  273. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  274. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/octoai.py +0 -0
  275. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/openai.py +0 -0
  276. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/togetherai.py +0 -0
  277. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/vertexai.py +0 -0
  278. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/embed/voyageai.py +0 -0
  279. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  280. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  281. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  282. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  283. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/error.py +0 -0
  284. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  285. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  286. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  287. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/interfaces.py +0 -0
  288. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/logger.py +0 -0
  289. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/main.py +0 -0
  290. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/__init__.py +0 -0
  291. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/copy.py +0 -0
  292. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  293. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/interfaces.py +0 -0
  294. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/partition.py +0 -0
  295. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/permissions.py +0 -0
  296. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/pipeline.py +0 -0
  297. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  298. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
  299. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
  300. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/source.py +0 -0
  301. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/utils.py +0 -0
  302. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/pipeline/write.py +0 -0
  303. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/processor.py +0 -0
  304. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/__init__.py +0 -0
  305. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/airtable.py +0 -0
  306. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/astradb.py +0 -0
  307. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/base_runner.py +0 -0
  308. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/biomed.py +0 -0
  309. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/confluence.py +0 -0
  310. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/delta_table.py +0 -0
  311. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/discord.py +0 -0
  312. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/elasticsearch.py +0 -0
  313. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  314. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  315. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/box.py +0 -0
  316. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  317. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  318. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  319. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  320. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  321. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/github.py +0 -0
  322. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/gitlab.py +0 -0
  323. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/google_drive.py +0 -0
  324. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/hubspot.py +0 -0
  325. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/jira.py +0 -0
  326. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/kafka.py +0 -0
  327. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/local.py +0 -0
  328. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/mongodb.py +0 -0
  329. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/notion.py +0 -0
  330. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/onedrive.py +0 -0
  331. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/opensearch.py +0 -0
  332. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/outlook.py +0 -0
  333. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/reddit.py +0 -0
  334. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/salesforce.py +0 -0
  335. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/sharepoint.py +0 -0
  336. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/slack.py +0 -0
  337. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/utils.py +0 -0
  338. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/wikipedia.py +0 -0
  339. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/__init__.py +0 -0
  340. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/astradb.py +0 -0
  341. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/azure_ai_search.py +0 -0
  342. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  343. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/chroma.py +0 -0
  344. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  345. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  346. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  347. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  348. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  349. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  350. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  351. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  352. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  353. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  354. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/kafka.py +0 -0
  355. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  356. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  357. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  358. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  359. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/sql.py +0 -0
  360. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/vectara.py +0 -0
  361. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  362. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/__init__.py +0 -0
  363. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/chunking.py +0 -0
  364. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/compression.py +0 -0
  365. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/data_prep.py +0 -0
  366. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/dep_check.py +0 -0
  367. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/google_filetype.py +0 -0
  368. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/utils/table.py +0 -0
  369. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/__init__.py +0 -0
  370. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/__init__.py +0 -0
  371. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  372. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/base/cmd.py +0 -0
  373. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/base/dest.py +0 -0
  374. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  375. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/base/src.py +0 -0
  376. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/cli.py +0 -0
  377. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/cmds.py +0 -0
  378. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  379. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/utils/click.py +0 -0
  380. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/cli/utils/model_conversion.py +0 -0
  381. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/constants.py +0 -0
  382. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
  383. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/connector.py +0 -0
  384. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
  385. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/file_data.py +0 -0
  386. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
  387. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/process.py +0 -0
  388. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/processor.py +0 -0
  389. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
  390. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
  391. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/logger.py +0 -0
  392. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/main.py +0 -0
  393. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/otel.py +0 -0
  394. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  395. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
  396. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/otel.py +0 -0
  397. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/pipeline.py +0 -0
  398. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  399. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/chunk.py +0 -0
  400. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/download.py +0 -0
  401. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/embed.py +0 -0
  402. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/filter.py +0 -0
  403. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/index.py +0 -0
  404. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/partition.py +0 -0
  405. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/stage.py +0 -0
  406. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/uncompress.py +0 -0
  407. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/pipeline/steps/upload.py +0 -0
  408. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/__init__.py +0 -0
  409. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/chunker.py +0 -0
  410. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
  411. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/__init__.py +0 -0
  412. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/airtable.py +0 -0
  413. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/azure_ai_search.py +0 -0
  414. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/chroma.py +0 -0
  415. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/confluence.py +0 -0
  416. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/databricks/__init__.py +0 -0
  417. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/databricks/volumes.py +0 -0
  418. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +0 -0
  419. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +0 -0
  420. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +0 -0
  421. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +0 -0
  422. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/delta_table.py +0 -0
  423. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +0 -0
  424. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  425. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +0 -0
  426. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  427. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +0 -0
  428. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +0 -0
  429. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +0 -0
  430. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +0 -0
  431. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +0 -0
  432. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +0 -0
  433. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +0 -0
  434. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  435. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/gitlab.py +0 -0
  436. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -0
  437. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/kafka/__init__.py +0 -0
  438. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/kafka/cloud.py +0 -0
  439. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/kafka/kafka.py +0 -0
  440. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/kafka/local.py +0 -0
  441. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
  442. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/lancedb/azure.py +0 -0
  443. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +0 -0
  444. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +0 -0
  445. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/lancedb/local.py +0 -0
  446. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/local.py +0 -0
  447. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/mongodb.py +0 -0
  448. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/onedrive.py +0 -0
  449. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/outlook.py +0 -0
  450. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/pinecone.py +0 -0
  451. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +0 -0
  452. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +0 -0
  453. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/qdrant/local.py +0 -0
  454. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +0 -0
  455. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/qdrant/server.py +0 -0
  456. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
  457. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
  458. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/slack.py +0 -0
  459. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sql/__init__.py +0 -0
  460. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sql/postgres.py +0 -0
  461. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sql/singlestore.py +0 -0
  462. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sql/snowflake.py +0 -0
  463. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sql/sql.py +0 -0
  464. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/sql/sqlite.py +0 -0
  465. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
  466. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +0 -0
  467. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +0 -0
  468. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/weaviate/local.py +0 -0
  469. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +0 -0
  470. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/filter.py +0 -0
  471. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/partitioner.py +0 -0
  472. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/processes/uncompress.py +0 -0
  473. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/unstructured_api.py +0 -0
  474. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest/v2/utils.py +0 -0
  475. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  476. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  477. {unstructured_ingest-0.3.2 → unstructured_ingest-0.3.4}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: dataclasses_json
26
- Requires-Dist: click
27
25
  Requires-Dist: pydantic>=2.7
26
+ Requires-Dist: click
27
+ Requires-Dist: dataclasses_json
28
28
  Requires-Dist: opentelemetry-sdk
29
- Requires-Dist: tqdm
30
29
  Requires-Dist: python-dateutil
31
30
  Requires-Dist: pandas
31
+ Requires-Dist: tqdm
32
32
  Provides-Extra: remote
33
33
  Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
34
34
  Provides-Extra: csv
@@ -71,8 +71,8 @@ Requires-Dist: adlfs; extra == "azure"
71
71
  Provides-Extra: azure-ai-search
72
72
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
73
73
  Provides-Extra: biomed
74
- Requires-Dist: requests; extra == "biomed"
75
74
  Requires-Dist: bs4; extra == "biomed"
75
+ Requires-Dist: requests; extra == "biomed"
76
76
  Provides-Extra: box
77
77
  Requires-Dist: fsspec; extra == "box"
78
78
  Requires-Dist: boxfs; extra == "box"
@@ -81,13 +81,13 @@ Requires-Dist: chromadb; extra == "chroma"
81
81
  Provides-Extra: clarifai
82
82
  Requires-Dist: clarifai; extra == "clarifai"
83
83
  Provides-Extra: confluence
84
- Requires-Dist: atlassian-python-api; extra == "confluence"
85
84
  Requires-Dist: requests; extra == "confluence"
85
+ Requires-Dist: atlassian-python-api; extra == "confluence"
86
86
  Provides-Extra: couchbase
87
87
  Requires-Dist: couchbase; extra == "couchbase"
88
88
  Provides-Extra: delta-table
89
- Requires-Dist: deltalake; extra == "delta-table"
90
89
  Requires-Dist: boto3; extra == "delta-table"
90
+ Requires-Dist: deltalake; extra == "delta-table"
91
91
  Provides-Extra: discord
92
92
  Requires-Dist: discord-py; extra == "discord"
93
93
  Provides-Extra: dropbox
@@ -97,8 +97,8 @@ Provides-Extra: elasticsearch
97
97
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
98
98
  Provides-Extra: gcs
99
99
  Requires-Dist: fsspec; extra == "gcs"
100
- Requires-Dist: bs4; extra == "gcs"
101
100
  Requires-Dist: gcsfs; extra == "gcs"
101
+ Requires-Dist: bs4; extra == "gcs"
102
102
  Provides-Extra: github
103
103
  Requires-Dist: pygithub>1.58.0; extra == "github"
104
104
  Requires-Dist: requests; extra == "github"
@@ -107,22 +107,24 @@ Requires-Dist: python-gitlab; extra == "gitlab"
107
107
  Provides-Extra: google-drive
108
108
  Requires-Dist: google-api-python-client; extra == "google-drive"
109
109
  Provides-Extra: hubspot
110
- Requires-Dist: urllib3; extra == "hubspot"
111
110
  Requires-Dist: hubspot-api-client; extra == "hubspot"
111
+ Requires-Dist: urllib3; extra == "hubspot"
112
112
  Provides-Extra: jira
113
113
  Requires-Dist: atlassian-python-api; extra == "jira"
114
114
  Provides-Extra: kafka
115
115
  Requires-Dist: confluent-kafka; extra == "kafka"
116
116
  Provides-Extra: kdbai
117
117
  Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
118
+ Provides-Extra: lancedb
119
+ Requires-Dist: lancedb; extra == "lancedb"
118
120
  Provides-Extra: milvus
119
121
  Requires-Dist: pymilvus; extra == "milvus"
120
122
  Provides-Extra: mongodb
121
123
  Requires-Dist: pymongo; extra == "mongodb"
122
124
  Provides-Extra: notion
125
+ Requires-Dist: notion-client; extra == "notion"
123
126
  Requires-Dist: htmlBuilder; extra == "notion"
124
127
  Requires-Dist: httpx; extra == "notion"
125
- Requires-Dist: notion-client; extra == "notion"
126
128
  Requires-Dist: backoff; extra == "notion"
127
129
  Provides-Extra: onedrive
128
130
  Requires-Dist: msal; extra == "onedrive"
@@ -1,7 +1,7 @@
1
1
  """
2
2
  setup.py
3
3
 
4
- unstructured - pre-processing tools for unstructured data
4
+ unstructured-ingest - pre-processing tools for unstructured data
5
5
 
6
6
  Copyright 2022 Unstructured Technologies, Inc.
7
7
 
@@ -104,6 +104,7 @@ connectors_reqs = {
104
104
  "jira": load_requirements("requirements/connectors/jira.in"),
105
105
  "kafka": load_requirements("requirements/connectors/kafka.in"),
106
106
  "kdbai": load_requirements("requirements/connectors/kdbai.in"),
107
+ "lancedb": load_requirements("requirements/connectors/lancedb.in"),
107
108
  "milvus": load_requirements("requirements/connectors/milvus.in"),
108
109
  "mongodb": load_requirements("requirements/connectors/mongodb.in"),
109
110
  "notion": load_requirements("requirements/connectors/notion.in"),
@@ -14,9 +14,9 @@ from upath import UPath
14
14
  from test.integration.connectors.utils.constants import DESTINATION_TAG
15
15
  from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
16
16
  from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
17
- LanceDBS3AccessConfig,
18
- LanceDBS3ConnectionConfig,
19
- LanceDBS3Uploader,
17
+ LanceDBAwsAccessConfig,
18
+ LanceDBAwsConnectionConfig,
19
+ LanceDBAwsUploader,
20
20
  )
21
21
  from unstructured_ingest.v2.processes.connectors.lancedb.azure import (
22
22
  LanceDBAzureAccessConfig,
@@ -156,7 +156,7 @@ def _get_uri(target: Literal["local", "s3", "gcs", "az"], local_base_path: Path)
156
156
 
157
157
  def _get_uploader(
158
158
  uri: str,
159
- ) -> Union[LanceDBAzureUploader, LanceDBAzureUploader, LanceDBS3Uploader, LanceDBGSPUploader]:
159
+ ) -> Union[LanceDBAzureUploader, LanceDBAzureUploader, LanceDBAwsUploader, LanceDBGSPUploader]:
160
160
  target = uri.split("://", maxsplit=1)[0] if uri.startswith(("s3", "az", "gs")) else "local"
161
161
  if target == "az":
162
162
  azure_connection_string = os.getenv("AZURE_DEST_CONNECTION_STR")
@@ -170,10 +170,10 @@ def _get_uploader(
170
170
  )
171
171
 
172
172
  elif target == "s3":
173
- return LanceDBS3Uploader(
173
+ return LanceDBAwsUploader(
174
174
  upload_config=LanceDBUploaderConfig(table_name=TABLE_NAME),
175
- connection_config=LanceDBS3ConnectionConfig(
176
- access_config=LanceDBS3AccessConfig(
175
+ connection_config=LanceDBAwsConnectionConfig(
176
+ access_config=LanceDBAwsAccessConfig(
177
177
  aws_access_key_id=os.getenv("S3_INGEST_TEST_ACCESS_KEY"),
178
178
  aws_secret_access_key=os.getenv("S3_INGEST_TEST_SECRET_KEY"),
179
179
  ),
@@ -15,6 +15,7 @@ from pymilvus.milvus_client import IndexParams
15
15
  from test.integration.connectors.utils.constants import DESTINATION_TAG, env_setup_path
16
16
  from test.integration.connectors.utils.docker import healthcheck_wait
17
17
  from test.integration.connectors.utils.docker_compose import docker_compose_context
18
+ from unstructured_ingest.error import DestinationConnectionError
18
19
  from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
19
20
  from unstructured_ingest.v2.processes.connectors.milvus import (
20
21
  CONNECTOR_TYPE,
@@ -24,9 +25,10 @@ from unstructured_ingest.v2.processes.connectors.milvus import (
24
25
  MilvusUploadStager,
25
26
  )
26
27
 
27
- DB_URI = "http://localhost:19530"
28
28
  DB_NAME = "test_database"
29
- COLLECTION_NAME = "test_collection"
29
+ EXISTENT_COLLECTION_NAME = "test_collection"
30
+ NONEXISTENT_COLLECTION_NAME = "nonexistent_collection"
31
+ DB_URI = "http://localhost:19530"
30
32
 
31
33
 
32
34
  def get_schema() -> CollectionSchema:
@@ -55,7 +57,9 @@ def get_index_params() -> IndexParams:
55
57
  return index_params
56
58
 
57
59
 
58
- @pytest.fixture
60
+ # NOTE: Precheck tests are read-only so they don't interfere with destination test,
61
+ # using scope="module" we can limit number of times the docker-compose has to be run
62
+ @pytest.fixture(scope="module")
59
63
  def collection():
60
64
  docker_client = docker.from_env()
61
65
  with docker_compose_context(docker_compose_path=env_setup_path / "milvus"):
@@ -73,10 +77,10 @@ def collection():
73
77
  schema = get_schema()
74
78
  index_params = get_index_params()
75
79
  collection_resp = milvus_client.create_collection(
76
- collection_name=COLLECTION_NAME, schema=schema, index_params=index_params
80
+ collection_name=EXISTENT_COLLECTION_NAME, schema=schema, index_params=index_params
77
81
  )
78
- print(f"Created collection {COLLECTION_NAME}: {collection_resp}")
79
- yield COLLECTION_NAME
82
+ print(f"Created collection {EXISTENT_COLLECTION_NAME}: {collection_resp}")
83
+ yield EXISTENT_COLLECTION_NAME
80
84
  finally:
81
85
  milvus_client.close()
82
86
 
@@ -139,3 +143,27 @@ async def test_milvus_destination(
139
143
  uploader.run(path=staged_filepath, file_data=file_data)
140
144
  with uploader.get_client() as client:
141
145
  validate_count(client=client, expected_count=expected_count)
146
+
147
+
148
+ @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
149
+ def test_precheck_succeeds(collection: str):
150
+ uploader = MilvusUploader(
151
+ connection_config=MilvusConnectionConfig(uri=DB_URI),
152
+ upload_config=MilvusUploaderConfig(db_name=DB_NAME, collection_name=collection),
153
+ )
154
+ uploader.precheck()
155
+
156
+
157
+ @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
158
+ def test_precheck_fails_on_nonexistent_collection(collection: str):
159
+ uploader = MilvusUploader(
160
+ connection_config=MilvusConnectionConfig(uri=DB_URI),
161
+ upload_config=MilvusUploaderConfig(
162
+ db_name=DB_NAME, collection_name=NONEXISTENT_COLLECTION_NAME
163
+ ),
164
+ )
165
+ with pytest.raises(
166
+ DestinationConnectionError,
167
+ match=f"Collection '{NONEXISTENT_COLLECTION_NAME}' does not exist",
168
+ ):
169
+ uploader.precheck()
@@ -0,0 +1,34 @@
1
+ import pytest
2
+ from pydantic import ValidationError
3
+
4
+ from unstructured_ingest.v2.processes.connectors.weaviate.cloud import (
5
+ CloudWeaviateAccessConfig,
6
+ CloudWeaviateConnectionConfig,
7
+ )
8
+
9
+
10
+ def test_weaviate_failing_connection_config():
11
+ with pytest.raises(ValidationError):
12
+ CloudWeaviateConnectionConfig(
13
+ access_config=CloudWeaviateAccessConfig(api_key="my key", password="password"),
14
+ username="username",
15
+ cluster_url="clusterurl",
16
+ )
17
+
18
+
19
+ def test_weaviate_connection_config_happy_path():
20
+ CloudWeaviateConnectionConfig(
21
+ access_config=CloudWeaviateAccessConfig(
22
+ api_key="my key",
23
+ ),
24
+ cluster_url="clusterurl",
25
+ )
26
+
27
+
28
+ def test_weaviate_connection_config_anonymous():
29
+ CloudWeaviateConnectionConfig(
30
+ access_config=CloudWeaviateAccessConfig(api_key="my key", password="password"),
31
+ username="username",
32
+ anonymous=True,
33
+ cluster_url="clusterurl",
34
+ )
@@ -0,0 +1,59 @@
1
+ import json
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from test.integration.embedders.utils import validate_embedding_output, validate_raw_embedder
7
+ from test.integration.utils import requires_env
8
+ from unstructured_ingest.embed.azure_openai import (
9
+ AzureOpenAIEmbeddingConfig,
10
+ AzureOpenAIEmbeddingEncoder,
11
+ )
12
+ from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
13
+
14
+ API_KEY = "AZURE_OPENAI_API_KEY"
15
+ ENDPOINT = "AZURE_OPENAI_ENDPOINT"
16
+
17
+
18
+ @dataclass(frozen=True)
19
+ class AzureData:
20
+ api_key: str
21
+ endpoint: str
22
+
23
+
24
+ def get_azure_data() -> AzureData:
25
+ api_key = os.getenv(API_KEY, None)
26
+ assert api_key
27
+ endpoint = os.getenv(ENDPOINT, None)
28
+ assert endpoint
29
+ return AzureData(api_key, endpoint)
30
+
31
+
32
+ @requires_env(API_KEY, ENDPOINT)
33
+ def test_azure_openai_embedder(embedder_file: Path):
34
+ azure_data = get_azure_data()
35
+ embedder_config = EmbedderConfig(
36
+ embedding_provider="azure-openai",
37
+ embedding_api_key=azure_data.api_key,
38
+ embedding_azure_endpoint=azure_data.endpoint,
39
+ )
40
+ embedder = Embedder(config=embedder_config)
41
+ results = embedder.run(elements_filepath=embedder_file)
42
+ assert results
43
+ with embedder_file.open("r") as f:
44
+ original_elements = json.load(f)
45
+ validate_embedding_output(original_elements=original_elements, output_elements=results)
46
+
47
+
48
+ @requires_env(API_KEY, ENDPOINT)
49
+ def test_raw_azure_openai_embedder(embedder_file: Path):
50
+ azure_data = get_azure_data()
51
+ embedder = AzureOpenAIEmbeddingEncoder(
52
+ config=AzureOpenAIEmbeddingConfig(
53
+ api_key=azure_data.api_key,
54
+ azure_endpoint=azure_data.endpoint,
55
+ )
56
+ )
57
+ validate_raw_embedder(
58
+ embedder=embedder, embedder_file=embedder_file, expected_dimensions=(1536,)
59
+ )
@@ -8,7 +8,11 @@ import pytz
8
8
 
9
9
  from unstructured_ingest.cli.utils import extract_config
10
10
  from unstructured_ingest.interfaces import BaseConfig
11
- from unstructured_ingest.utils.string_and_date_utils import ensure_isoformat_datetime, json_to_dict
11
+ from unstructured_ingest.utils.string_and_date_utils import (
12
+ ensure_isoformat_datetime,
13
+ json_to_dict,
14
+ truncate_string_bytes,
15
+ )
12
16
 
13
17
 
14
18
  @dataclass
@@ -162,3 +166,19 @@ def test_ensure_isoformat_datetime_fails_on_string():
162
166
  def test_ensure_isoformat_datetime_fails_on_int():
163
167
  with pytest.raises(TypeError):
164
168
  ensure_isoformat_datetime(1111)
169
+
170
+
171
+ def test_truncate_string_bytes_return_truncated_string():
172
+ test_string = "abcdef안녕하세요ghijklmn방갑습니opqrstu 더 길어지면 안되는 문자열vwxyz"
173
+ max_bytes = 11
174
+ result = truncate_string_bytes(test_string, max_bytes)
175
+ assert result == "abcdef안"
176
+ assert len(result.encode("utf-8")) <= max_bytes
177
+
178
+
179
+ def test_truncate_string_bytes_return_untouched_string():
180
+ test_string = "abcdef"
181
+ max_bytes = 11
182
+ result = truncate_string_bytes(test_string, max_bytes)
183
+ assert result == "abcdef"
184
+ assert len(result.encode("utf-8")) <= max_bytes
@@ -0,0 +1 @@
1
+ __version__ = "0.3.4" # pragma: no cover
@@ -0,0 +1,31 @@
1
+ from dataclasses import dataclass
2
+ from typing import TYPE_CHECKING
3
+
4
+ from pydantic import Field
5
+
6
+ from unstructured_ingest.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
7
+ from unstructured_ingest.utils.dep_check import requires_dependencies
8
+
9
+ if TYPE_CHECKING:
10
+ from openai import AzureOpenAI
11
+
12
+
13
+ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
14
+ api_version: str = Field(description="Azure API version", default="2024-06-01")
15
+ azure_endpoint: str
16
+ embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
17
+
18
+ @requires_dependencies(["openai"], extras="openai")
19
+ def get_client(self) -> "AzureOpenAI":
20
+ from openai import AzureOpenAI
21
+
22
+ return AzureOpenAI(
23
+ api_key=self.api_key.get_secret_value(),
24
+ api_version=self.api_version,
25
+ azure_endpoint=self.azure_endpoint,
26
+ )
27
+
28
+
29
+ @dataclass
30
+ class AzureOpenAIEmbeddingEncoder(OpenAIEmbeddingEncoder):
31
+ config: AzureOpenAIEmbeddingConfig
@@ -37,3 +37,13 @@ def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str:
37
37
  raise ValueError(f"String '{timestamp}' could not be parsed as a datetime.") from e
38
38
  else:
39
39
  raise TypeError(f"Expected input type datetime or str, but got {type(timestamp)}.")
40
+
41
+
42
+ def truncate_string_bytes(string: str, max_bytes: int, encoding: str = "utf-8") -> str:
43
+ """
44
+ Truncates a string to a specified maximum number of bytes.
45
+ """
46
+ encoded_string = str(string).encode(encoding)
47
+ if len(encoded_string) <= max_bytes:
48
+ return string
49
+ return encoded_string[:max_bytes].decode(encoding, errors="ignore")
@@ -19,6 +19,7 @@ from unstructured_ingest.error import (
19
19
  )
20
20
  from unstructured_ingest.utils.data_prep import batch_generator
21
21
  from unstructured_ingest.utils.dep_check import requires_dependencies
22
+ from unstructured_ingest.utils.string_and_date_utils import truncate_string_bytes
22
23
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
23
24
  from unstructured_ingest.v2.interfaces import (
24
25
  AccessConfig,
@@ -50,6 +51,8 @@ if TYPE_CHECKING:
50
51
 
51
52
  CONNECTOR_TYPE = "astradb"
52
53
 
54
+ MAX_CONTENT_PARAM_BYTE_SIZE = 8000
55
+
53
56
 
54
57
  class AstraDBAccessConfig(AccessConfig):
55
58
  token: str = Field(description="Astra DB Token with access to the database.")
@@ -301,7 +304,20 @@ class AstraDBUploadStager(UploadStager):
301
304
  default_factory=lambda: AstraDBUploadStagerConfig()
302
305
  )
303
306
 
307
+ def truncate_dict_elements(self, element_dict: dict) -> None:
308
+ text = element_dict.pop("text", None)
309
+ if text is not None:
310
+ element_dict["text"] = truncate_string_bytes(text, MAX_CONTENT_PARAM_BYTE_SIZE)
311
+ metadata = element_dict.get("metadata")
312
+ if metadata is not None and isinstance(metadata, dict):
313
+ text_as_html = element_dict["metadata"].pop("text_as_html", None)
314
+ if text_as_html is not None:
315
+ element_dict["metadata"]["text_as_html"] = truncate_string_bytes(
316
+ text_as_html, MAX_CONTENT_PARAM_BYTE_SIZE
317
+ )
318
+
304
319
  def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
320
+ self.truncate_dict_elements(element_dict)
305
321
  return {
306
322
  "$vector": element_dict.pop("embeddings", None),
307
323
  "content": element_dict.pop("text", None),
@@ -219,6 +219,9 @@ class CouchbaseIndexer(Indexer):
219
219
 
220
220
 
221
221
  class CouchbaseDownloaderConfig(DownloaderConfig):
222
+ collection_id: str = Field(
223
+ default="id", description="The unique key of the id field in the collection"
224
+ )
222
225
  fields: list[str] = field(default_factory=list)
223
226
 
224
227
 
@@ -250,7 +253,7 @@ class CouchbaseDownloader(Downloader):
250
253
  def generate_download_response(
251
254
  self, result: dict, bucket: str, file_data: FileData
252
255
  ) -> DownloadResponse:
253
- record_id = result["id"]
256
+ record_id = result[self.download_config.collection_id]
254
257
  filename_id = self.get_identifier(bucket=bucket, record_id=record_id)
255
258
  filename = f"{filename_id}.txt"
256
259
  download_path = self.download_dir / Path(filename)
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from unstructured_ingest.v2.processes.connector_registry import add_destination_entry
4
+
5
+ from .aws import CONNECTOR_TYPE as LANCEDB_S3_CONNECTOR_TYPE
6
+ from .aws import lancedb_aws_destination_entry
7
+ from .azure import CONNECTOR_TYPE as LANCEDB_AZURE_CONNECTOR_TYPE
8
+ from .azure import lancedb_azure_destination_entry
9
+ from .cloud import CONNECTOR_TYPE as LANCEDB_CLOUD_CONNECTOR_TYPE
10
+ from .cloud import lancedb_cloud_destination_entry
11
+ from .gcp import CONNECTOR_TYPE as LANCEDB_GCS_CONNECTOR_TYPE
12
+ from .gcp import lancedb_gcp_destination_entry
13
+ from .local import CONNECTOR_TYPE as LANCEDB_LOCAL_CONNECTOR_TYPE
14
+ from .local import lancedb_local_destination_entry
15
+
16
+ add_destination_entry(
17
+ destination_type=LANCEDB_S3_CONNECTOR_TYPE, entry=lancedb_aws_destination_entry
18
+ )
19
+ add_destination_entry(
20
+ destination_type=LANCEDB_AZURE_CONNECTOR_TYPE, entry=lancedb_azure_destination_entry
21
+ )
22
+ add_destination_entry(
23
+ destination_type=LANCEDB_GCS_CONNECTOR_TYPE, entry=lancedb_gcp_destination_entry
24
+ )
25
+ add_destination_entry(
26
+ destination_type=LANCEDB_LOCAL_CONNECTOR_TYPE, entry=lancedb_local_destination_entry
27
+ )
28
+ add_destination_entry(
29
+ destination_type=LANCEDB_CLOUD_CONNECTOR_TYPE, entry=lancedb_cloud_destination_entry
30
+ )
@@ -15,28 +15,28 @@ from unstructured_ingest.v2.processes.connectors.lancedb.lancedb import (
15
15
  CONNECTOR_TYPE = "lancedb_aws"
16
16
 
17
17
 
18
- class LanceDBS3AccessConfig(AccessConfig):
18
+ class LanceDBAwsAccessConfig(AccessConfig):
19
19
  aws_access_key_id: str = Field(description="The AWS access key ID to use.")
20
20
  aws_secret_access_key: str = Field(description="The AWS secret access key to use.")
21
21
 
22
22
 
23
- class LanceDBS3ConnectionConfig(LanceDBRemoteConnectionConfig):
24
- access_config: Secret[LanceDBS3AccessConfig]
23
+ class LanceDBAwsConnectionConfig(LanceDBRemoteConnectionConfig):
24
+ access_config: Secret[LanceDBAwsAccessConfig]
25
25
 
26
26
  def get_storage_options(self) -> dict:
27
27
  return {**self.access_config.get_secret_value().model_dump(), "timeout": self.timeout}
28
28
 
29
29
 
30
30
  @dataclass
31
- class LanceDBS3Uploader(LanceDBUploader):
31
+ class LanceDBAwsUploader(LanceDBUploader):
32
32
  upload_config: LanceDBUploaderConfig
33
- connection_config: LanceDBS3ConnectionConfig
33
+ connection_config: LanceDBAwsConnectionConfig
34
34
  connector_type: str = CONNECTOR_TYPE
35
35
 
36
36
 
37
37
  lancedb_aws_destination_entry = DestinationRegistryEntry(
38
- connection_config=LanceDBS3ConnectionConfig,
39
- uploader=LanceDBS3Uploader,
38
+ connection_config=LanceDBAwsConnectionConfig,
39
+ uploader=LanceDBAwsUploader,
40
40
  uploader_config=LanceDBUploaderConfig,
41
41
  upload_stager_config=LanceDBUploadStagerConfig,
42
42
  upload_stager=LanceDBUploadStager,
@@ -0,0 +1,42 @@
1
+ from dataclasses import dataclass
2
+
3
+ from pydantic import Field, Secret
4
+
5
+ from unstructured_ingest.v2.interfaces.connector import AccessConfig
6
+ from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
7
+ from unstructured_ingest.v2.processes.connectors.lancedb.lancedb import (
8
+ LanceDBRemoteConnectionConfig,
9
+ LanceDBUploader,
10
+ LanceDBUploaderConfig,
11
+ LanceDBUploadStager,
12
+ LanceDBUploadStagerConfig,
13
+ )
14
+
15
+ CONNECTOR_TYPE = "lancedb_cloud"
16
+
17
+
18
+ class LanceDBCloudAccessConfig(AccessConfig):
19
+ api_key: str = Field(description="Api key associated with LanceDb cloud")
20
+
21
+
22
+ class LanceDBCloudConnectionConfig(LanceDBRemoteConnectionConfig):
23
+ access_config: Secret[LanceDBCloudAccessConfig]
24
+
25
+ def get_storage_options(self) -> dict:
26
+ return {**self.access_config.get_secret_value().model_dump(), "timeout": self.timeout}
27
+
28
+
29
+ @dataclass
30
+ class LanceDBCloudUploader(LanceDBUploader):
31
+ upload_config: LanceDBUploaderConfig
32
+ connection_config: LanceDBCloudConnectionConfig
33
+ connector_type: str = CONNECTOR_TYPE
34
+
35
+
36
+ lancedb_cloud_destination_entry = DestinationRegistryEntry(
37
+ connection_config=LanceDBCloudConnectionConfig,
38
+ uploader=LanceDBCloudUploader,
39
+ uploader_config=LanceDBUploaderConfig,
40
+ upload_stager_config=LanceDBUploadStagerConfig,
41
+ upload_stager=LanceDBUploadStager,
42
+ )
@@ -8,7 +8,7 @@ import pandas as pd
8
8
  from dateutil import parser
9
9
  from pydantic import Field, Secret
10
10
 
11
- from unstructured_ingest.error import WriteError
11
+ from unstructured_ingest.error import DestinationConnectionError, WriteError
12
12
  from unstructured_ingest.utils.data_prep import flatten_dict
13
13
  from unstructured_ingest.utils.dep_check import requires_dependencies
14
14
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
@@ -66,7 +66,6 @@ class MilvusConnectionConfig(ConnectionConfig):
66
66
 
67
67
 
68
68
  class MilvusUploadStagerConfig(UploadStagerConfig):
69
-
70
69
  fields_to_include: Optional[list[str]] = None
71
70
  """If set - list of fields to include in the output.
72
71
  Unspecified fields are removed from the elements.
@@ -174,6 +173,14 @@ class MilvusUploader(Uploader):
174
173
  upload_config: MilvusUploaderConfig
175
174
  connector_type: str = CONNECTOR_TYPE
176
175
 
176
+ @DestinationConnectionError.wrap
177
+ def precheck(self):
178
+ with self.get_client() as client:
179
+ if not client.has_collection(self.upload_config.collection_name):
180
+ raise DestinationConnectionError(
181
+ f"Collection '{self.upload_config.collection_name}' does not exist"
182
+ )
183
+
177
184
  @contextmanager
178
185
  def get_client(self) -> Generator["MilvusClient", None, None]:
179
186
  client = self.connection_config.get_client()
@@ -218,7 +225,6 @@ class MilvusUploader(Uploader):
218
225
  f"db in collection {self.upload_config.collection_name}"
219
226
  )
220
227
  with self.get_client() as client:
221
-
222
228
  try:
223
229
  res = client.insert(collection_name=self.upload_config.collection_name, data=data)
224
230
  except MilvusException as milvus_exception:
@@ -55,10 +55,11 @@ class CloudWeaviateConnectionConfig(WeaviateConnectionConfig):
55
55
  "client_secret": access_config.client_secret is not None,
56
56
  "client_password": access_config.password is not None and self.username is not None,
57
57
  }
58
- if len(auths) == 0:
58
+ existing_auths = [auth_method for auth_method, flag in auths.items() if flag]
59
+
60
+ if len(existing_auths) == 0:
59
61
  raise ValueError("No auth values provided and anonymous is False")
60
- if len(auths) > 1:
61
- existing_auths = [auth_method for auth_method, flag in auths.items() if flag]
62
+ if len(existing_auths) > 1:
62
63
  raise ValueError(
63
64
  "Multiple auth values provided, only one approach can be used: {}".format(
64
65
  ", ".join(existing_auths)