unstructured-ingest 0.7.1__tar.gz → 1.0.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (385)
  1. unstructured_ingest-1.0.1/.gitignore +212 -0
  2. unstructured_ingest-1.0.1/PKG-INFO +226 -0
  3. unstructured_ingest-1.0.1/pyproject.toml +212 -0
  4. unstructured_ingest-1.0.1/unstructured_ingest/__version__.py +1 -0
  5. unstructured_ingest-1.0.1/unstructured_ingest/cli/README.md +28 -0
  6. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/mixedbreadai.py +0 -1
  7. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/upload_stager.py +2 -2
  8. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/uploader.py +3 -3
  9. unstructured_ingest-1.0.1/unstructured_ingest/logger.py +39 -0
  10. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/interfaces.py +1 -1
  11. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/pipeline.py +1 -1
  12. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/chunker.py +4 -0
  13. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/airtable.py +4 -2
  14. unstructured_ingest-1.0.1/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +10 -0
  15. unstructured_ingest-1.0.1/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
  16. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/astradb.py +2 -2
  17. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
  18. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/confluence.py +0 -1
  19. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
  20. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
  21. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
  22. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
  23. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/delta_table.py +1 -0
  24. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
  25. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
  26. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
  27. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
  28. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/gitlab.py +1 -2
  29. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/google_drive.py +0 -2
  30. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
  31. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/kdbai.py +1 -0
  32. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/outlook.py +1 -2
  33. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/pinecone.py +0 -1
  34. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/redisdb.py +28 -24
  35. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/salesforce.py +1 -1
  36. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/slack.py +1 -2
  37. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
  38. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
  39. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
  40. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
  41. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/sql.py +3 -4
  42. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
  43. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
  44. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/vectara.py +0 -2
  45. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
  46. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/embedder.py +2 -2
  47. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/filter.py +1 -1
  48. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/partitioner.py +4 -0
  49. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/utils/blob_storage.py +2 -2
  50. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/unstructured_api.py +13 -8
  51. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/data_prep.py +8 -32
  52. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/string_and_date_utils.py +3 -3
  53. unstructured_ingest-0.7.1/MANIFEST.in +0 -2
  54. unstructured_ingest-0.7.1/PKG-INFO +0 -383
  55. unstructured_ingest-0.7.1/examples/airtable.py +0 -44
  56. unstructured_ingest-0.7.1/examples/azure_cognitive_search.py +0 -55
  57. unstructured_ingest-0.7.1/examples/chroma.py +0 -54
  58. unstructured_ingest-0.7.1/examples/couchbase.py +0 -55
  59. unstructured_ingest-0.7.1/examples/databricks_volumes_dest.py +0 -55
  60. unstructured_ingest-0.7.1/examples/databricks_volumes_source.py +0 -53
  61. unstructured_ingest-0.7.1/examples/delta_table.py +0 -45
  62. unstructured_ingest-0.7.1/examples/discord_example.py +0 -36
  63. unstructured_ingest-0.7.1/examples/elasticsearch.py +0 -49
  64. unstructured_ingest-0.7.1/examples/google_drive.py +0 -45
  65. unstructured_ingest-0.7.1/examples/kdbai.py +0 -54
  66. unstructured_ingest-0.7.1/examples/local.py +0 -36
  67. unstructured_ingest-0.7.1/examples/milvus.py +0 -44
  68. unstructured_ingest-0.7.1/examples/mongodb.py +0 -53
  69. unstructured_ingest-0.7.1/examples/opensearch.py +0 -50
  70. unstructured_ingest-0.7.1/examples/pinecone.py +0 -57
  71. unstructured_ingest-0.7.1/examples/s3.py +0 -38
  72. unstructured_ingest-0.7.1/examples/salesforce.py +0 -44
  73. unstructured_ingest-0.7.1/examples/sharepoint.py +0 -47
  74. unstructured_ingest-0.7.1/examples/singlestore.py +0 -49
  75. unstructured_ingest-0.7.1/examples/sql.py +0 -90
  76. unstructured_ingest-0.7.1/examples/vectara.py +0 -54
  77. unstructured_ingest-0.7.1/examples/weaviate.py +0 -44
  78. unstructured_ingest-0.7.1/pyproject.toml +0 -79
  79. unstructured_ingest-0.7.1/setup.cfg +0 -19
  80. unstructured_ingest-0.7.1/setup.py +0 -217
  81. unstructured_ingest-0.7.1/test/integration/chunkers/test_chunkers.py +0 -31
  82. unstructured_ingest-0.7.1/test/integration/connectors/conftest.py +0 -38
  83. unstructured_ingest-0.7.1/test/integration/connectors/databricks/test_volumes_native.py +0 -273
  84. unstructured_ingest-0.7.1/test/integration/connectors/discord/test_discord.py +0 -90
  85. unstructured_ingest-0.7.1/test/integration/connectors/duckdb/conftest.py +0 -14
  86. unstructured_ingest-0.7.1/test/integration/connectors/duckdb/test_duckdb.py +0 -90
  87. unstructured_ingest-0.7.1/test/integration/connectors/duckdb/test_motherduck.py +0 -95
  88. unstructured_ingest-0.7.1/test/integration/connectors/elasticsearch/conftest.py +0 -34
  89. unstructured_ingest-0.7.1/test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
  90. unstructured_ingest-0.7.1/test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
  91. unstructured_ingest-0.7.1/test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
  92. unstructured_ingest-0.7.1/test/integration/connectors/sql/test_postgres.py +0 -201
  93. unstructured_ingest-0.7.1/test/integration/connectors/sql/test_singlestore.py +0 -182
  94. unstructured_ingest-0.7.1/test/integration/connectors/sql/test_snowflake.py +0 -244
  95. unstructured_ingest-0.7.1/test/integration/connectors/sql/test_sqlite.py +0 -168
  96. unstructured_ingest-0.7.1/test/integration/connectors/sql/test_vastdb.py +0 -34
  97. unstructured_ingest-0.7.1/test/integration/connectors/test_astradb.py +0 -287
  98. unstructured_ingest-0.7.1/test/integration/connectors/test_azure_ai_search.py +0 -254
  99. unstructured_ingest-0.7.1/test/integration/connectors/test_chroma.py +0 -136
  100. unstructured_ingest-0.7.1/test/integration/connectors/test_confluence.py +0 -111
  101. unstructured_ingest-0.7.1/test/integration/connectors/test_delta_table.py +0 -183
  102. unstructured_ingest-0.7.1/test/integration/connectors/test_dropbox.py +0 -151
  103. unstructured_ingest-0.7.1/test/integration/connectors/test_github.py +0 -49
  104. unstructured_ingest-0.7.1/test/integration/connectors/test_google_drive.py +0 -257
  105. unstructured_ingest-0.7.1/test/integration/connectors/test_jira.py +0 -67
  106. unstructured_ingest-0.7.1/test/integration/connectors/test_lancedb.py +0 -247
  107. unstructured_ingest-0.7.1/test/integration/connectors/test_milvus.py +0 -208
  108. unstructured_ingest-0.7.1/test/integration/connectors/test_mongodb.py +0 -335
  109. unstructured_ingest-0.7.1/test/integration/connectors/test_neo4j.py +0 -244
  110. unstructured_ingest-0.7.1/test/integration/connectors/test_notion.py +0 -152
  111. unstructured_ingest-0.7.1/test/integration/connectors/test_onedrive.py +0 -163
  112. unstructured_ingest-0.7.1/test/integration/connectors/test_pinecone.py +0 -387
  113. unstructured_ingest-0.7.1/test/integration/connectors/test_qdrant.py +0 -216
  114. unstructured_ingest-0.7.1/test/integration/connectors/test_redis.py +0 -143
  115. unstructured_ingest-0.7.1/test/integration/connectors/test_s3.py +0 -184
  116. unstructured_ingest-0.7.1/test/integration/connectors/test_sharepoint.py +0 -222
  117. unstructured_ingest-0.7.1/test/integration/connectors/test_vectara.py +0 -282
  118. unstructured_ingest-0.7.1/test/integration/connectors/test_zendesk.py +0 -120
  119. unstructured_ingest-0.7.1/test/integration/connectors/utils/constants.py +0 -13
  120. unstructured_ingest-0.7.1/test/integration/connectors/utils/docker.py +0 -151
  121. unstructured_ingest-0.7.1/test/integration/connectors/utils/docker_compose.py +0 -59
  122. unstructured_ingest-0.7.1/test/integration/connectors/utils/validation/destination.py +0 -77
  123. unstructured_ingest-0.7.1/test/integration/connectors/utils/validation/equality.py +0 -76
  124. unstructured_ingest-0.7.1/test/integration/connectors/utils/validation/source.py +0 -331
  125. unstructured_ingest-0.7.1/test/integration/connectors/utils/validation/utils.py +0 -36
  126. unstructured_ingest-0.7.1/test/integration/connectors/weaviate/__init__.py +0 -0
  127. unstructured_ingest-0.7.1/test/integration/connectors/weaviate/conftest.py +0 -15
  128. unstructured_ingest-0.7.1/test/integration/connectors/weaviate/test_cloud.py +0 -39
  129. unstructured_ingest-0.7.1/test/integration/connectors/weaviate/test_local.py +0 -152
  130. unstructured_ingest-0.7.1/test/integration/embedders/__init__.py +0 -0
  131. unstructured_ingest-0.7.1/test/integration/embedders/conftest.py +0 -13
  132. unstructured_ingest-0.7.1/test/integration/embedders/test_azure_openai.py +0 -57
  133. unstructured_ingest-0.7.1/test/integration/embedders/test_bedrock.py +0 -103
  134. unstructured_ingest-0.7.1/test/integration/embedders/test_huggingface.py +0 -24
  135. unstructured_ingest-0.7.1/test/integration/embedders/test_mixedbread.py +0 -71
  136. unstructured_ingest-0.7.1/test/integration/embedders/test_octoai.py +0 -75
  137. unstructured_ingest-0.7.1/test/integration/embedders/test_openai.py +0 -74
  138. unstructured_ingest-0.7.1/test/integration/embedders/test_togetherai.py +0 -71
  139. unstructured_ingest-0.7.1/test/integration/embedders/test_vertexai.py +0 -63
  140. unstructured_ingest-0.7.1/test/integration/embedders/test_voyageai.py +0 -79
  141. unstructured_ingest-0.7.1/test/integration/embedders/utils.py +0 -66
  142. unstructured_ingest-0.7.1/test/integration/partitioners/__init__.py +0 -0
  143. unstructured_ingest-0.7.1/test/integration/partitioners/test_partitioner.py +0 -76
  144. unstructured_ingest-0.7.1/test/integration/utils.py +0 -15
  145. unstructured_ingest-0.7.1/test/unit/__init__.py +0 -0
  146. unstructured_ingest-0.7.1/test/unit/chunkers/__init__.py +0 -0
  147. unstructured_ingest-0.7.1/test/unit/chunkers/test_chunkers.py +0 -49
  148. unstructured_ingest-0.7.1/test/unit/connectors/__init__.py +0 -0
  149. unstructured_ingest-0.7.1/test/unit/connectors/ibm_watsonx/__init__.py +0 -0
  150. unstructured_ingest-0.7.1/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
  151. unstructured_ingest-0.7.1/test/unit/connectors/motherduck/__init__.py +0 -0
  152. unstructured_ingest-0.7.1/test/unit/connectors/motherduck/test_base.py +0 -73
  153. unstructured_ingest-0.7.1/test/unit/connectors/sql/__init__.py +0 -0
  154. unstructured_ingest-0.7.1/test/unit/connectors/sql/test_sql.py +0 -152
  155. unstructured_ingest-0.7.1/test/unit/connectors/test_confluence.py +0 -71
  156. unstructured_ingest-0.7.1/test/unit/connectors/test_jira.py +0 -401
  157. unstructured_ingest-0.7.1/test/unit/embed/__init__.py +0 -0
  158. unstructured_ingest-0.7.1/test/unit/embed/test_mixedbreadai.py +0 -42
  159. unstructured_ingest-0.7.1/test/unit/embed/test_octoai.py +0 -27
  160. unstructured_ingest-0.7.1/test/unit/embed/test_openai.py +0 -28
  161. unstructured_ingest-0.7.1/test/unit/embed/test_vertexai.py +0 -25
  162. unstructured_ingest-0.7.1/test/unit/embed/test_voyageai.py +0 -24
  163. unstructured_ingest-0.7.1/test/unit/embedders/__init__.py +0 -0
  164. unstructured_ingest-0.7.1/test/unit/embedders/test_bedrock.py +0 -36
  165. unstructured_ingest-0.7.1/test/unit/embedders/test_huggingface.py +0 -48
  166. unstructured_ingest-0.7.1/test/unit/embedders/test_mixedbread.py +0 -37
  167. unstructured_ingest-0.7.1/test/unit/embedders/test_octoai.py +0 -35
  168. unstructured_ingest-0.7.1/test/unit/embedders/test_openai.py +0 -35
  169. unstructured_ingest-0.7.1/test/unit/embedders/test_togetherai.py +0 -37
  170. unstructured_ingest-0.7.1/test/unit/embedders/test_vertexai.py +0 -37
  171. unstructured_ingest-0.7.1/test/unit/embedders/test_voyageai.py +0 -38
  172. unstructured_ingest-0.7.1/test/unit/partitioners/__init__.py +0 -0
  173. unstructured_ingest-0.7.1/test/unit/partitioners/test_partitioner.py +0 -63
  174. unstructured_ingest-0.7.1/test/unit/test_error.py +0 -27
  175. unstructured_ingest-0.7.1/test/unit/test_html.py +0 -112
  176. unstructured_ingest-0.7.1/test/unit/test_interfaces.py +0 -26
  177. unstructured_ingest-0.7.1/test/unit/test_logger.py +0 -78
  178. unstructured_ingest-0.7.1/test/unit/test_utils.py +0 -220
  179. unstructured_ingest-0.7.1/test/unit/utils/__init__.py +0 -0
  180. unstructured_ingest-0.7.1/test/unit/utils/data_generator.py +0 -32
  181. unstructured_ingest-0.7.1/unstructured_ingest/__version__.py +0 -1
  182. unstructured_ingest-0.7.1/unstructured_ingest/cli/__init__.py +0 -0
  183. unstructured_ingest-0.7.1/unstructured_ingest/cli/utils/__init__.py +0 -0
  184. unstructured_ingest-0.7.1/unstructured_ingest/data_types/__init__.py +0 -0
  185. unstructured_ingest-0.7.1/unstructured_ingest/embed/__init__.py +0 -0
  186. unstructured_ingest-0.7.1/unstructured_ingest/logger.py +0 -130
  187. unstructured_ingest-0.7.1/unstructured_ingest/pipeline/__init__.py +0 -0
  188. unstructured_ingest-0.7.1/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  189. unstructured_ingest-0.7.1/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  190. unstructured_ingest-0.7.1/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  191. unstructured_ingest-0.7.1/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  192. unstructured_ingest-0.7.1/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  193. unstructured_ingest-0.7.1/unstructured_ingest/processes/utils/__init__.py +0 -0
  194. unstructured_ingest-0.7.1/unstructured_ingest/utils/__init__.py +0 -0
  195. unstructured_ingest-0.7.1/unstructured_ingest.egg-info/PKG-INFO +0 -383
  196. unstructured_ingest-0.7.1/unstructured_ingest.egg-info/SOURCES.txt +0 -376
  197. unstructured_ingest-0.7.1/unstructured_ingest.egg-info/dependency_links.txt +0 -1
  198. unstructured_ingest-0.7.1/unstructured_ingest.egg-info/entry_points.txt +0 -2
  199. unstructured_ingest-0.7.1/unstructured_ingest.egg-info/requires.txt +0 -418
  200. unstructured_ingest-0.7.1/unstructured_ingest.egg-info/top_level.txt +0 -3
  201. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/LICENSE.md +0 -0
  202. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/README.md +0 -0
  203. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/__init__.py +0 -0
  204. {unstructured_ingest-0.7.1/examples → unstructured_ingest-1.0.1/unstructured_ingest/cli}/__init__.py +0 -0
  205. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/base/__init__.py +0 -0
  206. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/base/cmd.py +0 -0
  207. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/base/dest.py +0 -0
  208. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/base/importer.py +0 -0
  209. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/base/src.py +0 -0
  210. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/cli.py +0 -0
  211. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/cmds.py +0 -0
  212. {unstructured_ingest-0.7.1/test → unstructured_ingest-1.0.1/unstructured_ingest/cli/utils}/__init__.py +0 -0
  213. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/utils/click.py +0 -0
  214. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  215. {unstructured_ingest-0.7.1/test/integration → unstructured_ingest-1.0.1/unstructured_ingest/data_types}/__init__.py +0 -0
  216. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/data_types/file_data.py +0 -0
  217. {unstructured_ingest-0.7.1/test/integration/chunkers → unstructured_ingest-1.0.1/unstructured_ingest/embed}/__init__.py +0 -0
  218. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/azure_openai.py +0 -0
  219. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/bedrock.py +0 -0
  220. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/huggingface.py +0 -0
  221. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/interfaces.py +0 -0
  222. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/octoai.py +0 -0
  223. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/openai.py +0 -0
  224. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/togetherai.py +0 -0
  225. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/vertexai.py +0 -0
  226. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/embed/voyageai.py +0 -0
  227. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/error.py +0 -0
  228. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/errors_v2.py +0 -0
  229. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/__init__.py +0 -0
  230. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/connector.py +0 -0
  231. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/downloader.py +0 -0
  232. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/indexer.py +0 -0
  233. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/process.py +0 -0
  234. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/interfaces/processor.py +0 -0
  235. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/main.py +0 -0
  236. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/otel.py +0 -0
  237. {unstructured_ingest-0.7.1/test/integration/connectors → unstructured_ingest-1.0.1/unstructured_ingest/pipeline}/__init__.py +0 -0
  238. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/otel.py +0 -0
  239. {unstructured_ingest-0.7.1/test/integration/connectors/databricks → unstructured_ingest-1.0.1/unstructured_ingest/pipeline/steps}/__init__.py +0 -0
  240. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  241. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/download.py +0 -0
  242. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  243. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  244. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/index.py +0 -0
  245. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  246. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  247. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  248. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  249. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/__init__.py +0 -0
  250. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connector_registry.py +0 -0
  251. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  252. {unstructured_ingest-0.7.1/test/integration/connectors/discord → unstructured_ingest-1.0.1/unstructured_ingest/processes/connectors/assets}/__init__.py +0 -0
  253. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  254. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  255. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  256. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  257. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  258. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/discord.py +0 -0
  259. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  260. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  261. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  262. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  263. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  264. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  265. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  266. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  267. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  268. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  269. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  270. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  271. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/github.py +0 -0
  272. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  273. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/jira.py +0 -0
  274. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  275. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  276. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  277. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  278. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  279. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  280. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  281. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  282. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  283. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  284. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  285. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/local.py +0 -0
  286. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  287. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  288. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  289. {unstructured_ingest-0.7.1/test/integration/connectors/duckdb → unstructured_ingest-1.0.1/unstructured_ingest/processes/connectors/notion}/__init__.py +0 -0
  290. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  291. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  292. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  293. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  294. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  295. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  296. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  297. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  298. {unstructured_ingest-0.7.1/test/integration/connectors/elasticsearch → unstructured_ingest-1.0.1/unstructured_ingest/processes/connectors/notion/types}/__init__.py +0 -0
  299. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  300. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  301. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  302. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  303. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  304. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  305. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  306. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  307. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  308. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  309. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  310. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  311. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  312. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  313. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  314. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  315. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  316. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  317. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  318. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  319. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  320. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  321. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  322. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  323. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  324. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  325. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  326. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  327. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  328. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  329. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  330. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  331. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  332. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  333. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  334. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  335. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  336. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  337. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  338. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  339. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  340. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  341. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  342. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  343. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  344. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  345. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  346. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  347. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
  348. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  349. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  350. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  351. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  352. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  353. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  354. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  355. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  356. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  357. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  358. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  359. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  360. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  361. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  362. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  363. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  364. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  365. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  366. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  367. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/utils.py +0 -0
  368. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  369. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  370. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  371. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  372. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  373. {unstructured_ingest-0.7.1/test/integration/connectors/sql → unstructured_ingest-1.0.1/unstructured_ingest/processes/connectors/zendesk}/__init__.py +0 -0
  374. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  375. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/processes/uncompress.py +0 -0
  376. {unstructured_ingest-0.7.1/test/integration/connectors → unstructured_ingest-1.0.1/unstructured_ingest/processes}/utils/__init__.py +0 -0
  377. {unstructured_ingest-0.7.1/test/integration/connectors/utils/validation → unstructured_ingest-1.0.1/unstructured_ingest/utils}/__init__.py +0 -0
  378. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/chunking.py +0 -0
  379. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/compression.py +0 -0
  380. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/constants.py +0 -0
  381. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/dep_check.py +0 -0
  382. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/html.py +0 -0
  383. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/ndjson.py +0 -0
  384. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/pydantic_models.py +0 -0
  385. {unstructured_ingest-0.7.1 → unstructured_ingest-1.0.1}/unstructured_ingest/utils/table.py +0 -0
@@ -0,0 +1,212 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ figures/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ pip-wheel-metadata/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Pycharm
42
+ .idea/
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+ nbs/
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .venv
112
+ env/
113
+ venv/
114
+ ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # pyright (Python LSP/type-checker in VSCode) config
137
+ /pyrightconfig.json
138
+
139
+ # ingest outputs
140
+ /structured-output
141
+ test_unstructured_ingest/workdir/
142
+ test_unstructured_ingest/delta-table-dest/
143
+ test_unstructured_ingest/skipped-files.txt
144
+ test_unstructured_ingest/chroma-dest/
145
+
146
+ # suggested ingest mirror directory
147
+ /mirror
148
+
149
+ ## https://github.com/github/gitignore/blob/main/Global/Emacs.gitignore (partial)
150
+
151
+ *~
152
+ \#*\#
153
+ /.emacs.desktop
154
+ /.emacs.desktop.lock
155
+ *.elc
156
+ auto-save-list
157
+ tramp
158
+ .\#*
159
+
160
+ ## https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
161
+ .vscode/*
162
+ !.vscode/tasks.json
163
+ !.vscode/launch.json
164
+ !.vscode/extensions.json
165
+ !.vscode/*.code-snippets
166
+
167
+ # Local History for Visual Studio Code
168
+ .history/
169
+
170
+ # Built Visual Studio Code Extensions
171
+ *.vsix
172
+
173
+ ## https://github.com/github/gitignore/blob/main/Global/Vim.gitignore
174
+ # Swap
175
+ [._]*.s[a-v][a-z]
176
+ !*.svg # comment out if you don't need vector files
177
+ [._]*.sw[a-p]
178
+ [._]s[a-rt-v][a-z]
179
+ [._]ss[a-gi-z]
180
+ [._]sw[a-p]
181
+
182
+ # Session
183
+ Session.vim
184
+ Sessionx.vim
185
+
186
+ # Temporary
187
+ .netrwhist
188
+ # Auto-generated tag files
189
+ tags
190
+ # Persistent undo
191
+ [._]*.un~
192
+
193
+ .DS_Store
194
+
195
+ # Ruff cache
196
+ .ruff_cache/
197
+
198
+ .ppm
199
+ .vs
200
+
201
+ example-docs/*_images
202
+ examples/**/output/
203
+
204
+ outputdiff.txt
205
+ metricsdiff.txt
206
+
207
+ # analysis
208
+ annotated/
209
+
210
+ tmp_ingest/
211
+ .vs
212
+ .report.json
@@ -0,0 +1,226 @@
1
+ Metadata-Version: 2.4
2
+ Name: unstructured_ingest
3
+ Version: 1.0.1
4
+ Summary: Local ETL data pipeline to get data RAG ready
5
+ Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
+ License-Expression: Apache-2.0
7
+ License-File: LICENSE.md
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: <3.13,>=3.9
21
+ Requires-Dist: click
22
+ Requires-Dist: dataclasses-json
23
+ Requires-Dist: opentelemetry-sdk
24
+ Requires-Dist: pydantic>=2.7
25
+ Requires-Dist: python-dateutil
26
+ Requires-Dist: tqdm
27
+ Provides-Extra: airtable
28
+ Requires-Dist: pandas; extra == 'airtable'
29
+ Requires-Dist: pyairtable; extra == 'airtable'
30
+ Provides-Extra: astradb
31
+ Requires-Dist: astrapy; extra == 'astradb'
32
+ Provides-Extra: azure
33
+ Requires-Dist: adlfs; extra == 'azure'
34
+ Requires-Dist: fsspec; extra == 'azure'
35
+ Provides-Extra: azure-ai-search
36
+ Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
37
+ Provides-Extra: bedrock
38
+ Requires-Dist: aioboto3; extra == 'bedrock'
39
+ Requires-Dist: boto3; extra == 'bedrock'
40
+ Provides-Extra: biomed
41
+ Requires-Dist: bs4; extra == 'biomed'
42
+ Requires-Dist: requests; extra == 'biomed'
43
+ Provides-Extra: box
44
+ Requires-Dist: boxfs; extra == 'box'
45
+ Requires-Dist: fsspec; extra == 'box'
46
+ Provides-Extra: chroma
47
+ Requires-Dist: chromadb; extra == 'chroma'
48
+ Provides-Extra: clarifai
49
+ Requires-Dist: clarifai; extra == 'clarifai'
50
+ Provides-Extra: confluence
51
+ Requires-Dist: atlassian-python-api; extra == 'confluence'
52
+ Requires-Dist: requests; extra == 'confluence'
53
+ Provides-Extra: couchbase
54
+ Requires-Dist: couchbase; extra == 'couchbase'
55
+ Provides-Extra: databricks-delta-tables
56
+ Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
57
+ Requires-Dist: pandas; extra == 'databricks-delta-tables'
58
+ Provides-Extra: databricks-volumes
59
+ Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
60
+ Provides-Extra: delta-table
61
+ Requires-Dist: boto3; extra == 'delta-table'
62
+ Requires-Dist: deltalake; extra == 'delta-table'
63
+ Requires-Dist: pandas; extra == 'delta-table'
64
+ Provides-Extra: discord
65
+ Requires-Dist: discord-py; extra == 'discord'
66
+ Provides-Extra: doc
67
+ Requires-Dist: unstructured[doc]; extra == 'doc'
68
+ Provides-Extra: docx
69
+ Requires-Dist: unstructured[docx]; extra == 'docx'
70
+ Provides-Extra: dropbox
71
+ Requires-Dist: dropboxdrivefs; extra == 'dropbox'
72
+ Requires-Dist: fsspec; extra == 'dropbox'
73
+ Provides-Extra: duckdb
74
+ Requires-Dist: duckdb; extra == 'duckdb'
75
+ Requires-Dist: pandas; extra == 'duckdb'
76
+ Provides-Extra: elasticsearch
77
+ Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
78
+ Provides-Extra: epub
79
+ Requires-Dist: unstructured[epub]; extra == 'epub'
80
+ Provides-Extra: gcs
81
+ Requires-Dist: bs4; extra == 'gcs'
82
+ Requires-Dist: fsspec; extra == 'gcs'
83
+ Requires-Dist: gcsfs; extra == 'gcs'
84
+ Provides-Extra: github
85
+ Requires-Dist: pygithub>1.58.0; extra == 'github'
86
+ Requires-Dist: requests; extra == 'github'
87
+ Provides-Extra: gitlab
88
+ Requires-Dist: python-gitlab; extra == 'gitlab'
89
+ Provides-Extra: google-drive
90
+ Requires-Dist: google-api-python-client; extra == 'google-drive'
91
+ Provides-Extra: hubspot
92
+ Requires-Dist: hubspot-api-client; extra == 'hubspot'
93
+ Requires-Dist: urllib3; extra == 'hubspot'
94
+ Provides-Extra: huggingface
95
+ Requires-Dist: sentence-transformers; extra == 'huggingface'
96
+ Provides-Extra: ibm-watsonx-s3
97
+ Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
98
+ Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
99
+ Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
100
+ Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
101
+ Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
102
+ Provides-Extra: image
103
+ Requires-Dist: unstructured[image]; extra == 'image'
104
+ Provides-Extra: jira
105
+ Requires-Dist: atlassian-python-api; extra == 'jira'
106
+ Provides-Extra: kafka
107
+ Requires-Dist: confluent-kafka; extra == 'kafka'
108
+ Provides-Extra: kdbai
109
+ Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
110
+ Requires-Dist: pandas; extra == 'kdbai'
111
+ Provides-Extra: lancedb
112
+ Requires-Dist: lancedb; extra == 'lancedb'
113
+ Provides-Extra: md
114
+ Requires-Dist: unstructured[md]; extra == 'md'
115
+ Provides-Extra: milvus
116
+ Requires-Dist: pymilvus; extra == 'milvus'
117
+ Provides-Extra: mixedbreadai
118
+ Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
119
+ Provides-Extra: mongodb
120
+ Requires-Dist: pymongo; extra == 'mongodb'
121
+ Provides-Extra: msg
122
+ Requires-Dist: unstructured[msg]; extra == 'msg'
123
+ Provides-Extra: neo4j
124
+ Requires-Dist: cymple; extra == 'neo4j'
125
+ Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
126
+ Requires-Dist: networkx; extra == 'neo4j'
127
+ Provides-Extra: notion
128
+ Requires-Dist: backoff; extra == 'notion'
129
+ Requires-Dist: htmlbuilder; extra == 'notion'
130
+ Requires-Dist: httpx; extra == 'notion'
131
+ Requires-Dist: notion-client; extra == 'notion'
132
+ Provides-Extra: octoai
133
+ Requires-Dist: openai; extra == 'octoai'
134
+ Requires-Dist: tiktoken; extra == 'octoai'
135
+ Provides-Extra: odt
136
+ Requires-Dist: unstructured[odt]; extra == 'odt'
137
+ Provides-Extra: onedrive
138
+ Requires-Dist: msal; extra == 'onedrive'
139
+ Requires-Dist: office365-rest-python-client; extra == 'onedrive'
140
+ Requires-Dist: requests; extra == 'onedrive'
141
+ Provides-Extra: openai
142
+ Requires-Dist: openai; extra == 'openai'
143
+ Requires-Dist: tiktoken; extra == 'openai'
144
+ Provides-Extra: opensearch
145
+ Requires-Dist: opensearch-py; extra == 'opensearch'
146
+ Provides-Extra: org
147
+ Requires-Dist: unstructured[org]; extra == 'org'
148
+ Provides-Extra: outlook
149
+ Requires-Dist: msal; extra == 'outlook'
150
+ Requires-Dist: office365-rest-python-client; extra == 'outlook'
151
+ Provides-Extra: pdf
152
+ Requires-Dist: unstructured[pdf]; extra == 'pdf'
153
+ Provides-Extra: pinecone
154
+ Requires-Dist: pinecone; extra == 'pinecone'
155
+ Provides-Extra: postgres
156
+ Requires-Dist: pandas; extra == 'postgres'
157
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
158
+ Provides-Extra: ppt
159
+ Requires-Dist: unstructured[ppt]; extra == 'ppt'
160
+ Provides-Extra: pptx
161
+ Requires-Dist: unstructured[pptx]; extra == 'pptx'
162
+ Provides-Extra: qdrant
163
+ Requires-Dist: qdrant-client; extra == 'qdrant'
164
+ Provides-Extra: reddit
165
+ Requires-Dist: praw; extra == 'reddit'
166
+ Provides-Extra: redis
167
+ Requires-Dist: redis; extra == 'redis'
168
+ Provides-Extra: remote
169
+ Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
170
+ Provides-Extra: rst
171
+ Requires-Dist: unstructured[rst]; extra == 'rst'
172
+ Provides-Extra: rtf
173
+ Requires-Dist: unstructured[rtf]; extra == 'rtf'
174
+ Provides-Extra: s3
175
+ Requires-Dist: fsspec; extra == 's3'
176
+ Requires-Dist: s3fs; extra == 's3'
177
+ Provides-Extra: salesforce
178
+ Requires-Dist: simple-salesforce; extra == 'salesforce'
179
+ Provides-Extra: sftp
180
+ Requires-Dist: fsspec; extra == 'sftp'
181
+ Requires-Dist: paramiko; extra == 'sftp'
182
+ Provides-Extra: sharepoint
183
+ Requires-Dist: msal; extra == 'sharepoint'
184
+ Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
185
+ Requires-Dist: requests; extra == 'sharepoint'
186
+ Provides-Extra: singlestore
187
+ Requires-Dist: pandas; extra == 'singlestore'
188
+ Requires-Dist: singlestoredb; extra == 'singlestore'
189
+ Provides-Extra: slack
190
+ Requires-Dist: slack-sdk[optional]; extra == 'slack'
191
+ Provides-Extra: snowflake
192
+ Requires-Dist: pandas; extra == 'snowflake'
193
+ Requires-Dist: psycopg2-binary; extra == 'snowflake'
194
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
195
+ Provides-Extra: togetherai
196
+ Requires-Dist: together; extra == 'togetherai'
197
+ Provides-Extra: tsv
198
+ Requires-Dist: unstructured[tsv]; extra == 'tsv'
199
+ Provides-Extra: vastdb
200
+ Requires-Dist: ibis; extra == 'vastdb'
201
+ Requires-Dist: pandas; extra == 'vastdb'
202
+ Requires-Dist: pyarrow; extra == 'vastdb'
203
+ Requires-Dist: vastdb; extra == 'vastdb'
204
+ Provides-Extra: vectara
205
+ Requires-Dist: aiofiles; extra == 'vectara'
206
+ Requires-Dist: httpx; extra == 'vectara'
207
+ Requires-Dist: requests; extra == 'vectara'
208
+ Provides-Extra: vertexai
209
+ Requires-Dist: vertexai; extra == 'vertexai'
210
+ Provides-Extra: voyageai
211
+ Requires-Dist: voyageai; extra == 'voyageai'
212
+ Provides-Extra: weaviate
213
+ Requires-Dist: weaviate-client; extra == 'weaviate'
214
+ Provides-Extra: wikipedia
215
+ Requires-Dist: wikipedia; extra == 'wikipedia'
216
+ Provides-Extra: xlsx
217
+ Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
218
+ Provides-Extra: zendesk
219
+ Requires-Dist: aiofiles; extra == 'zendesk'
220
+ Requires-Dist: bs4; extra == 'zendesk'
221
+ Requires-Dist: httpx; extra == 'zendesk'
222
+ Description-Content-Type: text/markdown
223
+
224
+ # Unstructured Ingest
225
+
226
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
@@ -0,0 +1,212 @@
1
+ [project]
2
+ name = "unstructured_ingest"
3
+ description = "Local ETL data pipeline to get data RAG ready"
4
+ requires-python = ">=3.9, <3.13"
5
+ authors = [{name = "Unstructured Technologies", email = "devops@unstructuredai.io"}]
6
+ classifiers = [
7
+ "Development Status :: 4 - Beta",
8
+ "Intended Audience :: Developers",
9
+ "Intended Audience :: Education",
10
+ "Intended Audience :: Science/Research",
11
+ "License :: OSI Approved :: Apache Software License",
12
+ "Operating System :: OS Independent",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.9",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+ readme = "README.md"
21
+ license = "Apache-2.0"
22
+ dynamic = ["version", "dependencies", "optional-dependencies"]
23
+
24
+ [tool.hatch.metadata.hooks.requirements_txt]
25
+ files = ["requirements/base.txt"]
26
+
27
+ [tool.hatch.metadata.hooks.requirements_txt.optional-dependencies]
28
+ # Connectors
29
+ airtable = ["requirements/connectors/airtable.txt"]
30
+ astradb = ["requirements/connectors/astradb.txt"]
31
+ azure-ai-search = ["requirements/connectors/azure-ai-search.txt"]
32
+ azure = ["requirements/connectors/azure.txt"]
33
+ biomed = ["requirements/connectors/biomed.txt"]
34
+ box = ["requirements/connectors/box.txt"]
35
+ chroma = ["requirements/connectors/chroma.txt"]
36
+ clarifai = ["requirements/connectors/clarifai.txt"]
37
+ confluence = ["requirements/connectors/confluence.txt"]
38
+ couchbase = ["requirements/connectors/couchbase.txt"]
39
+ databricks-delta-tables = ["requirements/connectors/databricks-delta-tables.txt"]
40
+ databricks-volumes = ["requirements/connectors/databricks-volumes.txt"]
41
+ delta-table = ["requirements/connectors/delta-table.txt"]
42
+ discord = ["requirements/connectors/discord.txt"]
43
+ dropbox = ["requirements/connectors/dropbox.txt"]
44
+ duckdb = ["requirements/connectors/duckdb.txt"]
45
+ elasticsearch = ["requirements/connectors/elasticsearch.txt"]
46
+ gcs = ["requirements/connectors/gcs.txt"]
47
+ github = ["requirements/connectors/github.txt"]
48
+ gitlab = ["requirements/connectors/gitlab.txt"]
49
+ google-drive = ["requirements/connectors/google-drive.txt"]
50
+ hubspot = ["requirements/connectors/hubspot.txt"]
51
+ ibm-watsonx-s3 = ["requirements/connectors/ibm-watsonx-s3.txt"]
52
+ jira = ["requirements/connectors/jira.txt"]
53
+ kafka = ["requirements/connectors/kafka.txt"]
54
+ kdbai = ["requirements/connectors/kdbai.txt"]
55
+ lancedb = ["requirements/connectors/lancedb.txt"]
56
+ milvus = ["requirements/connectors/milvus.txt"]
57
+ mongodb = ["requirements/connectors/mongodb.txt"]
58
+ neo4j = ["requirements/connectors/neo4j.txt"]
59
+ notion = ["requirements/connectors/notion.txt"]
60
+ onedrive = ["requirements/connectors/onedrive.txt"]
61
+ opensearch = ["requirements/connectors/opensearch.txt"]
62
+ outlook = ["requirements/connectors/outlook.txt"]
63
+ pinecone = ["requirements/connectors/pinecone.txt"]
64
+ postgres = ["requirements/connectors/postgres.txt"]
65
+ qdrant = ["requirements/connectors/qdrant.txt"]
66
+ reddit = ["requirements/connectors/reddit.txt"]
67
+ redis = ["requirements/connectors/redis.txt"]
68
+ s3 = ["requirements/connectors/s3.txt"]
69
+ salesforce = ["requirements/connectors/salesforce.txt"]
70
+ sftp = ["requirements/connectors/sftp.txt"]
71
+ sharepoint = ["requirements/connectors/sharepoint.txt"]
72
+ singlestore = ["requirements/connectors/singlestore.txt"]
73
+ slack = ["requirements/connectors/slack.txt"]
74
+ snowflake = ["requirements/connectors/snowflake.txt"]
75
+ vastdb = ["requirements/connectors/vastdb.txt"]
76
+ vectara = ["requirements/connectors/vectara.txt"]
77
+ weaviate = ["requirements/connectors/weaviate.txt"]
78
+ wikipedia = ["requirements/connectors/wikipedia.txt"]
79
+ zendesk = ["requirements/connectors/zendesk.txt"]
80
+
81
+ # Embedders
82
+ bedrock = ["requirements/embed/bedrock.txt"]
83
+ huggingface = ["requirements/embed/huggingface.txt"]
84
+ mixedbreadai = ["requirements/embed/mixedbreadai.txt"]
85
+ octoai = ["requirements/embed/octoai.txt"]
86
+ openai = ["requirements/embed/openai.txt"]
87
+ togetherai = ["requirements/embed/togetherai.txt"]
88
+ vertexai = ["requirements/embed/vertexai.txt"]
89
+ voyageai = ["requirements/embed/voyageai.txt"]
90
+
91
+ # remote
92
+ remote = ["requirements/remote/client.txt"]
93
+
94
+ # local partition
95
+ doc = ["requirements/local_partition/doc.txt"]
96
+ docx = ["requirements/local_partition/docx.txt"]
97
+ epub = ["requirements/local_partition/epub.txt"]
98
+ image = ["requirements/local_partition/image.txt"]
99
+ md = ["requirements/local_partition/md.txt"]
100
+ msg = ["requirements/local_partition/msg.txt"]
101
+ odt = ["requirements/local_partition/odt.txt"]
102
+ org = ["requirements/local_partition/org.txt"]
103
+ pdf = ["requirements/local_partition/pdf.txt"]
104
+ ppt = ["requirements/local_partition/ppt.txt"]
105
+ pptx = ["requirements/local_partition/pptx.txt"]
106
+ rst = ["requirements/local_partition/rst.txt"]
107
+ rtf = ["requirements/local_partition/rtf.txt"]
108
+ tsv = ["requirements/local_partition/tsv.txt"]
109
+ xlsx = ["requirements/local_partition/xlsx.txt"]
110
+
111
+
112
+ [tool.hatch.version]
113
+ path = "unstructured_ingest/__version__.py"
114
+
115
+ [dependency-groups]
116
+ release = [
117
+ "twine",
118
+ "wheel",
119
+ "build"
120
+ ]
121
+ lint = [
122
+ "ruff",
123
+ ]
124
+ test = [
125
+ "pytest",
126
+ "pytest-cov",
127
+ "pytest-mock",
128
+ "pytest-check",
129
+ "pytest-asyncio",
130
+ "pytest_tagging",
131
+ "pytest-json-report",
132
+ "pytest-timeout",
133
+ "faker",
134
+ "docker",
135
+ "universal_pathlib",
136
+ "deepdiff",
137
+ "bs4",
138
+ "pandas",
139
+
140
+ # Connector specific deps
141
+ "cryptography",
142
+ "fsspec",
143
+ "vertexai",
144
+ "pyiceberg",
145
+ "pyarrow",
146
+ ]
147
+ # Add constraints needed for CI
148
+ ci = [
149
+ # consistency with local-inference-pin
150
+ "protobuf<4.24",
151
+ "grpcio>=1.65.5",
152
+ # TODO: Pinned in transformers package, remove when that gets updated
153
+ "tokenizers>=0.19,<0.20",
154
+ # TODO: Constraint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
155
+ # updated or we drop support for 3.9
156
+ "urllib3<1.27",
157
+ # TODO: Constraint due to aiobotocore, remove when that gets updated:
158
+ "botocore<1.34.132",
159
+ # TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
160
+ "importlib-metadata>=8.5.0",
161
+ # TODO: Constraint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
162
+ # updated or we drop support for 3.9
163
+ "urllib3<1.27",
164
+ "unstructured-client>= 0.25.8",
165
+ "fsspec==2024.5.0",
166
+ # python 3.12 support
167
+ "wrapt>=1.14.0",
168
+ "numpy<2",
169
+ # deltalake >=0.23.0 currently has a bug with the version of pyarrow it installs
170
+ "deltalake<=0.22.0",
171
+ # TODO: investigate breaking changes introduced in lancedb>0.15.0
172
+ "lancedb<=0.15.0",
173
+ # TODO: versions higher than this are missing the macos wheel
174
+ "pykx==2.5.3",
175
+ "astrapy<2.0.0"
176
+ ]
177
+
178
+ [project.scripts]
179
+ unstructured-ingest = "unstructured_ingest.main:main"
180
+
181
+
182
+ [build-system]
183
+ requires = ["hatchling", "hatch-requirements-txt"]
184
+ build-backend = "hatchling.build"
185
+
186
+ [tool.ruff]
187
+ line-length = 100
188
+
189
+ [tool.ruff.lint]
190
+ select = [
191
+ # pycodestyle
192
+ "E",
193
+ # Pyflakes
194
+ "F",
195
+ # flake8-simplify
196
+ "SIM",
197
+ # isort
198
+ "I",
199
+ ]
200
+
201
+ [tool.pytest.ini_options]
202
+ asyncio_mode = "auto"
203
+
204
+ [tool.coverage.report]
205
+ # TODO: Update as this improves
206
+ fail_under = 0
207
+
208
+ [tool.hatch.build.targets.wheel]
209
+ packages = ["/unstructured_ingest"]
210
+
211
+ [tool.hatch.build.targets.sdist]
212
+ packages = ["/unstructured_ingest"]
@@ -0,0 +1 @@
1
+ __version__ = "1.0.1" # pragma: no cover
@@ -0,0 +1,28 @@
1
+ # Ingest CLI
2
+ This package helps map user input via a cli to the underlying ingest code to run a small ETL pipeline.
3
+
4
+ ## Design Reference
5
+ [cli.py](cli.py) is the main entrypoint to run the cli itself. The key point here is the interaction between all
6
+ source and destination connectors.
7
+
8
+ To manually run the cli:
9
+ ```shell
10
+ PYTHONPATH=. python unstructured_ingest/main.py --help
11
+ ```
12
+
13
+ The `main.py` file simply wraps the generated Click command created in `cli.py`.
14
+
15
+ ### Source Commands
16
+ All source commands are added as sub commands to the parent ingest Click group. This allows each command to map to
17
+ different connectors with shared and unique parameters.
18
+
19
+ ### Destination Commands
20
+ All destination commands are added as sub commands to each parent source command. This allows each invocation of the source
21
+ sub command to display all possible destination subcommands. The code in [utils.py](./utils.py) helps structure the
22
+ generated text from the Click library to be more intuitive on this approach (i.e. list sub commands as `Destinations`).
23
+
24
+ ### Configs
25
+ The configs in [configs/](./configs) and connector specific ones in [cmds/](./cmds) help surface all user parameters that
26
+ are needed to marshall the input dictionary from Click into all the respective configs needed to create a full pipeline run.
27
+ Because click returns a flat dictionary of user inputs, the `extract_config` method in `utils.py` helps deserialize this dictionary
28
+ into dataclasses that have nested fields (such as access configs).
@@ -114,7 +114,6 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
114
114
 
115
115
  @dataclass
116
116
  class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
117
-
118
117
  config: MixedbreadAIEmbeddingConfig
119
118
 
120
119
  async def get_exemplary_embedding(self) -> list[float]: