unstructured-ingest 0.7.2__tar.gz → 1.0.2__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (383)
  1. unstructured_ingest-1.0.2/.gitignore +212 -0
  2. unstructured_ingest-1.0.2/PKG-INFO +226 -0
  3. unstructured_ingest-1.0.2/pyproject.toml +211 -0
  4. unstructured_ingest-1.0.2/unstructured_ingest/__version__.py +1 -0
  5. unstructured_ingest-1.0.2/unstructured_ingest/cli/README.md +28 -0
  6. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/mixedbreadai.py +0 -1
  7. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/upload_stager.py +2 -2
  8. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/uploader.py +3 -3
  9. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/interfaces.py +1 -1
  10. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/pipeline.py +1 -1
  11. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/chunker.py +4 -0
  12. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/airtable.py +4 -2
  13. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/astradb.py +48 -34
  14. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
  15. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/confluence.py +0 -1
  16. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
  17. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
  18. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
  19. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
  20. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/delta_table.py +1 -0
  21. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
  22. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
  23. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
  24. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
  25. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/gitlab.py +1 -2
  26. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/google_drive.py +0 -2
  27. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
  28. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/kdbai.py +1 -0
  29. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/outlook.py +1 -2
  30. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/pinecone.py +0 -1
  31. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/redisdb.py +28 -24
  32. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/salesforce.py +1 -1
  33. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/slack.py +1 -2
  34. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
  35. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
  36. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
  37. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
  38. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/sql.py +3 -4
  39. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
  40. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
  41. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/vectara.py +0 -2
  42. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
  43. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/embedder.py +2 -2
  44. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/filter.py +1 -1
  45. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/partitioner.py +4 -0
  46. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/utils/blob_storage.py +2 -2
  47. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/unstructured_api.py +13 -8
  48. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/data_prep.py +8 -32
  49. unstructured_ingest-0.7.2/MANIFEST.in +0 -2
  50. unstructured_ingest-0.7.2/PKG-INFO +0 -383
  51. unstructured_ingest-0.7.2/examples/airtable.py +0 -44
  52. unstructured_ingest-0.7.2/examples/azure_cognitive_search.py +0 -55
  53. unstructured_ingest-0.7.2/examples/chroma.py +0 -54
  54. unstructured_ingest-0.7.2/examples/couchbase.py +0 -55
  55. unstructured_ingest-0.7.2/examples/databricks_volumes_dest.py +0 -55
  56. unstructured_ingest-0.7.2/examples/databricks_volumes_source.py +0 -53
  57. unstructured_ingest-0.7.2/examples/delta_table.py +0 -45
  58. unstructured_ingest-0.7.2/examples/discord_example.py +0 -36
  59. unstructured_ingest-0.7.2/examples/elasticsearch.py +0 -49
  60. unstructured_ingest-0.7.2/examples/google_drive.py +0 -45
  61. unstructured_ingest-0.7.2/examples/kdbai.py +0 -54
  62. unstructured_ingest-0.7.2/examples/local.py +0 -36
  63. unstructured_ingest-0.7.2/examples/milvus.py +0 -44
  64. unstructured_ingest-0.7.2/examples/mongodb.py +0 -53
  65. unstructured_ingest-0.7.2/examples/opensearch.py +0 -50
  66. unstructured_ingest-0.7.2/examples/pinecone.py +0 -57
  67. unstructured_ingest-0.7.2/examples/s3.py +0 -38
  68. unstructured_ingest-0.7.2/examples/salesforce.py +0 -44
  69. unstructured_ingest-0.7.2/examples/sharepoint.py +0 -47
  70. unstructured_ingest-0.7.2/examples/singlestore.py +0 -49
  71. unstructured_ingest-0.7.2/examples/sql.py +0 -90
  72. unstructured_ingest-0.7.2/examples/vectara.py +0 -54
  73. unstructured_ingest-0.7.2/examples/weaviate.py +0 -44
  74. unstructured_ingest-0.7.2/pyproject.toml +0 -79
  75. unstructured_ingest-0.7.2/setup.cfg +0 -19
  76. unstructured_ingest-0.7.2/setup.py +0 -217
  77. unstructured_ingest-0.7.2/test/integration/chunkers/test_chunkers.py +0 -31
  78. unstructured_ingest-0.7.2/test/integration/connectors/conftest.py +0 -38
  79. unstructured_ingest-0.7.2/test/integration/connectors/databricks/test_volumes_native.py +0 -273
  80. unstructured_ingest-0.7.2/test/integration/connectors/discord/test_discord.py +0 -90
  81. unstructured_ingest-0.7.2/test/integration/connectors/duckdb/conftest.py +0 -14
  82. unstructured_ingest-0.7.2/test/integration/connectors/duckdb/test_duckdb.py +0 -90
  83. unstructured_ingest-0.7.2/test/integration/connectors/duckdb/test_motherduck.py +0 -95
  84. unstructured_ingest-0.7.2/test/integration/connectors/elasticsearch/conftest.py +0 -34
  85. unstructured_ingest-0.7.2/test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
  86. unstructured_ingest-0.7.2/test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
  87. unstructured_ingest-0.7.2/test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
  88. unstructured_ingest-0.7.2/test/integration/connectors/sql/test_postgres.py +0 -201
  89. unstructured_ingest-0.7.2/test/integration/connectors/sql/test_singlestore.py +0 -182
  90. unstructured_ingest-0.7.2/test/integration/connectors/sql/test_snowflake.py +0 -244
  91. unstructured_ingest-0.7.2/test/integration/connectors/sql/test_sqlite.py +0 -168
  92. unstructured_ingest-0.7.2/test/integration/connectors/sql/test_vastdb.py +0 -34
  93. unstructured_ingest-0.7.2/test/integration/connectors/test_astradb.py +0 -287
  94. unstructured_ingest-0.7.2/test/integration/connectors/test_azure_ai_search.py +0 -254
  95. unstructured_ingest-0.7.2/test/integration/connectors/test_chroma.py +0 -136
  96. unstructured_ingest-0.7.2/test/integration/connectors/test_confluence.py +0 -111
  97. unstructured_ingest-0.7.2/test/integration/connectors/test_delta_table.py +0 -183
  98. unstructured_ingest-0.7.2/test/integration/connectors/test_dropbox.py +0 -151
  99. unstructured_ingest-0.7.2/test/integration/connectors/test_github.py +0 -49
  100. unstructured_ingest-0.7.2/test/integration/connectors/test_google_drive.py +0 -257
  101. unstructured_ingest-0.7.2/test/integration/connectors/test_jira.py +0 -67
  102. unstructured_ingest-0.7.2/test/integration/connectors/test_lancedb.py +0 -247
  103. unstructured_ingest-0.7.2/test/integration/connectors/test_milvus.py +0 -208
  104. unstructured_ingest-0.7.2/test/integration/connectors/test_mongodb.py +0 -335
  105. unstructured_ingest-0.7.2/test/integration/connectors/test_neo4j.py +0 -244
  106. unstructured_ingest-0.7.2/test/integration/connectors/test_notion.py +0 -152
  107. unstructured_ingest-0.7.2/test/integration/connectors/test_onedrive.py +0 -163
  108. unstructured_ingest-0.7.2/test/integration/connectors/test_pinecone.py +0 -387
  109. unstructured_ingest-0.7.2/test/integration/connectors/test_qdrant.py +0 -216
  110. unstructured_ingest-0.7.2/test/integration/connectors/test_redis.py +0 -143
  111. unstructured_ingest-0.7.2/test/integration/connectors/test_s3.py +0 -184
  112. unstructured_ingest-0.7.2/test/integration/connectors/test_sharepoint.py +0 -222
  113. unstructured_ingest-0.7.2/test/integration/connectors/test_vectara.py +0 -282
  114. unstructured_ingest-0.7.2/test/integration/connectors/test_zendesk.py +0 -120
  115. unstructured_ingest-0.7.2/test/integration/connectors/utils/constants.py +0 -13
  116. unstructured_ingest-0.7.2/test/integration/connectors/utils/docker.py +0 -151
  117. unstructured_ingest-0.7.2/test/integration/connectors/utils/docker_compose.py +0 -59
  118. unstructured_ingest-0.7.2/test/integration/connectors/utils/validation/destination.py +0 -77
  119. unstructured_ingest-0.7.2/test/integration/connectors/utils/validation/equality.py +0 -76
  120. unstructured_ingest-0.7.2/test/integration/connectors/utils/validation/source.py +0 -331
  121. unstructured_ingest-0.7.2/test/integration/connectors/utils/validation/utils.py +0 -36
  122. unstructured_ingest-0.7.2/test/integration/connectors/weaviate/__init__.py +0 -0
  123. unstructured_ingest-0.7.2/test/integration/connectors/weaviate/conftest.py +0 -15
  124. unstructured_ingest-0.7.2/test/integration/connectors/weaviate/test_cloud.py +0 -39
  125. unstructured_ingest-0.7.2/test/integration/connectors/weaviate/test_local.py +0 -152
  126. unstructured_ingest-0.7.2/test/integration/embedders/__init__.py +0 -0
  127. unstructured_ingest-0.7.2/test/integration/embedders/conftest.py +0 -13
  128. unstructured_ingest-0.7.2/test/integration/embedders/test_azure_openai.py +0 -57
  129. unstructured_ingest-0.7.2/test/integration/embedders/test_bedrock.py +0 -103
  130. unstructured_ingest-0.7.2/test/integration/embedders/test_huggingface.py +0 -24
  131. unstructured_ingest-0.7.2/test/integration/embedders/test_mixedbread.py +0 -71
  132. unstructured_ingest-0.7.2/test/integration/embedders/test_octoai.py +0 -75
  133. unstructured_ingest-0.7.2/test/integration/embedders/test_openai.py +0 -74
  134. unstructured_ingest-0.7.2/test/integration/embedders/test_togetherai.py +0 -71
  135. unstructured_ingest-0.7.2/test/integration/embedders/test_vertexai.py +0 -63
  136. unstructured_ingest-0.7.2/test/integration/embedders/test_voyageai.py +0 -79
  137. unstructured_ingest-0.7.2/test/integration/embedders/utils.py +0 -66
  138. unstructured_ingest-0.7.2/test/integration/partitioners/__init__.py +0 -0
  139. unstructured_ingest-0.7.2/test/integration/partitioners/test_partitioner.py +0 -76
  140. unstructured_ingest-0.7.2/test/integration/utils.py +0 -15
  141. unstructured_ingest-0.7.2/test/unit/__init__.py +0 -0
  142. unstructured_ingest-0.7.2/test/unit/chunkers/__init__.py +0 -0
  143. unstructured_ingest-0.7.2/test/unit/chunkers/test_chunkers.py +0 -49
  144. unstructured_ingest-0.7.2/test/unit/connectors/__init__.py +0 -0
  145. unstructured_ingest-0.7.2/test/unit/connectors/ibm_watsonx/__init__.py +0 -0
  146. unstructured_ingest-0.7.2/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
  147. unstructured_ingest-0.7.2/test/unit/connectors/motherduck/__init__.py +0 -0
  148. unstructured_ingest-0.7.2/test/unit/connectors/motherduck/test_base.py +0 -73
  149. unstructured_ingest-0.7.2/test/unit/connectors/sql/__init__.py +0 -0
  150. unstructured_ingest-0.7.2/test/unit/connectors/sql/test_sql.py +0 -152
  151. unstructured_ingest-0.7.2/test/unit/connectors/test_confluence.py +0 -71
  152. unstructured_ingest-0.7.2/test/unit/connectors/test_jira.py +0 -401
  153. unstructured_ingest-0.7.2/test/unit/embed/__init__.py +0 -0
  154. unstructured_ingest-0.7.2/test/unit/embed/test_mixedbreadai.py +0 -42
  155. unstructured_ingest-0.7.2/test/unit/embed/test_octoai.py +0 -27
  156. unstructured_ingest-0.7.2/test/unit/embed/test_openai.py +0 -28
  157. unstructured_ingest-0.7.2/test/unit/embed/test_vertexai.py +0 -25
  158. unstructured_ingest-0.7.2/test/unit/embed/test_voyageai.py +0 -24
  159. unstructured_ingest-0.7.2/test/unit/embedders/__init__.py +0 -0
  160. unstructured_ingest-0.7.2/test/unit/embedders/test_bedrock.py +0 -36
  161. unstructured_ingest-0.7.2/test/unit/embedders/test_huggingface.py +0 -48
  162. unstructured_ingest-0.7.2/test/unit/embedders/test_mixedbread.py +0 -37
  163. unstructured_ingest-0.7.2/test/unit/embedders/test_octoai.py +0 -35
  164. unstructured_ingest-0.7.2/test/unit/embedders/test_openai.py +0 -35
  165. unstructured_ingest-0.7.2/test/unit/embedders/test_togetherai.py +0 -37
  166. unstructured_ingest-0.7.2/test/unit/embedders/test_vertexai.py +0 -37
  167. unstructured_ingest-0.7.2/test/unit/embedders/test_voyageai.py +0 -38
  168. unstructured_ingest-0.7.2/test/unit/partitioners/__init__.py +0 -0
  169. unstructured_ingest-0.7.2/test/unit/partitioners/test_partitioner.py +0 -63
  170. unstructured_ingest-0.7.2/test/unit/test_error.py +0 -27
  171. unstructured_ingest-0.7.2/test/unit/test_html.py +0 -112
  172. unstructured_ingest-0.7.2/test/unit/test_interfaces.py +0 -26
  173. unstructured_ingest-0.7.2/test/unit/test_utils.py +0 -220
  174. unstructured_ingest-0.7.2/test/unit/utils/__init__.py +0 -0
  175. unstructured_ingest-0.7.2/test/unit/utils/data_generator.py +0 -32
  176. unstructured_ingest-0.7.2/unstructured_ingest/__version__.py +0 -1
  177. unstructured_ingest-0.7.2/unstructured_ingest/cli/__init__.py +0 -0
  178. unstructured_ingest-0.7.2/unstructured_ingest/cli/utils/__init__.py +0 -0
  179. unstructured_ingest-0.7.2/unstructured_ingest/data_types/__init__.py +0 -0
  180. unstructured_ingest-0.7.2/unstructured_ingest/embed/__init__.py +0 -0
  181. unstructured_ingest-0.7.2/unstructured_ingest/pipeline/__init__.py +0 -0
  182. unstructured_ingest-0.7.2/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  183. unstructured_ingest-0.7.2/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  184. unstructured_ingest-0.7.2/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  185. unstructured_ingest-0.7.2/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  186. unstructured_ingest-0.7.2/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  187. unstructured_ingest-0.7.2/unstructured_ingest/processes/utils/__init__.py +0 -0
  188. unstructured_ingest-0.7.2/unstructured_ingest/utils/__init__.py +0 -0
  189. unstructured_ingest-0.7.2/unstructured_ingest.egg-info/PKG-INFO +0 -383
  190. unstructured_ingest-0.7.2/unstructured_ingest.egg-info/SOURCES.txt +0 -377
  191. unstructured_ingest-0.7.2/unstructured_ingest.egg-info/dependency_links.txt +0 -1
  192. unstructured_ingest-0.7.2/unstructured_ingest.egg-info/entry_points.txt +0 -2
  193. unstructured_ingest-0.7.2/unstructured_ingest.egg-info/requires.txt +0 -418
  194. unstructured_ingest-0.7.2/unstructured_ingest.egg-info/top_level.txt +0 -3
  195. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/LICENSE.md +0 -0
  196. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/README.md +0 -0
  197. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/__init__.py +0 -0
  198. {unstructured_ingest-0.7.2/examples → unstructured_ingest-1.0.2/unstructured_ingest/cli}/__init__.py +0 -0
  199. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/base/__init__.py +0 -0
  200. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/base/cmd.py +0 -0
  201. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/base/dest.py +0 -0
  202. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/base/importer.py +0 -0
  203. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/base/src.py +0 -0
  204. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/cli.py +0 -0
  205. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/cmds.py +0 -0
  206. {unstructured_ingest-0.7.2/test → unstructured_ingest-1.0.2/unstructured_ingest/cli/utils}/__init__.py +0 -0
  207. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/utils/click.py +0 -0
  208. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  209. {unstructured_ingest-0.7.2/test/integration → unstructured_ingest-1.0.2/unstructured_ingest/data_types}/__init__.py +0 -0
  210. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/data_types/file_data.py +0 -0
  211. {unstructured_ingest-0.7.2/test/integration/chunkers → unstructured_ingest-1.0.2/unstructured_ingest/embed}/__init__.py +0 -0
  212. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/azure_openai.py +0 -0
  213. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/bedrock.py +0 -0
  214. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/huggingface.py +0 -0
  215. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/interfaces.py +0 -0
  216. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/octoai.py +0 -0
  217. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/openai.py +0 -0
  218. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/togetherai.py +0 -0
  219. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/vertexai.py +0 -0
  220. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/embed/voyageai.py +0 -0
  221. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/error.py +0 -0
  222. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/errors_v2.py +0 -0
  223. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/__init__.py +0 -0
  224. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/connector.py +0 -0
  225. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/downloader.py +0 -0
  226. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/indexer.py +0 -0
  227. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/process.py +0 -0
  228. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/interfaces/processor.py +0 -0
  229. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/logger.py +0 -0
  230. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/main.py +0 -0
  231. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/otel.py +0 -0
  232. {unstructured_ingest-0.7.2/test/integration/connectors → unstructured_ingest-1.0.2/unstructured_ingest/pipeline}/__init__.py +0 -0
  233. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/otel.py +0 -0
  234. {unstructured_ingest-0.7.2/test/integration/connectors/databricks → unstructured_ingest-1.0.2/unstructured_ingest/pipeline/steps}/__init__.py +0 -0
  235. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  236. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/download.py +0 -0
  237. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  238. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  239. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/index.py +0 -0
  240. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  241. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  242. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  243. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  244. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/__init__.py +0 -0
  245. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connector_registry.py +0 -0
  246. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  247. {unstructured_ingest-0.7.2/test/integration/connectors/discord → unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/assets}/__init__.py +0 -0
  248. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  249. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  250. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  251. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  252. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  253. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  254. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  255. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/discord.py +0 -0
  256. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  257. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  258. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  259. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  260. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  261. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  262. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  263. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  264. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  265. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  266. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  267. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  268. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/github.py +0 -0
  269. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  270. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/jira.py +0 -0
  271. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  272. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  273. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  274. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  275. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  276. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  277. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  278. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  279. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  280. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  281. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  282. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/local.py +0 -0
  283. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  284. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  285. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  286. {unstructured_ingest-0.7.2/test/integration/connectors/duckdb → unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/notion}/__init__.py +0 -0
  287. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  288. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  289. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  290. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  291. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  292. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  293. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  294. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  295. {unstructured_ingest-0.7.2/test/integration/connectors/elasticsearch → unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/notion/types}/__init__.py +0 -0
  296. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  297. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  298. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  299. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  300. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  301. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  302. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  303. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  304. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  305. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  306. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  307. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  308. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  309. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  310. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  311. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  312. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  313. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  314. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  315. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  316. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  317. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  318. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  319. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  320. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  321. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  322. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  323. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  324. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  325. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  326. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  327. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  328. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  329. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  330. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  331. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  332. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  333. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  334. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  335. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  336. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  337. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  338. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  339. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  340. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  341. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  342. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  343. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  344. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
  345. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  346. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  347. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  348. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  349. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  350. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  351. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  352. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  353. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  354. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  355. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  356. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  357. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  358. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  359. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  360. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  361. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  362. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  363. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  364. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/utils.py +0 -0
  365. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  366. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  367. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  368. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  369. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  370. {unstructured_ingest-0.7.2/test/integration/connectors/sql → unstructured_ingest-1.0.2/unstructured_ingest/processes/connectors/zendesk}/__init__.py +0 -0
  371. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  372. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/processes/uncompress.py +0 -0
  373. {unstructured_ingest-0.7.2/test/integration/connectors → unstructured_ingest-1.0.2/unstructured_ingest/processes}/utils/__init__.py +0 -0
  374. {unstructured_ingest-0.7.2/test/integration/connectors/utils/validation → unstructured_ingest-1.0.2/unstructured_ingest/utils}/__init__.py +0 -0
  375. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/chunking.py +0 -0
  376. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/compression.py +0 -0
  377. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/constants.py +0 -0
  378. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/dep_check.py +0 -0
  379. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/html.py +0 -0
  380. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/ndjson.py +0 -0
  381. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/pydantic_models.py +0 -0
  382. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  383. {unstructured_ingest-0.7.2 → unstructured_ingest-1.0.2}/unstructured_ingest/utils/table.py +0 -0
@@ -0,0 +1,212 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ figures/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ pip-wheel-metadata/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Pycharm
42
+ .idea/
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+ nbs/
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .venv
112
+ env/
113
+ venv/
114
+ ENV/
115
+ env.bak/
116
+ venv.bak/
117
+
118
+ # Spyder project settings
119
+ .spyderproject
120
+ .spyproject
121
+
122
+ # Rope project settings
123
+ .ropeproject
124
+
125
+ # mkdocs documentation
126
+ /site
127
+
128
+ # mypy
129
+ .mypy_cache/
130
+ .dmypy.json
131
+ dmypy.json
132
+
133
+ # Pyre type checker
134
+ .pyre/
135
+
136
+ # pyright (Python LSP/type-checker in VSCode) config
137
+ /pyrightconfig.json
138
+
139
+ # ingest outputs
140
+ /structured-output
141
+ test_unstructured_ingest/workdir/
142
+ test_unstructured_ingest/delta-table-dest/
143
+ test_unstructured_ingest/skipped-files.txt
144
+ test_unstructured_ingest/chroma-dest/
145
+
146
+ # suggested ingest mirror directory
147
+ /mirror
148
+
149
+ ## https://github.com/github/gitignore/blob/main/Global/Emacs.gitignore (partial)
150
+
151
+ *~
152
+ \#*\#
153
+ /.emacs.desktop
154
+ /.emacs.desktop.lock
155
+ *.elc
156
+ auto-save-list
157
+ tramp
158
+ .\#*
159
+
160
+ ## https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
161
+ .vscode/*
162
+ !.vscode/tasks.json
163
+ !.vscode/launch.json
164
+ !.vscode/extensions.json
165
+ !.vscode/*.code-snippets
166
+
167
+ # Local History for Visual Studio Code
168
+ .history/
169
+
170
+ # Built Visual Studio Code Extensions
171
+ *.vsix
172
+
173
+ ## https://github.com/github/gitignore/blob/main/Global/Vim.gitignore
174
+ # Swap
175
+ [._]*.s[a-v][a-z]
176
+ !*.svg # comment out if you don't need vector files
177
+ [._]*.sw[a-p]
178
+ [._]s[a-rt-v][a-z]
179
+ [._]ss[a-gi-z]
180
+ [._]sw[a-p]
181
+
182
+ # Session
183
+ Session.vim
184
+ Sessionx.vim
185
+
186
+ # Temporary
187
+ .netrwhist
188
+ # Auto-generated tag files
189
+ tags
190
+ # Persistent undo
191
+ [._]*.un~
192
+
193
+ .DS_Store
194
+
195
+ # Ruff cache
196
+ .ruff_cache/
197
+
198
+ .ppm
199
+ .vs
200
+
201
+ example-docs/*_images
202
+ examples/**/output/
203
+
204
+ outputdiff.txt
205
+ metricsdiff.txt
206
+
207
+ # analysis
208
+ annotated/
209
+
210
+ tmp_ingest/
211
+ .vs
212
+ .report.json
@@ -0,0 +1,226 @@
1
+ Metadata-Version: 2.4
2
+ Name: unstructured_ingest
3
+ Version: 1.0.2
4
+ Summary: Local ETL data pipeline to get data RAG ready
5
+ Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
+ License-Expression: Apache-2.0
7
+ License-File: LICENSE.md
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: <3.13,>=3.9
21
+ Requires-Dist: click
22
+ Requires-Dist: dataclasses-json
23
+ Requires-Dist: opentelemetry-sdk
24
+ Requires-Dist: pydantic>=2.7
25
+ Requires-Dist: python-dateutil
26
+ Requires-Dist: tqdm
27
+ Provides-Extra: airtable
28
+ Requires-Dist: pandas; extra == 'airtable'
29
+ Requires-Dist: pyairtable; extra == 'airtable'
30
+ Provides-Extra: astradb
31
+ Requires-Dist: astrapy>2.0.0; extra == 'astradb'
32
+ Provides-Extra: azure
33
+ Requires-Dist: adlfs; extra == 'azure'
34
+ Requires-Dist: fsspec; extra == 'azure'
35
+ Provides-Extra: azure-ai-search
36
+ Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
37
+ Provides-Extra: bedrock
38
+ Requires-Dist: aioboto3; extra == 'bedrock'
39
+ Requires-Dist: boto3; extra == 'bedrock'
40
+ Provides-Extra: biomed
41
+ Requires-Dist: bs4; extra == 'biomed'
42
+ Requires-Dist: requests; extra == 'biomed'
43
+ Provides-Extra: box
44
+ Requires-Dist: boxfs; extra == 'box'
45
+ Requires-Dist: fsspec; extra == 'box'
46
+ Provides-Extra: chroma
47
+ Requires-Dist: chromadb; extra == 'chroma'
48
+ Provides-Extra: clarifai
49
+ Requires-Dist: clarifai; extra == 'clarifai'
50
+ Provides-Extra: confluence
51
+ Requires-Dist: atlassian-python-api; extra == 'confluence'
52
+ Requires-Dist: requests; extra == 'confluence'
53
+ Provides-Extra: couchbase
54
+ Requires-Dist: couchbase; extra == 'couchbase'
55
+ Provides-Extra: databricks-delta-tables
56
+ Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
57
+ Requires-Dist: pandas; extra == 'databricks-delta-tables'
58
+ Provides-Extra: databricks-volumes
59
+ Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
60
+ Provides-Extra: delta-table
61
+ Requires-Dist: boto3; extra == 'delta-table'
62
+ Requires-Dist: deltalake; extra == 'delta-table'
63
+ Requires-Dist: pandas; extra == 'delta-table'
64
+ Provides-Extra: discord
65
+ Requires-Dist: discord-py; extra == 'discord'
66
+ Provides-Extra: doc
67
+ Requires-Dist: unstructured[doc]; extra == 'doc'
68
+ Provides-Extra: docx
69
+ Requires-Dist: unstructured[docx]; extra == 'docx'
70
+ Provides-Extra: dropbox
71
+ Requires-Dist: dropboxdrivefs; extra == 'dropbox'
72
+ Requires-Dist: fsspec; extra == 'dropbox'
73
+ Provides-Extra: duckdb
74
+ Requires-Dist: duckdb; extra == 'duckdb'
75
+ Requires-Dist: pandas; extra == 'duckdb'
76
+ Provides-Extra: elasticsearch
77
+ Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
78
+ Provides-Extra: epub
79
+ Requires-Dist: unstructured[epub]; extra == 'epub'
80
+ Provides-Extra: gcs
81
+ Requires-Dist: bs4; extra == 'gcs'
82
+ Requires-Dist: fsspec; extra == 'gcs'
83
+ Requires-Dist: gcsfs; extra == 'gcs'
84
+ Provides-Extra: github
85
+ Requires-Dist: pygithub>1.58.0; extra == 'github'
86
+ Requires-Dist: requests; extra == 'github'
87
+ Provides-Extra: gitlab
88
+ Requires-Dist: python-gitlab; extra == 'gitlab'
89
+ Provides-Extra: google-drive
90
+ Requires-Dist: google-api-python-client; extra == 'google-drive'
91
+ Provides-Extra: hubspot
92
+ Requires-Dist: hubspot-api-client; extra == 'hubspot'
93
+ Requires-Dist: urllib3; extra == 'hubspot'
94
+ Provides-Extra: huggingface
95
+ Requires-Dist: sentence-transformers; extra == 'huggingface'
96
+ Provides-Extra: ibm-watsonx-s3
97
+ Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
98
+ Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
99
+ Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
100
+ Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
101
+ Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
102
+ Provides-Extra: image
103
+ Requires-Dist: unstructured[image]; extra == 'image'
104
+ Provides-Extra: jira
105
+ Requires-Dist: atlassian-python-api; extra == 'jira'
106
+ Provides-Extra: kafka
107
+ Requires-Dist: confluent-kafka; extra == 'kafka'
108
+ Provides-Extra: kdbai
109
+ Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
110
+ Requires-Dist: pandas; extra == 'kdbai'
111
+ Provides-Extra: lancedb
112
+ Requires-Dist: lancedb; extra == 'lancedb'
113
+ Provides-Extra: md
114
+ Requires-Dist: unstructured[md]; extra == 'md'
115
+ Provides-Extra: milvus
116
+ Requires-Dist: pymilvus; extra == 'milvus'
117
+ Provides-Extra: mixedbreadai
118
+ Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
119
+ Provides-Extra: mongodb
120
+ Requires-Dist: pymongo; extra == 'mongodb'
121
+ Provides-Extra: msg
122
+ Requires-Dist: unstructured[msg]; extra == 'msg'
123
+ Provides-Extra: neo4j
124
+ Requires-Dist: cymple; extra == 'neo4j'
125
+ Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
126
+ Requires-Dist: networkx; extra == 'neo4j'
127
+ Provides-Extra: notion
128
+ Requires-Dist: backoff; extra == 'notion'
129
+ Requires-Dist: htmlbuilder; extra == 'notion'
130
+ Requires-Dist: httpx; extra == 'notion'
131
+ Requires-Dist: notion-client; extra == 'notion'
132
+ Provides-Extra: octoai
133
+ Requires-Dist: openai; extra == 'octoai'
134
+ Requires-Dist: tiktoken; extra == 'octoai'
135
+ Provides-Extra: odt
136
+ Requires-Dist: unstructured[odt]; extra == 'odt'
137
+ Provides-Extra: onedrive
138
+ Requires-Dist: msal; extra == 'onedrive'
139
+ Requires-Dist: office365-rest-python-client; extra == 'onedrive'
140
+ Requires-Dist: requests; extra == 'onedrive'
141
+ Provides-Extra: openai
142
+ Requires-Dist: openai; extra == 'openai'
143
+ Requires-Dist: tiktoken; extra == 'openai'
144
+ Provides-Extra: opensearch
145
+ Requires-Dist: opensearch-py; extra == 'opensearch'
146
+ Provides-Extra: org
147
+ Requires-Dist: unstructured[org]; extra == 'org'
148
+ Provides-Extra: outlook
149
+ Requires-Dist: msal; extra == 'outlook'
150
+ Requires-Dist: office365-rest-python-client; extra == 'outlook'
151
+ Provides-Extra: pdf
152
+ Requires-Dist: unstructured[pdf]; extra == 'pdf'
153
+ Provides-Extra: pinecone
154
+ Requires-Dist: pinecone; extra == 'pinecone'
155
+ Provides-Extra: postgres
156
+ Requires-Dist: pandas; extra == 'postgres'
157
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
158
+ Provides-Extra: ppt
159
+ Requires-Dist: unstructured[ppt]; extra == 'ppt'
160
+ Provides-Extra: pptx
161
+ Requires-Dist: unstructured[pptx]; extra == 'pptx'
162
+ Provides-Extra: qdrant
163
+ Requires-Dist: qdrant-client; extra == 'qdrant'
164
+ Provides-Extra: reddit
165
+ Requires-Dist: praw; extra == 'reddit'
166
+ Provides-Extra: redis
167
+ Requires-Dist: redis; extra == 'redis'
168
+ Provides-Extra: remote
169
+ Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
170
+ Provides-Extra: rst
171
+ Requires-Dist: unstructured[rst]; extra == 'rst'
172
+ Provides-Extra: rtf
173
+ Requires-Dist: unstructured[rtf]; extra == 'rtf'
174
+ Provides-Extra: s3
175
+ Requires-Dist: fsspec; extra == 's3'
176
+ Requires-Dist: s3fs; extra == 's3'
177
+ Provides-Extra: salesforce
178
+ Requires-Dist: simple-salesforce; extra == 'salesforce'
179
+ Provides-Extra: sftp
180
+ Requires-Dist: fsspec; extra == 'sftp'
181
+ Requires-Dist: paramiko; extra == 'sftp'
182
+ Provides-Extra: sharepoint
183
+ Requires-Dist: msal; extra == 'sharepoint'
184
+ Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
185
+ Requires-Dist: requests; extra == 'sharepoint'
186
+ Provides-Extra: singlestore
187
+ Requires-Dist: pandas; extra == 'singlestore'
188
+ Requires-Dist: singlestoredb; extra == 'singlestore'
189
+ Provides-Extra: slack
190
+ Requires-Dist: slack-sdk[optional]; extra == 'slack'
191
+ Provides-Extra: snowflake
192
+ Requires-Dist: pandas; extra == 'snowflake'
193
+ Requires-Dist: psycopg2-binary; extra == 'snowflake'
194
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
195
+ Provides-Extra: togetherai
196
+ Requires-Dist: together; extra == 'togetherai'
197
+ Provides-Extra: tsv
198
+ Requires-Dist: unstructured[tsv]; extra == 'tsv'
199
+ Provides-Extra: vastdb
200
+ Requires-Dist: ibis; extra == 'vastdb'
201
+ Requires-Dist: pandas; extra == 'vastdb'
202
+ Requires-Dist: pyarrow; extra == 'vastdb'
203
+ Requires-Dist: vastdb; extra == 'vastdb'
204
+ Provides-Extra: vectara
205
+ Requires-Dist: aiofiles; extra == 'vectara'
206
+ Requires-Dist: httpx; extra == 'vectara'
207
+ Requires-Dist: requests; extra == 'vectara'
208
+ Provides-Extra: vertexai
209
+ Requires-Dist: vertexai; extra == 'vertexai'
210
+ Provides-Extra: voyageai
211
+ Requires-Dist: voyageai; extra == 'voyageai'
212
+ Provides-Extra: weaviate
213
+ Requires-Dist: weaviate-client; extra == 'weaviate'
214
+ Provides-Extra: wikipedia
215
+ Requires-Dist: wikipedia; extra == 'wikipedia'
216
+ Provides-Extra: xlsx
217
+ Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
218
+ Provides-Extra: zendesk
219
+ Requires-Dist: aiofiles; extra == 'zendesk'
220
+ Requires-Dist: bs4; extra == 'zendesk'
221
+ Requires-Dist: httpx; extra == 'zendesk'
222
+ Description-Content-Type: text/markdown
223
+
224
+ # Unstructured Ingest
225
+
226
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
@@ -0,0 +1,211 @@
1
+ [project]
2
+ name = "unstructured_ingest"
3
+ description = "Local ETL data pipeline to get data RAG ready"
4
+ requires-python = ">=3.9, <3.13"
5
+ authors = [{name = "Unstructured Technologies", email = "devops@unstructuredai.io"}]
6
+ classifiers = [
7
+ "Development Status :: 4 - Beta",
8
+ "Intended Audience :: Developers",
9
+ "Intended Audience :: Education",
10
+ "Intended Audience :: Science/Research",
11
+ "License :: OSI Approved :: Apache Software License",
12
+ "Operating System :: OS Independent",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.9",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+ readme = "README.md"
21
+ license = "Apache-2.0"
22
+ dynamic = ["version", "dependencies", "optional-dependencies"]
23
+
24
+ [tool.hatch.metadata.hooks.requirements_txt]
25
+ files = ["requirements/base.txt"]
26
+
27
+ [tool.hatch.metadata.hooks.requirements_txt.optional-dependencies]
28
+ # Connectors
29
+ airtable = ["requirements/connectors/airtable.txt"]
30
+ astradb = ["requirements/connectors/astradb.txt"]
31
+ azure-ai-search = ["requirements/connectors/azure-ai-search.txt"]
32
+ azure = ["requirements/connectors/azure.txt"]
33
+ biomed = ["requirements/connectors/biomed.txt"]
34
+ box = ["requirements/connectors/box.txt"]
35
+ chroma = ["requirements/connectors/chroma.txt"]
36
+ clarifai = ["requirements/connectors/clarifai.txt"]
37
+ confluence = ["requirements/connectors/confluence.txt"]
38
+ couchbase = ["requirements/connectors/couchbase.txt"]
39
+ databricks-delta-tables = ["requirements/connectors/databricks-delta-tables.txt"]
40
+ databricks-volumes = ["requirements/connectors/databricks-volumes.txt"]
41
+ delta-table = ["requirements/connectors/delta-table.txt"]
42
+ discord = ["requirements/connectors/discord.txt"]
43
+ dropbox = ["requirements/connectors/dropbox.txt"]
44
+ duckdb = ["requirements/connectors/duckdb.txt"]
45
+ elasticsearch = ["requirements/connectors/elasticsearch.txt"]
46
+ gcs = ["requirements/connectors/gcs.txt"]
47
+ github = ["requirements/connectors/github.txt"]
48
+ gitlab = ["requirements/connectors/gitlab.txt"]
49
+ google-drive = ["requirements/connectors/google-drive.txt"]
50
+ hubspot = ["requirements/connectors/hubspot.txt"]
51
+ ibm-watsonx-s3 = ["requirements/connectors/ibm-watsonx-s3.txt"]
52
+ jira = ["requirements/connectors/jira.txt"]
53
+ kafka = ["requirements/connectors/kafka.txt"]
54
+ kdbai = ["requirements/connectors/kdbai.txt"]
55
+ lancedb = ["requirements/connectors/lancedb.txt"]
56
+ milvus = ["requirements/connectors/milvus.txt"]
57
+ mongodb = ["requirements/connectors/mongodb.txt"]
58
+ neo4j = ["requirements/connectors/neo4j.txt"]
59
+ notion = ["requirements/connectors/notion.txt"]
60
+ onedrive = ["requirements/connectors/onedrive.txt"]
61
+ opensearch = ["requirements/connectors/opensearch.txt"]
62
+ outlook = ["requirements/connectors/outlook.txt"]
63
+ pinecone = ["requirements/connectors/pinecone.txt"]
64
+ postgres = ["requirements/connectors/postgres.txt"]
65
+ qdrant = ["requirements/connectors/qdrant.txt"]
66
+ reddit = ["requirements/connectors/reddit.txt"]
67
+ redis = ["requirements/connectors/redis.txt"]
68
+ s3 = ["requirements/connectors/s3.txt"]
69
+ salesforce = ["requirements/connectors/salesforce.txt"]
70
+ sftp = ["requirements/connectors/sftp.txt"]
71
+ sharepoint = ["requirements/connectors/sharepoint.txt"]
72
+ singlestore = ["requirements/connectors/singlestore.txt"]
73
+ slack = ["requirements/connectors/slack.txt"]
74
+ snowflake = ["requirements/connectors/snowflake.txt"]
75
+ vastdb = ["requirements/connectors/vastdb.txt"]
76
+ vectara = ["requirements/connectors/vectara.txt"]
77
+ weaviate = ["requirements/connectors/weaviate.txt"]
78
+ wikipedia = ["requirements/connectors/wikipedia.txt"]
79
+ zendesk = ["requirements/connectors/zendesk.txt"]
80
+
81
+ # Embedders
82
+ bedrock = ["requirements/embed/bedrock.txt"]
83
+ huggingface = ["requirements/embed/huggingface.txt"]
84
+ mixedbreadai = ["requirements/embed/mixedbreadai.txt"]
85
+ octoai = ["requirements/embed/octoai.txt"]
86
+ openai = ["requirements/embed/openai.txt"]
87
+ togetherai = ["requirements/embed/togetherai.txt"]
88
+ vertexai = ["requirements/embed/vertexai.txt"]
89
+ voyageai = ["requirements/embed/voyageai.txt"]
90
+
91
+ # remote
92
+ remote = ["requirements/remote/client.txt"]
93
+
94
+ # local partition
95
+ doc = ["requirements/local_partition/doc.txt"]
96
+ docx = ["requirements/local_partition/docx.txt"]
97
+ epub = ["requirements/local_partition/epub.txt"]
98
+ image = ["requirements/local_partition/image.txt"]
99
+ md = ["requirements/local_partition/md.txt"]
100
+ msg = ["requirements/local_partition/msg.txt"]
101
+ odt = ["requirements/local_partition/odt.txt"]
102
+ org = ["requirements/local_partition/org.txt"]
103
+ pdf = ["requirements/local_partition/pdf.txt"]
104
+ ppt = ["requirements/local_partition/ppt.txt"]
105
+ pptx = ["requirements/local_partition/pptx.txt"]
106
+ rst = ["requirements/local_partition/rst.txt"]
107
+ rtf = ["requirements/local_partition/rtf.txt"]
108
+ tsv = ["requirements/local_partition/tsv.txt"]
109
+ xlsx = ["requirements/local_partition/xlsx.txt"]
110
+
111
+
112
+ [tool.hatch.version]
113
+ path = "unstructured_ingest/__version__.py"
114
+
115
+ [dependency-groups]
116
+ release = [
117
+ "twine",
118
+ "wheel",
119
+ "build"
120
+ ]
121
+ lint = [
122
+ "ruff",
123
+ ]
124
+ test = [
125
+ "pytest",
126
+ "pytest-cov",
127
+ "pytest-mock",
128
+ "pytest-check",
129
+ "pytest-asyncio",
130
+ "pytest_tagging",
131
+ "pytest-json-report",
132
+ "pytest-timeout",
133
+ "faker",
134
+ "docker",
135
+ "universal_pathlib",
136
+ "deepdiff",
137
+ "bs4",
138
+ "pandas",
139
+
140
+ # Connector specific deps
141
+ "cryptography",
142
+ "fsspec",
143
+ "vertexai",
144
+ "pyiceberg",
145
+ "pyarrow",
146
+ ]
147
+ # Add constraints needed for CI
148
+ ci = [
149
+ # consistency with local-inference-pin
150
+ "protobuf<4.24",
151
+ "grpcio>=1.65.5",
152
+ # TODO: Pinned in transformers package, remove when that gets updated
153
+ "tokenizers>=0.19,<0.20",
154
+ # TODO: Constraint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
155
+ # updated or we drop support for 3.9
156
+ "urllib3<1.27",
157
+ # TODO: Constraint due to aiobotocore, remove when that gets updated:
158
+ "botocore<1.34.132",
159
+ # TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
160
+ "importlib-metadata>=8.5.0",
161
+ # TODO: Constraint due to boto, with python before 3.10 not requiring openssl 1.1.1, remove when that gets
162
+ # updated or we drop support for 3.9
163
+ "urllib3<1.27",
164
+ "unstructured-client>= 0.25.8",
165
+ "fsspec==2024.5.0",
166
+ # python 3.12 support
167
+ "wrapt>=1.14.0",
168
+ "numpy<2",
169
+ # deltalake >=0.23.0 currently has a bug with the version of pyarrow it installs
170
+ "deltalake<=0.22.0",
171
+ # TODO: investigate breaking changes introduced in lancedb>0.15.0
172
+ "lancedb<=0.15.0",
173
+ # TODO: versions higher than this are missing the macos wheel
174
+ "pykx==2.5.3",
175
+ ]
176
+
177
+ [project.scripts]
178
+ unstructured-ingest = "unstructured_ingest.main:main"
179
+
180
+
181
+ [build-system]
182
+ requires = ["hatchling", "hatch-requirements-txt"]
183
+ build-backend = "hatchling.build"
184
+
185
+ [tool.ruff]
186
+ line-length = 100
187
+
188
+ [tool.ruff.lint]
189
+ select = [
190
+ # pycodestyle
191
+ "E",
192
+ # Pyflakes
193
+ "F",
194
+ # flake8-simplify
195
+ "SIM",
196
+ # isort
197
+ "I",
198
+ ]
199
+
200
+ [tool.pytest.ini_options]
201
+ asyncio_mode = "auto"
202
+
203
+ [tool.coverage.report]
204
+ # TODO: Update as this improves
205
+ fail_under = 0
206
+
207
+ [tool.hatch.build.targets.wheel]
208
+ packages = ["/unstructured_ingest"]
209
+
210
+ [tool.hatch.build.targets.sdist]
211
+ packages = ["/unstructured_ingest"]
@@ -0,0 +1 @@
1
+ __version__ = "1.0.2" # pragma: no cover
@@ -0,0 +1,28 @@
1
+ # Ingest CLI
2
+ This package helps map user input via a cli to the underlying ingest code to run a small ETL pipeline.
3
+
4
+ ## Design Reference
5
+ [cli.py](cli.py) is the main entrypoint to run the cli itself. The key point here is the interaction between all
6
+ source and destination connectors.
7
+
8
+ To manually run the cli:
9
+ ```shell
10
+ PYTHONPATH=. python unstructured_ingest/main.py --help
11
+ ```
12
+
13
+ The `main.py` file simply wraps the generated Click command created in `cli.py`.
14
+
15
+ ### Source Commands
16
+ All source commands are added as sub commands to the parent ingest Click group. This allows each command to map to
17
+ different connectors with shared and unique parameters.
18
+
19
+ ### Destination Commands
20
+ All destination commands are added as sub commands to each parent source command. This allows each invocation of the source
21
+ sub command to display all possible destination subcommands. The code in [utils.py](./utils.py) helps structure the
22
+ generated text from the Click library to be more intuitive on this approach (i.e. list sub commands as `Destinations`).
23
+
24
+ ### Configs
25
+ The configs in [configs/](./configs) and connector specific ones in [cmds/](./cmds) help surface all user parameters that
26
+ are needed to marshall the input dictionary from Click into all the respective configs needed to create a full pipeline run.
27
+ Because click returns a flat dictionary of user inputs, the `extract_config` method in `utils.py` helps deserialize this dictionary
28
+ into dataclasses that have nested fields (such as access configs).
@@ -114,7 +114,6 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
114
114
 
115
115
  @dataclass
116
116
  class AsyncMixedbreadAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
117
-
118
117
  config: MixedbreadAIEmbeddingConfig
119
118
 
120
119
  async def get_exemplary_embedding(self) -> list[float]: