unstructured-ingest 0.0.21__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (364)
  1. unstructured_ingest-0.0.22/PKG-INFO +186 -0
  2. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/test/test_utils_v2.py +2 -2
  3. unstructured_ingest-0.0.22/unstructured_ingest/__version__.py +1 -0
  4. unstructured_ingest-0.0.22/unstructured_ingest/embed/bedrock.py +107 -0
  5. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/huggingface.py +22 -22
  6. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/interfaces.py +11 -4
  7. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/mixedbreadai.py +17 -17
  8. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/octoai.py +7 -7
  9. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/openai.py +15 -20
  10. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/vertexai.py +25 -17
  11. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/voyageai.py +22 -17
  12. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/base/cmd.py +1 -1
  13. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/connector.py +1 -1
  14. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/pipeline.py +3 -1
  15. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/chunk.py +1 -1
  16. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/download.py +6 -2
  17. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/embed.py +1 -1
  18. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
  19. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/index.py +4 -2
  20. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/partition.py +1 -1
  21. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
  22. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
  23. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/upload.py +6 -2
  24. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/airtable.py +1 -1
  25. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +1 -1
  26. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +2 -2
  27. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +31 -5
  28. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +31 -2
  29. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +36 -8
  30. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +25 -77
  31. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +30 -1
  32. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +15 -18
  33. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +22 -1
  34. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/milvus.py +2 -2
  35. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/opensearch.py +2 -2
  36. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/utils.py +1 -1
  37. unstructured_ingest-0.0.22/unstructured_ingest.egg-info/PKG-INFO +186 -0
  38. unstructured_ingest-0.0.22/unstructured_ingest.egg-info/requires.txt +223 -0
  39. unstructured-ingest-0.0.21/PKG-INFO +0 -93
  40. unstructured-ingest-0.0.21/unstructured_ingest/__version__.py +0 -1
  41. unstructured-ingest-0.0.21/unstructured_ingest/embed/bedrock.py +0 -70
  42. unstructured-ingest-0.0.21/unstructured_ingest.egg-info/PKG-INFO +0 -93
  43. unstructured-ingest-0.0.21/unstructured_ingest.egg-info/requires.txt +0 -676
  44. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/LICENSE.md +0 -0
  45. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/README.md +0 -0
  46. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/pyproject.toml +0 -0
  47. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/setup.cfg +0 -0
  48. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/setup.py +0 -0
  49. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/test/test_chunking_utils.py +0 -0
  50. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/test/test_error.py +0 -0
  51. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/test/test_interfaces.py +0 -0
  52. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/test/test_logger.py +0 -0
  53. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/test/test_utils.py +0 -0
  54. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/__init__.py +0 -0
  55. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/__init__.py +0 -0
  56. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/base/__init__.py +0 -0
  57. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/base/cmd.py +0 -0
  58. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/base/dest.py +0 -0
  59. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/base/src.py +0 -0
  60. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cli.py +0 -0
  61. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmd_factory.py +0 -0
  62. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/__init__.py +0 -0
  63. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  64. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/astradb.py +0 -0
  65. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
  66. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  67. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  68. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  69. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  70. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  71. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  72. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/discord.py +0 -0
  73. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  74. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  75. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  76. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  77. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  78. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  79. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  80. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  81. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  82. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/github.py +0 -0
  83. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  84. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  85. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  86. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/jira.py +0 -0
  87. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  88. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/local.py +0 -0
  89. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  90. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/notion.py +0 -0
  91. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  92. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  93. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  94. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  95. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  96. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  97. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  98. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  99. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/slack.py +0 -0
  100. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/sql.py +0 -0
  101. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  102. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  103. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  104. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/common.py +0 -0
  105. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/interfaces.py +0 -0
  106. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/cli/utils.py +0 -0
  107. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/__init__.py +0 -0
  108. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/airtable.py +0 -0
  109. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/astradb.py +0 -0
  110. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
  111. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/biomed.py +0 -0
  112. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/chroma.py +0 -0
  113. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/clarifai.py +0 -0
  114. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/confluence.py +0 -0
  115. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/databricks_volumes.py +0 -0
  116. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/delta_table.py +0 -0
  117. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/discord.py +0 -0
  118. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/elasticsearch.py +0 -0
  119. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  120. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  121. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/box.py +0 -0
  122. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  123. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
  124. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  125. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  126. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  127. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/git.py +0 -0
  128. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/github.py +0 -0
  129. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/gitlab.py +0 -0
  130. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/google_drive.py +0 -0
  131. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/hubspot.py +0 -0
  132. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/jira.py +0 -0
  133. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/kafka.py +0 -0
  134. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/local.py +0 -0
  135. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/mongodb.py +0 -0
  136. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/__init__.py +0 -0
  137. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/client.py +0 -0
  138. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/connector.py +0 -0
  139. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/helpers.py +0 -0
  140. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  141. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  142. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/block.py +0 -0
  143. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  144. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  145. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  146. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  147. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  148. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  149. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  150. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  151. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  152. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  153. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  154. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  155. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  156. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  157. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  158. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  159. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  160. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  161. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  162. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  163. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  164. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  165. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  166. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  167. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  168. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  169. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  170. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  171. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  172. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database.py +0 -0
  173. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  174. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  175. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  176. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  177. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  178. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  179. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  180. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  181. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  182. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  183. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  184. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  185. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  186. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  187. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  188. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  189. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  190. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  191. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  192. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  193. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  194. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  195. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  196. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/date.py +0 -0
  197. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/file.py +0 -0
  198. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/page.py +0 -0
  199. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  200. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  201. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/notion/types/user.py +0 -0
  202. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/onedrive.py +0 -0
  203. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/opensearch.py +0 -0
  204. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/outlook.py +0 -0
  205. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/pinecone.py +0 -0
  206. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/qdrant.py +0 -0
  207. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/reddit.py +0 -0
  208. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/registry.py +0 -0
  209. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/salesforce.py +0 -0
  210. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/sharepoint.py +0 -0
  211. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/slack.py +0 -0
  212. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/sql.py +0 -0
  213. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/vectara.py +0 -0
  214. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/weaviate.py +0 -0
  215. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/connector/wikipedia.py +0 -0
  216. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/embed/__init__.py +0 -0
  217. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  218. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  219. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  220. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  221. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/error.py +0 -0
  222. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  223. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  224. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  225. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/interfaces.py +0 -0
  226. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/logger.py +0 -0
  227. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/main.py +0 -0
  228. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/__init__.py +0 -0
  229. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/copy.py +0 -0
  230. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  231. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/interfaces.py +0 -0
  232. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/partition.py +0 -0
  233. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/permissions.py +0 -0
  234. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/pipeline.py +0 -0
  235. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  236. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
  237. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
  238. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/source.py +0 -0
  239. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/utils.py +0 -0
  240. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/pipeline/write.py +0 -0
  241. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/processor.py +0 -0
  242. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/__init__.py +0 -0
  243. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/airtable.py +0 -0
  244. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/astradb.py +0 -0
  245. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/base_runner.py +0 -0
  246. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/biomed.py +0 -0
  247. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/confluence.py +0 -0
  248. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/delta_table.py +0 -0
  249. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/discord.py +0 -0
  250. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/elasticsearch.py +0 -0
  251. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  252. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  253. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/box.py +0 -0
  254. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  255. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  256. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  257. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  258. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  259. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/github.py +0 -0
  260. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/gitlab.py +0 -0
  261. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/google_drive.py +0 -0
  262. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/hubspot.py +0 -0
  263. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/jira.py +0 -0
  264. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/kafka.py +0 -0
  265. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/local.py +0 -0
  266. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/mongodb.py +0 -0
  267. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/notion.py +0 -0
  268. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/onedrive.py +0 -0
  269. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/opensearch.py +0 -0
  270. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/outlook.py +0 -0
  271. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/reddit.py +0 -0
  272. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/salesforce.py +0 -0
  273. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/sharepoint.py +0 -0
  274. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/slack.py +0 -0
  275. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/utils.py +0 -0
  276. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/wikipedia.py +0 -0
  277. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/__init__.py +0 -0
  278. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/astradb.py +0 -0
  279. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
  280. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  281. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/chroma.py +0 -0
  282. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  283. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  284. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  285. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  286. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  287. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  288. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  289. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  290. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  291. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  292. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/kafka.py +0 -0
  293. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  294. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  295. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  296. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  297. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/sql.py +0 -0
  298. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/vectara.py +0 -0
  299. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  300. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/__init__.py +0 -0
  301. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/chunking.py +0 -0
  302. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/compression.py +0 -0
  303. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/data_prep.py +0 -0
  304. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/dep_check.py +0 -0
  305. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/google_filetype.py +0 -0
  306. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  307. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/utils/table.py +0 -0
  308. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/__init__.py +0 -0
  309. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/__init__.py +0 -0
  310. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  311. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/base/dest.py +0 -0
  312. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  313. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/base/src.py +0 -0
  314. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/cli.py +0 -0
  315. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/cmds.py +0 -0
  316. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  317. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/utils/click.py +0 -0
  318. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/cli/utils/model_conversion.py +0 -0
  319. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
  320. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
  321. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/file_data.py +0 -0
  322. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
  323. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/process.py +0 -0
  324. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/processor.py +0 -0
  325. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
  326. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
  327. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/logger.py +0 -0
  328. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/main.py +0 -0
  329. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/otel.py +0 -0
  330. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  331. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
  332. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/otel.py +0 -0
  333. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  334. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/__init__.py +0 -0
  335. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/chunker.py +0 -0
  336. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
  337. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/__init__.py +0 -0
  338. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/astradb.py +0 -0
  339. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +0 -0
  340. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/chroma.py +0 -0
  341. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/couchbase.py +0 -0
  342. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  343. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  344. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -0
  345. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
  346. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/local.py +0 -0
  347. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/mongodb.py +0 -0
  348. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/onedrive.py +0 -0
  349. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/pinecone.py +0 -0
  350. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
  351. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
  352. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/singlestore.py +0 -0
  353. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/sql.py +0 -0
  354. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
  355. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/connectors/weaviate.py +0 -0
  356. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/embedder.py +0 -0
  357. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/filter.py +0 -0
  358. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/partitioner.py +0 -0
  359. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/processes/uncompress.py +0 -0
  360. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest/v2/unstructured_api.py +0 -0
  361. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest.egg-info/SOURCES.txt +0 -0
  362. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  363. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  364. {unstructured-ingest-0.0.21 → unstructured_ingest-0.0.22}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.1
2
+ Name: unstructured-ingest
3
+ Version: 0.0.22
4
+ Summary: A library that prepares raw documents for downstream ML tasks.
5
+ Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
+ Author: Unstructured Technologies
7
+ Author-email: devops@unstructuredai.io
8
+ License: Apache-2.0
9
+ Keywords: NLP PDF HTML CV XML parsing preprocessing
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.9.0,<3.13
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE.md
25
+ Requires-Dist: pandas
26
+ Requires-Dist: pydantic>=2.7
27
+ Requires-Dist: click
28
+ Requires-Dist: opentelemetry-sdk
29
+ Requires-Dist: dataclasses_json
30
+ Requires-Dist: tqdm
31
+ Requires-Dist: python-dateutil
32
+ Provides-Extra: remote
33
+ Requires-Dist: unstructured-client>=0.25.8; extra == "remote"
34
+ Provides-Extra: csv
35
+ Requires-Dist: unstructured[tsv]; extra == "csv"
36
+ Provides-Extra: doc
37
+ Requires-Dist: unstructured[docx]; extra == "doc"
38
+ Provides-Extra: docx
39
+ Requires-Dist: unstructured[docx]; extra == "docx"
40
+ Provides-Extra: epub
41
+ Requires-Dist: unstructured[epub]; extra == "epub"
42
+ Provides-Extra: md
43
+ Requires-Dist: unstructured[md]; extra == "md"
44
+ Provides-Extra: msg
45
+ Requires-Dist: unstructured[msg]; extra == "msg"
46
+ Provides-Extra: odt
47
+ Requires-Dist: unstructured[odt]; extra == "odt"
48
+ Provides-Extra: org
49
+ Requires-Dist: unstructured[org]; extra == "org"
50
+ Provides-Extra: pdf
51
+ Requires-Dist: unstructured[pdf]; extra == "pdf"
52
+ Provides-Extra: ppt
53
+ Requires-Dist: unstructured[pptx]; extra == "ppt"
54
+ Provides-Extra: pptx
55
+ Requires-Dist: unstructured[pptx]; extra == "pptx"
56
+ Provides-Extra: rtf
57
+ Requires-Dist: unstructured[rtf]; extra == "rtf"
58
+ Provides-Extra: rst
59
+ Requires-Dist: unstructured[rst]; extra == "rst"
60
+ Provides-Extra: tsv
61
+ Requires-Dist: unstructured[tsv]; extra == "tsv"
62
+ Provides-Extra: xlsx
63
+ Requires-Dist: unstructured[xlsx]; extra == "xlsx"
64
+ Provides-Extra: airtable
65
+ Requires-Dist: pyairtable; extra == "airtable"
66
+ Provides-Extra: astradb
67
+ Requires-Dist: astrapy; extra == "astradb"
68
+ Provides-Extra: azure
69
+ Requires-Dist: adlfs; extra == "azure"
70
+ Requires-Dist: fsspec; extra == "azure"
71
+ Provides-Extra: azure-cognitive-search
72
+ Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
73
+ Provides-Extra: biomed
74
+ Requires-Dist: requests; extra == "biomed"
75
+ Requires-Dist: bs4; extra == "biomed"
76
+ Provides-Extra: box
77
+ Requires-Dist: fsspec; extra == "box"
78
+ Requires-Dist: boxfs; extra == "box"
79
+ Provides-Extra: chroma
80
+ Requires-Dist: chromadb; extra == "chroma"
81
+ Provides-Extra: clarifai
82
+ Requires-Dist: clarifai; extra == "clarifai"
83
+ Provides-Extra: confluence
84
+ Requires-Dist: requests; extra == "confluence"
85
+ Requires-Dist: atlassian-python-api; extra == "confluence"
86
+ Provides-Extra: couchbase
87
+ Requires-Dist: couchbase; extra == "couchbase"
88
+ Provides-Extra: delta-table
89
+ Requires-Dist: deltalake; extra == "delta-table"
90
+ Requires-Dist: fsspec; extra == "delta-table"
91
+ Provides-Extra: discord
92
+ Requires-Dist: discord-py; extra == "discord"
93
+ Provides-Extra: dropbox
94
+ Requires-Dist: fsspec; extra == "dropbox"
95
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
96
+ Provides-Extra: elasticsearch
97
+ Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
98
+ Provides-Extra: gcs
99
+ Requires-Dist: gcsfs; extra == "gcs"
100
+ Requires-Dist: fsspec; extra == "gcs"
101
+ Requires-Dist: bs4; extra == "gcs"
102
+ Provides-Extra: github
103
+ Requires-Dist: requests; extra == "github"
104
+ Requires-Dist: pygithub>1.58.0; extra == "github"
105
+ Provides-Extra: gitlab
106
+ Requires-Dist: python-gitlab; extra == "gitlab"
107
+ Provides-Extra: google-drive
108
+ Requires-Dist: google-api-python-client; extra == "google-drive"
109
+ Provides-Extra: hubspot
110
+ Requires-Dist: urllib3; extra == "hubspot"
111
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
112
+ Provides-Extra: jira
113
+ Requires-Dist: atlassian-python-api; extra == "jira"
114
+ Provides-Extra: kafka
115
+ Requires-Dist: confluent-kafka; extra == "kafka"
116
+ Provides-Extra: kdbai
117
+ Requires-Dist: kdbai-client; extra == "kdbai"
118
+ Provides-Extra: milvus
119
+ Requires-Dist: pymilvus; extra == "milvus"
120
+ Provides-Extra: mongodb
121
+ Requires-Dist: pymongo; extra == "mongodb"
122
+ Provides-Extra: notion
123
+ Requires-Dist: httpx; extra == "notion"
124
+ Requires-Dist: htmlBuilder; extra == "notion"
125
+ Requires-Dist: notion-client; extra == "notion"
126
+ Requires-Dist: backoff; extra == "notion"
127
+ Provides-Extra: onedrive
128
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
129
+ Requires-Dist: msal; extra == "onedrive"
130
+ Requires-Dist: bs4; extra == "onedrive"
131
+ Provides-Extra: opensearch
132
+ Requires-Dist: opensearch-py; extra == "opensearch"
133
+ Provides-Extra: outlook
134
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
135
+ Requires-Dist: msal; extra == "outlook"
136
+ Provides-Extra: pinecone
137
+ Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
138
+ Provides-Extra: postgres
139
+ Requires-Dist: psycopg2-binary; extra == "postgres"
140
+ Provides-Extra: qdrant
141
+ Requires-Dist: qdrant-client; extra == "qdrant"
142
+ Provides-Extra: reddit
143
+ Requires-Dist: praw; extra == "reddit"
144
+ Provides-Extra: s3
145
+ Requires-Dist: s3fs; extra == "s3"
146
+ Requires-Dist: fsspec; extra == "s3"
147
+ Provides-Extra: sharepoint
148
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
149
+ Requires-Dist: msal; extra == "sharepoint"
150
+ Provides-Extra: salesforce
151
+ Requires-Dist: simple-salesforce; extra == "salesforce"
152
+ Provides-Extra: sftp
153
+ Requires-Dist: paramiko; extra == "sftp"
154
+ Requires-Dist: fsspec; extra == "sftp"
155
+ Provides-Extra: slack
156
+ Requires-Dist: slack_sdk; extra == "slack"
157
+ Provides-Extra: wikipedia
158
+ Requires-Dist: wikipedia; extra == "wikipedia"
159
+ Provides-Extra: weaviate
160
+ Requires-Dist: weaviate-client; extra == "weaviate"
161
+ Provides-Extra: databricks-volumes
162
+ Requires-Dist: databricks-sdk; extra == "databricks-volumes"
163
+ Provides-Extra: singlestore
164
+ Requires-Dist: singlestoredb; extra == "singlestore"
165
+ Provides-Extra: vectara
166
+ Requires-Dist: requests; extra == "vectara"
167
+ Provides-Extra: embed-huggingface
168
+ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
169
+ Provides-Extra: embed-octoai
170
+ Requires-Dist: tiktoken; extra == "embed-octoai"
171
+ Requires-Dist: openai; extra == "embed-octoai"
172
+ Provides-Extra: embed-vertexai
173
+ Requires-Dist: vertexai; extra == "embed-vertexai"
174
+ Provides-Extra: embed-voyageai
175
+ Requires-Dist: voyageai; extra == "embed-voyageai"
176
+ Provides-Extra: embed-mixedbreadai
177
+ Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
178
+ Provides-Extra: openai
179
+ Requires-Dist: tiktoken; extra == "openai"
180
+ Requires-Dist: openai; extra == "openai"
181
+ Provides-Extra: bedrock
182
+ Requires-Dist: boto3; extra == "bedrock"
183
+
184
+ # Unstructured Ingest
185
+
186
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
@@ -34,7 +34,7 @@ model = MockBaseModel(
34
34
 
35
35
  def test_serialize_base_model():
36
36
 
37
- serialized_dict = model.dict()
37
+ serialized_dict = model.model_dump()
38
38
  assert isinstance(serialized_dict["secret_str"], _SecretBase)
39
39
  assert isinstance(serialized_dict["secret_child_base"], _SecretBase)
40
40
 
@@ -57,7 +57,7 @@ def test_serialize_base_model():
57
57
 
58
58
 
59
59
  def test_serialize_base_model_json():
60
- serialized_json = model.json()
60
+ serialized_json = model.model_dump_json()
61
61
  serialized_dict = json.loads(serialized_json)
62
62
  expected_dict = {
63
63
  "secret_str": "**********",
@@ -0,0 +1 @@
1
+ __version__ = "0.0.22" # pragma: no cover
@@ -0,0 +1,107 @@
1
+ import json
2
+ import os
3
+ from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING
5
+
6
+ import numpy as np
7
+ from pydantic import Field, SecretStr
8
+
9
+ from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig
10
+ from unstructured_ingest.utils.dep_check import requires_dependencies
11
+
12
+ if TYPE_CHECKING:
13
+ from botocore.client import BaseClient
14
+
15
+ class BedrockClient(BaseClient):
16
+ def invoke_model(self, body: str, modelId: str, trace: str) -> dict:
17
+ pass
18
+
19
+
20
+ class BedrockEmbeddingConfig(EmbeddingConfig):
21
+ aws_access_key_id: SecretStr
22
+ aws_secret_access_key: SecretStr
23
+ region_name: str = "us-west-2"
24
+ embed_model_name: str = Field(default="amazon.titan-embed-text-v1", alias="model_name")
25
+
26
+ @requires_dependencies(
27
+ ["boto3", "numpy", "botocore"],
28
+ extras="bedrock",
29
+ )
30
+ def get_client(self) -> "BedrockClient":
31
+ # delay import only when needed
32
+ import boto3
33
+
34
+ bedrock_client = boto3.client(
35
+ service_name="bedrock-runtime",
36
+ aws_access_key_id=self.aws_access_key_id.get_secret_value(),
37
+ aws_secret_access_key=self.aws_secret_access_key.get_secret_value(),
38
+ region_name=self.region_name,
39
+ )
40
+
41
+ return bedrock_client
42
+
43
+
44
+ @dataclass
45
+ class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
46
+ config: BedrockEmbeddingConfig
47
+
48
+ def get_exemplary_embedding(self) -> list[float]:
49
+ return self.embed_query(query="Q")
50
+
51
+ def num_of_dimensions(self) -> tuple[int, ...]:
52
+ exemplary_embedding = self.get_exemplary_embedding()
53
+ return np.shape(exemplary_embedding)
54
+
55
+ def is_unit_vector(self) -> bool:
56
+ exemplary_embedding = self.get_exemplary_embedding()
57
+ return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
58
+
59
+ def embed_query(self, query: str) -> list[float]:
60
+ """Call out to Bedrock embedding endpoint."""
61
+ # replace newlines, which can negatively affect performance.
62
+ text = query.replace(os.linesep, " ")
63
+
64
+ # format input body for provider
65
+ provider = self.config.embed_model_name.split(".")[0]
66
+ input_body = {}
67
+ if provider == "cohere":
68
+ if "input_type" not in input_body:
69
+ input_body["input_type"] = "search_document"
70
+ input_body["texts"] = [text]
71
+ else:
72
+ # includes common provider == "amazon"
73
+ input_body["inputText"] = text
74
+ body = json.dumps(input_body)
75
+
76
+ try:
77
+ bedrock_client = self.config.get_client()
78
+ # invoke bedrock API
79
+ response = bedrock_client.invoke_model(
80
+ body=body,
81
+ modelId=self.config.embed_model_name,
82
+ accept="application/json",
83
+ contentType="application/json",
84
+ )
85
+
86
+ # format output based on provider
87
+ response_body = json.loads(response.get("body").read())
88
+ if provider == "cohere":
89
+ return response_body.get("embeddings")[0]
90
+ else:
91
+ # includes common provider == "amazon"
92
+ return response_body.get("embedding")
93
+ except Exception as e:
94
+ raise ValueError(f"Error raised by inference endpoint: {e}")
95
+
96
+ def embed_documents(self, elements: list[dict]) -> list[dict]:
97
+ embeddings = [self.embed_query(query=e.get("text", "")) for e in elements]
98
+ elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
99
+ return elements_with_embeddings
100
+
101
+ def _add_embeddings_to_elements(self, elements, embeddings) -> list[dict]:
102
+ assert len(elements) == len(embeddings)
103
+ elements_w_embedding = []
104
+ for i, element in enumerate(elements):
105
+ element["embeddings"] = embeddings[i]
106
+ elements_w_embedding.append(element)
107
+ return elements
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass
2
- from typing import TYPE_CHECKING, List, Optional
2
+ from typing import TYPE_CHECKING, Optional
3
3
 
4
4
  import numpy as np
5
5
  from pydantic import Field
@@ -8,7 +8,7 @@ from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder, Embedding
8
8
  from unstructured_ingest.utils.dep_check import requires_dependencies
9
9
 
10
10
  if TYPE_CHECKING:
11
- from langchain_huggingface.embeddings import HuggingFaceEmbeddings
11
+ from sentence_transformers import SentenceTransformer
12
12
 
13
13
 
14
14
  class HuggingFaceEmbeddingConfig(EmbeddingConfig):
@@ -19,51 +19,51 @@ class HuggingFaceEmbeddingConfig(EmbeddingConfig):
19
19
  default_factory=lambda: {"device": "cpu"}, alias="model_kwargs"
20
20
  )
21
21
  encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False})
22
- cache_folder: Optional[dict] = Field(default=None)
22
+ cache_folder: Optional[str] = Field(default=None)
23
23
 
24
24
  @requires_dependencies(
25
- ["langchain_huggingface"],
25
+ ["sentence_transformers"],
26
26
  extras="embed-huggingface",
27
27
  )
28
- def get_client(self) -> "HuggingFaceEmbeddings":
29
- """Creates a langchain Huggingface python client to embed elements."""
30
- from langchain_huggingface.embeddings import HuggingFaceEmbeddings
31
-
32
- client = HuggingFaceEmbeddings(
33
- model_name=self.embedder_model_name,
34
- model_kwargs=self.embedder_model_kwargs,
35
- encode_kwargs=self.encode_kwargs,
28
+ def get_client(self) -> "SentenceTransformer":
29
+ from sentence_transformers import SentenceTransformer
30
+
31
+ return SentenceTransformer(
32
+ model_name_or_path=self.embedder_model_name,
36
33
  cache_folder=self.cache_folder,
34
+ **self.embedder_model_kwargs,
37
35
  )
38
- return client
39
36
 
40
37
 
41
38
  @dataclass
42
39
  class HuggingFaceEmbeddingEncoder(BaseEmbeddingEncoder):
43
40
  config: HuggingFaceEmbeddingConfig
44
41
 
45
- def get_exemplary_embedding(self) -> List[float]:
42
+ def get_exemplary_embedding(self) -> list[float]:
46
43
  return self.embed_query(query="Q")
47
44
 
48
- def num_of_dimensions(self):
45
+ def num_of_dimensions(self) -> tuple[int, ...]:
49
46
  exemplary_embedding = self.get_exemplary_embedding()
50
47
  return np.shape(exemplary_embedding)
51
48
 
52
- def is_unit_vector(self):
49
+ def is_unit_vector(self) -> bool:
53
50
  exemplary_embedding = self.get_exemplary_embedding()
54
51
  return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
55
52
 
56
- def embed_query(self, query):
57
- client = self.config.get_client()
58
- return client.embed_query(str(query))
53
+ def embed_query(self, query: str) -> list[float]:
54
+ return self._embed_documents(texts=[query])[0]
59
55
 
60
- def embed_documents(self, elements: List[dict]) -> List[dict]:
56
+ def _embed_documents(self, texts: list[str]) -> list[list[float]]:
61
57
  client = self.config.get_client()
62
- embeddings = client.embed_documents([e.get("text", "") for e in elements])
58
+ embeddings = client.encode(texts, **self.config.encode_kwargs)
59
+ return embeddings.tolist()
60
+
61
+ def embed_documents(self, elements: list[dict]) -> list[dict]:
62
+ embeddings = self._embed_documents([e.get("text", "") for e in elements])
63
63
  elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
64
64
  return elements_with_embeddings
65
65
 
66
- def _add_embeddings_to_elements(self, elements: list[dict], embeddings: list) -> List[dict]:
66
+ def _add_embeddings_to_elements(self, elements: list[dict], embeddings: list) -> list[dict]:
67
67
  assert len(elements) == len(embeddings)
68
68
  elements_w_embedding = []
69
69
 
@@ -1,6 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from dataclasses import dataclass
3
- from typing import List, Tuple
4
3
 
5
4
  from pydantic import BaseModel
6
5
 
@@ -19,7 +18,7 @@ class BaseEmbeddingEncoder(ABC):
19
18
 
20
19
  @property
21
20
  @abstractmethod
22
- def num_of_dimensions(self) -> Tuple[int]:
21
+ def num_of_dimensions(self) -> tuple[int, ...]:
23
22
  """Number of dimensions for the embedding vector."""
24
23
 
25
24
  @property
@@ -28,9 +27,17 @@ class BaseEmbeddingEncoder(ABC):
28
27
  """Denotes if the embedding vector is a unit vector."""
29
28
 
30
29
  @abstractmethod
31
- def embed_documents(self, elements: List[dict]) -> List[dict]:
30
+ def embed_documents(self, elements: list[dict]) -> list[dict]:
32
31
  pass
33
32
 
34
33
  @abstractmethod
35
- def embed_query(self, query: str) -> List[float]:
34
+ def embed_query(self, query: str) -> list[float]:
36
35
  pass
36
+
37
+ def _embed_documents(self, elements: list[str]) -> list[list[float]]:
38
+ results = []
39
+ for text in elements:
40
+ response = self.embed_query(query=text)
41
+ results.append(response)
42
+
43
+ return results
@@ -1,6 +1,6 @@
1
1
  import os
2
2
  from dataclasses import dataclass, field
3
- from typing import TYPE_CHECKING, List, Optional
3
+ from typing import TYPE_CHECKING, Optional
4
4
 
5
5
  import numpy as np
6
6
  from pydantic import Field, SecretStr
@@ -67,10 +67,10 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
67
67
 
68
68
  config: MixedbreadAIEmbeddingConfig
69
69
 
70
- _exemplary_embedding: Optional[List[float]] = field(init=False, default=None)
70
+ _exemplary_embedding: Optional[list[float]] = field(init=False, default=None)
71
71
  _request_options: Optional["RequestOptions"] = field(init=False, default=None)
72
72
 
73
- def get_exemplary_embedding(self) -> List[float]:
73
+ def get_exemplary_embedding(self) -> list[float]:
74
74
  """Get an exemplary embedding to determine dimensions and unit vector status."""
75
75
  return self._embed(["Q"])[0]
76
76
 
@@ -91,7 +91,7 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
91
91
  )
92
92
 
93
93
  @property
94
- def num_of_dimensions(self):
94
+ def num_of_dimensions(self) -> tuple[int, ...]:
95
95
  """Get the number of dimensions for the embeddings."""
96
96
  exemplary_embedding = self.get_exemplary_embedding()
97
97
  return np.shape(exemplary_embedding)
@@ -102,15 +102,15 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
102
102
  exemplary_embedding = self.get_exemplary_embedding()
103
103
  return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
104
104
 
105
- def _embed(self, texts: List[str]) -> List[List[float]]:
105
+ def _embed(self, texts: list[str]) -> list[list[float]]:
106
106
  """
107
107
  Embed a list of texts using the Mixedbread AI API.
108
108
 
109
109
  Args:
110
- texts (List[str]): List of texts to embed.
110
+ texts (list[str]): List of texts to embed.
111
111
 
112
112
  Returns:
113
- List[List[float]]: List of embeddings.
113
+ list[list[float]]: List of embeddings.
114
114
  """
115
115
  batch_size = BATCH_SIZE
116
116
  batch_itr = range(0, len(texts), batch_size)
@@ -132,17 +132,17 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
132
132
 
133
133
  @staticmethod
134
134
  def _add_embeddings_to_elements(
135
- elements: List[dict], embeddings: List[List[float]]
136
- ) -> List[dict]:
135
+ elements: list[dict], embeddings: list[list[float]]
136
+ ) -> list[dict]:
137
137
  """
138
138
  Add embeddings to elements.
139
139
 
140
140
  Args:
141
- elements (List[Element]): List of elements.
142
- embeddings (List[List[float]]): List of embeddings.
141
+ elements (list[Element]): List of elements.
142
+ embeddings (list[list[float]]): List of embeddings.
143
143
 
144
144
  Returns:
145
- List[Element]: Elements with embeddings added.
145
+ list[Element]: Elements with embeddings added.
146
146
  """
147
147
  assert len(elements) == len(embeddings)
148
148
  elements_w_embedding = []
@@ -151,20 +151,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
151
151
  elements_w_embedding.append(element)
152
152
  return elements
153
153
 
154
- def embed_documents(self, elements: List[dict]) -> List[dict]:
154
+ def embed_documents(self, elements: list[dict]) -> list[dict]:
155
155
  """
156
156
  Embed a list of document elements.
157
157
 
158
158
  Args:
159
- elements (List[Element]): List of document elements.
159
+ elements (list[Element]): List of document elements.
160
160
 
161
161
  Returns:
162
- List[Element]: Elements with embeddings.
162
+ list[Element]: Elements with embeddings.
163
163
  """
164
164
  embeddings = self._embed([e.get("text", "") for e in elements])
165
165
  return self._add_embeddings_to_elements(elements, embeddings)
166
166
 
167
- def embed_query(self, query: str) -> List[float]:
167
+ def embed_query(self, query: str) -> list[float]:
168
168
  """
169
169
  Embed a query string.
170
170
 
@@ -172,6 +172,6 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
172
172
  query (str): Query string to embed.
173
173
 
174
174
  Returns:
175
- List[float]: Embedding of the query.
175
+ list[float]: Embedding of the query.
176
176
  """
177
177
  return self._embed([query])[0]
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass, field
2
- from typing import TYPE_CHECKING, List, Optional
2
+ from typing import TYPE_CHECKING, Optional
3
3
 
4
4
  import numpy as np
5
5
  from pydantic import Field, SecretStr
@@ -31,16 +31,16 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
31
31
  class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
32
32
  config: OctoAiEmbeddingConfig
33
33
  # Uses the OpenAI SDK
34
- _exemplary_embedding: Optional[List[float]] = field(init=False, default=None)
34
+ _exemplary_embedding: Optional[list[float]] = field(init=False, default=None)
35
35
 
36
- def get_exemplary_embedding(self) -> List[float]:
36
+ def get_exemplary_embedding(self) -> list[float]:
37
37
  return self.embed_query("Q")
38
38
 
39
- def num_of_dimensions(self):
39
+ def num_of_dimensions(self) -> tuple[int, ...]:
40
40
  exemplary_embedding = self.get_exemplary_embedding()
41
41
  return np.shape(exemplary_embedding)
42
42
 
43
- def is_unit_vector(self):
43
+ def is_unit_vector(self) -> bool:
44
44
  exemplary_embedding = self.get_exemplary_embedding()
45
45
  return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
46
46
 
@@ -49,12 +49,12 @@ class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
49
49
  response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
50
50
  return response.data[0].embedding
51
51
 
52
- def embed_documents(self, elements: List[dict]) -> List[dict]:
52
+ def embed_documents(self, elements: list[dict]) -> list[dict]:
53
53
  embeddings = [self.embed_query(e.get("text", "")) for e in elements]
54
54
  elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
55
55
  return elements_with_embeddings
56
56
 
57
- def _add_embeddings_to_elements(self, elements, embeddings) -> List[dict]:
57
+ def _add_embeddings_to_elements(self, elements, embeddings) -> list[dict]:
58
58
  assert len(elements) == len(embeddings)
59
59
  elements_w_embedding = []
60
60
  for i, element in enumerate(elements):
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass
2
- from typing import TYPE_CHECKING, List
2
+ from typing import TYPE_CHECKING
3
3
 
4
4
  import numpy as np
5
5
  from pydantic import Field, SecretStr
@@ -8,51 +8,46 @@ from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder, Embedding
8
8
  from unstructured_ingest.utils.dep_check import requires_dependencies
9
9
 
10
10
  if TYPE_CHECKING:
11
- from langchain_openai.embeddings import OpenAIEmbeddings
11
+ from openai import OpenAI
12
12
 
13
13
 
14
14
  class OpenAIEmbeddingConfig(EmbeddingConfig):
15
15
  api_key: SecretStr
16
16
  embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
17
17
 
18
- @requires_dependencies(["langchain_openai"], extras="openai")
19
- def get_client(self) -> "OpenAIEmbeddings":
20
- """Creates a langchain OpenAI python client to embed elements."""
21
- from langchain_openai import OpenAIEmbeddings
18
+ @requires_dependencies(["openai"], extras="openai")
19
+ def get_client(self) -> "OpenAI":
20
+ from openai import OpenAI
22
21
 
23
- openai_client = OpenAIEmbeddings(
24
- openai_api_key=self.api_key.get_secret_value(),
25
- model=self.embedder_model_name, # type:ignore
26
- )
27
- return openai_client
22
+ return OpenAI(api_key=self.api_key.get_secret_value())
28
23
 
29
24
 
30
25
  @dataclass
31
26
  class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
32
27
  config: OpenAIEmbeddingConfig
33
28
 
34
- def get_exemplary_embedding(self) -> List[float]:
29
+ def get_exemplary_embedding(self) -> list[float]:
35
30
  return self.embed_query(query="Q")
36
31
 
37
- def num_of_dimensions(self):
32
+ def num_of_dimensions(self) -> tuple[int, ...]:
38
33
  exemplary_embedding = self.get_exemplary_embedding()
39
34
  return np.shape(exemplary_embedding)
40
35
 
41
- def is_unit_vector(self):
36
+ def is_unit_vector(self) -> bool:
42
37
  exemplary_embedding = self.get_exemplary_embedding()
43
38
  return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
44
39
 
45
- def embed_query(self, query):
40
+ def embed_query(self, query: str) -> list[float]:
46
41
  client = self.config.get_client()
47
- return client.embed_query(str(query))
42
+ response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
43
+ return response.data[0].embedding
48
44
 
49
- def embed_documents(self, elements: List[dict]) -> List[dict]:
50
- client = self.config.get_client()
51
- embeddings = client.embed_documents([e.get("text", "") for e in elements])
45
+ def embed_documents(self, elements: list[dict]) -> list[dict]:
46
+ embeddings = self._embed_documents([e.get("text", "") for e in elements])
52
47
  elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
53
48
  return elements_with_embeddings
54
49
 
55
- def _add_embeddings_to_elements(self, elements, embeddings) -> List[dict]:
50
+ def _add_embeddings_to_elements(self, elements, embeddings) -> list[dict]:
56
51
  assert len(elements) == len(embeddings)
57
52
  elements_w_embedding = []
58
53
  for i, element in enumerate(elements):