unstructured-ingest 0.0.24__tar.gz → 0.0.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (361)
  1. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/PKG-INFO +16 -16
  2. unstructured_ingest-0.0.25/unstructured_ingest/__version__.py +1 -0
  3. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/dep_check.py +12 -0
  4. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/__init__.py +12 -1
  5. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +125 -32
  6. unstructured_ingest-0.0.25/unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
  7. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/pinecone.py +9 -1
  8. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/PKG-INFO +16 -16
  9. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/SOURCES.txt +1 -0
  10. unstructured_ingest-0.0.24/unstructured_ingest/__version__.py +0 -1
  11. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/LICENSE.md +0 -0
  12. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/README.md +0 -0
  13. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/pyproject.toml +0 -0
  14. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/setup.cfg +0 -0
  15. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/setup.py +0 -0
  16. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/test/test_chunking_utils.py +0 -0
  17. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/test/test_error.py +0 -0
  18. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/test/test_interfaces.py +0 -0
  19. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/test/test_logger.py +0 -0
  20. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/test/test_utils.py +0 -0
  21. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/test/test_utils_v2.py +0 -0
  22. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/__init__.py +0 -0
  23. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/__init__.py +0 -0
  24. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/__init__.py +0 -0
  25. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/cmd.py +0 -0
  26. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/dest.py +0 -0
  27. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/src.py +0 -0
  28. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cli.py +0 -0
  29. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmd_factory.py +0 -0
  30. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/__init__.py +0 -0
  31. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  32. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/astradb.py +0 -0
  33. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
  34. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  35. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  36. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  37. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  38. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  39. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  40. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/discord.py +0 -0
  41. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  42. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  43. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  44. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  45. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  46. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  47. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  48. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  49. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  50. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/github.py +0 -0
  51. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  52. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  53. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  54. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/jira.py +0 -0
  55. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  56. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/local.py +0 -0
  57. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  58. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/notion.py +0 -0
  59. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  60. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  61. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  62. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  63. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  64. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  65. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  66. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  67. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/slack.py +0 -0
  68. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/sql.py +0 -0
  69. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  70. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  71. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  72. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/common.py +0 -0
  73. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/interfaces.py +0 -0
  74. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/utils.py +0 -0
  75. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/__init__.py +0 -0
  76. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/airtable.py +0 -0
  77. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/astradb.py +0 -0
  78. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
  79. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/biomed.py +0 -0
  80. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/chroma.py +0 -0
  81. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/clarifai.py +0 -0
  82. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/confluence.py +0 -0
  83. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/databricks_volumes.py +0 -0
  84. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/delta_table.py +0 -0
  85. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/discord.py +0 -0
  86. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/elasticsearch.py +0 -0
  87. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  88. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  89. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/box.py +0 -0
  90. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  91. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
  92. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  93. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  94. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  95. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/git.py +0 -0
  96. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/github.py +0 -0
  97. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/gitlab.py +0 -0
  98. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/google_drive.py +0 -0
  99. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/hubspot.py +0 -0
  100. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/jira.py +0 -0
  101. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/kafka.py +0 -0
  102. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/local.py +0 -0
  103. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/mongodb.py +0 -0
  104. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/__init__.py +0 -0
  105. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/client.py +0 -0
  106. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/connector.py +0 -0
  107. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/helpers.py +0 -0
  108. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  109. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  110. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/block.py +0 -0
  111. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  112. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  113. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  114. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  115. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  116. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  117. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  118. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  119. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  120. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  121. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  122. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  123. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  124. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  125. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  126. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  127. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  128. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  129. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  130. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  131. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  132. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  133. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  134. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  135. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  136. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  137. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  138. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  139. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  140. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database.py +0 -0
  141. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  142. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  143. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  144. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  145. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  146. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  147. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  148. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  149. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  150. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  151. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  152. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  153. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  154. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  155. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  156. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  157. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  158. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  159. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  160. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  161. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  162. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  163. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  164. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/date.py +0 -0
  165. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/file.py +0 -0
  166. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/page.py +0 -0
  167. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  168. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  169. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/user.py +0 -0
  170. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/onedrive.py +0 -0
  171. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/opensearch.py +0 -0
  172. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/outlook.py +0 -0
  173. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/pinecone.py +0 -0
  174. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/qdrant.py +0 -0
  175. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/reddit.py +0 -0
  176. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/registry.py +0 -0
  177. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/salesforce.py +0 -0
  178. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/sharepoint.py +0 -0
  179. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/slack.py +0 -0
  180. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/sql.py +0 -0
  181. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/vectara.py +0 -0
  182. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/weaviate.py +0 -0
  183. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/wikipedia.py +0 -0
  184. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/__init__.py +0 -0
  185. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/bedrock.py +0 -0
  186. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/huggingface.py +0 -0
  187. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/interfaces.py +0 -0
  188. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  189. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/octoai.py +0 -0
  190. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/openai.py +0 -0
  191. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/vertexai.py +0 -0
  192. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/voyageai.py +0 -0
  193. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  194. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  195. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  196. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  197. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/error.py +0 -0
  198. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  199. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  200. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  201. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/interfaces.py +0 -0
  202. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/logger.py +0 -0
  203. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/main.py +0 -0
  204. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/__init__.py +0 -0
  205. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/copy.py +0 -0
  206. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  207. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/interfaces.py +0 -0
  208. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/partition.py +0 -0
  209. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/permissions.py +0 -0
  210. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/pipeline.py +0 -0
  211. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  212. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
  213. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
  214. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/source.py +0 -0
  215. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/utils.py +0 -0
  216. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/write.py +0 -0
  217. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/processor.py +0 -0
  218. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/__init__.py +0 -0
  219. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/airtable.py +0 -0
  220. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/astradb.py +0 -0
  221. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/base_runner.py +0 -0
  222. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/biomed.py +0 -0
  223. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/confluence.py +0 -0
  224. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/delta_table.py +0 -0
  225. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/discord.py +0 -0
  226. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/elasticsearch.py +0 -0
  227. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  228. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  229. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/box.py +0 -0
  230. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  231. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  232. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  233. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  234. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  235. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/github.py +0 -0
  236. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/gitlab.py +0 -0
  237. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/google_drive.py +0 -0
  238. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/hubspot.py +0 -0
  239. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/jira.py +0 -0
  240. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/kafka.py +0 -0
  241. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/local.py +0 -0
  242. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/mongodb.py +0 -0
  243. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/notion.py +0 -0
  244. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/onedrive.py +0 -0
  245. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/opensearch.py +0 -0
  246. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/outlook.py +0 -0
  247. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/reddit.py +0 -0
  248. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/salesforce.py +0 -0
  249. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/sharepoint.py +0 -0
  250. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/slack.py +0 -0
  251. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/utils.py +0 -0
  252. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/wikipedia.py +0 -0
  253. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/__init__.py +0 -0
  254. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/astradb.py +0 -0
  255. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
  256. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  257. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/chroma.py +0 -0
  258. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  259. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  260. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  261. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  262. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  263. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  264. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  265. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  266. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  267. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  268. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/kafka.py +0 -0
  269. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  270. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  271. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  272. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  273. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/sql.py +0 -0
  274. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/vectara.py +0 -0
  275. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  276. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/__init__.py +0 -0
  277. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/chunking.py +0 -0
  278. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/compression.py +0 -0
  279. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/data_prep.py +0 -0
  280. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/google_filetype.py +0 -0
  281. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  282. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/table.py +0 -0
  283. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/__init__.py +0 -0
  284. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/__init__.py +0 -0
  285. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  286. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/cmd.py +0 -0
  287. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/dest.py +0 -0
  288. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  289. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/src.py +0 -0
  290. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/cli.py +0 -0
  291. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/cmds.py +0 -0
  292. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  293. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/utils/click.py +0 -0
  294. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/utils/model_conversion.py +0 -0
  295. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
  296. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/connector.py +0 -0
  297. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
  298. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/file_data.py +0 -0
  299. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
  300. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/process.py +0 -0
  301. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/processor.py +0 -0
  302. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
  303. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
  304. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/logger.py +0 -0
  305. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/main.py +0 -0
  306. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/otel.py +0 -0
  307. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  308. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
  309. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/otel.py +0 -0
  310. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/pipeline.py +0 -0
  311. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  312. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/chunk.py +0 -0
  313. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/download.py +0 -0
  314. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/embed.py +0 -0
  315. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/filter.py +0 -0
  316. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/index.py +0 -0
  317. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/partition.py +0 -0
  318. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/stage.py +0 -0
  319. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/uncompress.py +0 -0
  320. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/upload.py +0 -0
  321. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/__init__.py +0 -0
  322. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/chunker.py +0 -0
  323. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
  324. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/airtable.py +0 -0
  325. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/astradb.py +0 -0
  326. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +0 -0
  327. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/chroma.py +0 -0
  328. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/couchbase.py +0 -0
  329. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +0 -0
  330. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  331. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +0 -0
  332. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +0 -0
  333. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +0 -0
  334. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +0 -0
  335. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +0 -0
  336. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +0 -0
  337. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +0 -0
  338. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  339. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -0
  340. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
  341. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/local.py +0 -0
  342. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/milvus.py +0 -0
  343. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/mongodb.py +0 -0
  344. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/onedrive.py +0 -0
  345. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/opensearch.py +0 -0
  346. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
  347. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
  348. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/singlestore.py +0 -0
  349. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/sql.py +0 -0
  350. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
  351. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/weaviate.py +0 -0
  352. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/embedder.py +0 -0
  353. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/filter.py +0 -0
  354. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/partitioner.py +0 -0
  355. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/uncompress.py +0 -0
  356. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/unstructured_api.py +0 -0
  357. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/utils.py +0 -0
  358. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  359. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  360. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/requires.txt +15 -15
  361. {unstructured_ingest-0.0.24 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.24
3
+ Version: 0.0.25
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -23,11 +23,11 @@ Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
25
  Requires-Dist: dataclasses_json
26
- Requires-Dist: tqdm
27
- Requires-Dist: pydantic>=2.7
28
26
  Requires-Dist: click
29
27
  Requires-Dist: pandas
30
28
  Requires-Dist: python-dateutil
29
+ Requires-Dist: tqdm
30
+ Requires-Dist: pydantic>=2.7
31
31
  Requires-Dist: opentelemetry-sdk
32
32
  Provides-Extra: remote
33
33
  Requires-Dist: unstructured-client>=0.25.8; extra == "remote"
@@ -66,16 +66,16 @@ Requires-Dist: pyairtable; extra == "airtable"
66
66
  Provides-Extra: astradb
67
67
  Requires-Dist: astrapy; extra == "astradb"
68
68
  Provides-Extra: azure
69
- Requires-Dist: fsspec; extra == "azure"
70
69
  Requires-Dist: adlfs; extra == "azure"
70
+ Requires-Dist: fsspec; extra == "azure"
71
71
  Provides-Extra: azure-cognitive-search
72
72
  Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
73
73
  Provides-Extra: biomed
74
- Requires-Dist: bs4; extra == "biomed"
75
74
  Requires-Dist: requests; extra == "biomed"
75
+ Requires-Dist: bs4; extra == "biomed"
76
76
  Provides-Extra: box
77
- Requires-Dist: boxfs; extra == "box"
78
77
  Requires-Dist: fsspec; extra == "box"
78
+ Requires-Dist: boxfs; extra == "box"
79
79
  Provides-Extra: chroma
80
80
  Requires-Dist: chromadb; extra == "chroma"
81
81
  Provides-Extra: clarifai
@@ -96,8 +96,8 @@ Requires-Dist: dropboxdrivefs; extra == "dropbox"
96
96
  Provides-Extra: elasticsearch
97
97
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
98
98
  Provides-Extra: gcs
99
- Requires-Dist: bs4; extra == "gcs"
100
99
  Requires-Dist: gcsfs; extra == "gcs"
100
+ Requires-Dist: bs4; extra == "gcs"
101
101
  Requires-Dist: fsspec; extra == "gcs"
102
102
  Provides-Extra: github
103
103
  Requires-Dist: pygithub>1.58.0; extra == "github"
@@ -107,8 +107,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
107
107
  Provides-Extra: google-drive
108
108
  Requires-Dist: google-api-python-client; extra == "google-drive"
109
109
  Provides-Extra: hubspot
110
- Requires-Dist: urllib3; extra == "hubspot"
111
110
  Requires-Dist: hubspot-api-client; extra == "hubspot"
111
+ Requires-Dist: urllib3; extra == "hubspot"
112
112
  Provides-Extra: jira
113
113
  Requires-Dist: atlassian-python-api; extra == "jira"
114
114
  Provides-Extra: kafka
@@ -120,19 +120,19 @@ Requires-Dist: pymilvus; extra == "milvus"
120
120
  Provides-Extra: mongodb
121
121
  Requires-Dist: pymongo; extra == "mongodb"
122
122
  Provides-Extra: notion
123
- Requires-Dist: notion-client; extra == "notion"
124
123
  Requires-Dist: httpx; extra == "notion"
125
- Requires-Dist: htmlBuilder; extra == "notion"
126
124
  Requires-Dist: backoff; extra == "notion"
125
+ Requires-Dist: notion-client; extra == "notion"
126
+ Requires-Dist: htmlBuilder; extra == "notion"
127
127
  Provides-Extra: onedrive
128
+ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
128
129
  Requires-Dist: bs4; extra == "onedrive"
129
130
  Requires-Dist: msal; extra == "onedrive"
130
- Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
131
131
  Provides-Extra: opensearch
132
132
  Requires-Dist: opensearch-py; extra == "opensearch"
133
133
  Provides-Extra: outlook
134
- Requires-Dist: msal; extra == "outlook"
135
134
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
135
+ Requires-Dist: msal; extra == "outlook"
136
136
  Provides-Extra: pinecone
137
137
  Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
138
138
  Provides-Extra: postgres
@@ -145,13 +145,13 @@ Provides-Extra: s3
145
145
  Requires-Dist: fsspec; extra == "s3"
146
146
  Requires-Dist: s3fs; extra == "s3"
147
147
  Provides-Extra: sharepoint
148
- Requires-Dist: msal; extra == "sharepoint"
149
148
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
149
+ Requires-Dist: msal; extra == "sharepoint"
150
150
  Provides-Extra: salesforce
151
151
  Requires-Dist: simple-salesforce; extra == "salesforce"
152
152
  Provides-Extra: sftp
153
- Requires-Dist: fsspec; extra == "sftp"
154
153
  Requires-Dist: paramiko; extra == "sftp"
154
+ Requires-Dist: fsspec; extra == "sftp"
155
155
  Provides-Extra: slack
156
156
  Requires-Dist: slack_sdk; extra == "slack"
157
157
  Provides-Extra: wikipedia
@@ -167,8 +167,8 @@ Requires-Dist: requests; extra == "vectara"
167
167
  Provides-Extra: embed-huggingface
168
168
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
169
169
  Provides-Extra: embed-octoai
170
- Requires-Dist: openai; extra == "embed-octoai"
171
170
  Requires-Dist: tiktoken; extra == "embed-octoai"
171
+ Requires-Dist: openai; extra == "embed-octoai"
172
172
  Provides-Extra: embed-vertexai
173
173
  Requires-Dist: vertexai; extra == "embed-vertexai"
174
174
  Provides-Extra: embed-voyageai
@@ -176,8 +176,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
176
176
  Provides-Extra: embed-mixedbreadai
177
177
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
178
178
  Provides-Extra: openai
179
- Requires-Dist: openai; extra == "openai"
180
179
  Requires-Dist: tiktoken; extra == "openai"
180
+ Requires-Dist: openai; extra == "openai"
181
181
  Provides-Extra: bedrock
182
182
  Requires-Dist: boto3; extra == "bedrock"
183
183
 
@@ -0,0 +1 @@
1
+ __version__ = "0.0.25" # pragma: no cover
@@ -20,6 +20,18 @@ def requires_dependencies(
20
20
  dependencies: str | list[str],
21
21
  extras: Optional[str] = None,
22
22
  ) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
23
+ """Decorator ensuring required modules are installed.
24
+
25
+ Use on functions with local imports to ensure required modules are available and log
26
+ an installation instruction if they're not.
27
+
28
+ Args:
29
+ dependencies: Name(s) of module(s) required by the decorated function.
30
+ extras: unstructured-ingest extra which installs required `dependencies`. Defaults to None.
31
+
32
+ Raises:
33
+ ImportError: When at least one of the `dependencies` is not available.
34
+ """
23
35
  if isinstance(dependencies, str):
24
36
  dependencies = [dependencies]
25
37
 
@@ -17,7 +17,10 @@ from .chroma import chroma_destination_entry
17
17
  from .couchbase import CONNECTOR_TYPE as COUCHBASE_CONNECTOR_TYPE
18
18
  from .couchbase import couchbase_destination_entry, couchbase_source_entry
19
19
  from .databricks_volumes import CONNECTOR_TYPE as DATABRICKS_VOLUMES_CONNECTOR_TYPE
20
- from .databricks_volumes import databricks_volumes_destination_entry
20
+ from .databricks_volumes import (
21
+ databricks_volumes_destination_entry,
22
+ databricks_volumes_source_entry,
23
+ )
21
24
  from .elasticsearch import CONNECTOR_TYPE as ELASTICSEARCH_CONNECTOR_TYPE
22
25
  from .elasticsearch import elasticsearch_destination_entry, elasticsearch_source_entry
23
26
  from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE
@@ -34,6 +37,8 @@ from .onedrive import CONNECTOR_TYPE as ONEDRIVE_CONNECTOR_TYPE
34
37
  from .onedrive import onedrive_source_entry
35
38
  from .opensearch import CONNECTOR_TYPE as OPENSEARCH_CONNECTOR_TYPE
36
39
  from .opensearch import opensearch_destination_entry, opensearch_source_entry
40
+ from .outlook import CONNECTOR_TYPE as OUTLOOK_CONNECTOR_TYPE
41
+ from .outlook import outlook_source_entry
37
42
  from .pinecone import CONNECTOR_TYPE as PINECONE_CONNECTOR_TYPE
38
43
  from .pinecone import pinecone_destination_entry
39
44
  from .salesforce import CONNECTOR_TYPE as SALESFORCE_CONNECTOR_TYPE
@@ -78,6 +83,10 @@ add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_d
78
83
  add_destination_entry(
79
84
  destination_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_destination_entry
80
85
  )
86
+ add_source_entry(
87
+ source_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_source_entry
88
+ )
89
+
81
90
 
82
91
  add_destination_entry(destination_type=SQL_CONNECTOR_TYPE, entry=sql_destination_entry)
83
92
 
@@ -95,3 +104,5 @@ add_destination_entry(
95
104
 
96
105
  add_destination_entry(destination_type=KDBAI_CONNECTOR_TYPE, entry=kdbai_destination_entry)
97
106
  add_source_entry(source_type=AIRTABLE_CONNECTOR_TYPE, entry=airtable_source_entry)
107
+
108
+ add_source_entry(source_type=OUTLOOK_CONNECTOR_TYPE, entry=outlook_source_entry)
@@ -1,21 +1,35 @@
1
1
  import os
2
2
  from dataclasses import dataclass
3
3
  from pathlib import Path
4
- from typing import TYPE_CHECKING, Any, Optional
4
+ from typing import TYPE_CHECKING, Any, Generator, Optional
5
5
 
6
6
  from pydantic import Field, Secret
7
7
 
8
- from unstructured_ingest.error import DestinationConnectionError
8
+ from unstructured_ingest.error import (
9
+ DestinationConnectionError,
10
+ SourceConnectionError,
11
+ SourceConnectionNetworkError,
12
+ )
9
13
  from unstructured_ingest.utils.dep_check import requires_dependencies
10
14
  from unstructured_ingest.v2.interfaces import (
11
15
  AccessConfig,
12
16
  ConnectionConfig,
17
+ Downloader,
18
+ DownloaderConfig,
19
+ DownloadResponse,
13
20
  FileData,
21
+ FileDataSourceMetadata,
22
+ Indexer,
23
+ IndexerConfig,
24
+ SourceIdentifiers,
14
25
  Uploader,
15
26
  UploaderConfig,
16
27
  )
17
28
  from unstructured_ingest.v2.logger import logger
18
- from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
29
+ from unstructured_ingest.v2.processes.connector_registry import (
30
+ DestinationRegistryEntry,
31
+ SourceRegistryEntry,
32
+ )
19
33
 
20
34
  if TYPE_CHECKING:
21
35
  from databricks.sdk import WorkspaceClient
@@ -32,16 +46,6 @@ class DatabricksVolumesAccessConfig(AccessConfig):
32
46
  "https://accounts.azuredatabricks.net/ (Azure), "
33
47
  "or https://accounts.gcp.databricks.com/ (GCP).",
34
48
  )
35
- username: Optional[str] = Field(
36
- default=None,
37
- description="The Databricks username part of basic authentication. "
38
- "Only possible when Host is *.cloud.databricks.com (AWS).",
39
- )
40
- password: Optional[str] = Field(
41
- default=None,
42
- description="The Databricks password part of basic authentication. "
43
- "Only possible when Host is *.cloud.databricks.com (AWS).",
44
- )
45
49
  client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
46
50
  client_secret: Optional[str] = Field(
47
51
  default=None, description="Client Secret of the OAuth app."
@@ -78,7 +82,6 @@ class DatabricksVolumesAccessConfig(AccessConfig):
78
82
  "argument. This argument also holds the currently "
79
83
  "selected auth.",
80
84
  )
81
- cluster_id: Optional[str] = None
82
85
  google_credentials: Optional[str] = None
83
86
  google_service_account: Optional[str] = None
84
87
 
@@ -93,17 +96,11 @@ class DatabricksVolumesConnectionConfig(ConnectionConfig):
93
96
  "Databricks workspace endpoint or the "
94
97
  "Databricks accounts endpoint.",
95
98
  )
96
-
97
-
98
- class DatabricksVolumesUploaderConfig(UploaderConfig):
99
99
  volume: str = Field(description="Name of volume in the Unity Catalog")
100
100
  catalog: str = Field(description="Name of the catalog in the Databricks Unity Catalog service")
101
101
  volume_path: Optional[str] = Field(
102
102
  default=None, description="Optional path within the volume to write to"
103
103
  )
104
- overwrite: bool = Field(
105
- default=False, description="If true, an existing file will be overwritten."
106
- )
107
104
  databricks_schema: str = Field(
108
105
  default="default",
109
106
  alias="schema",
@@ -117,33 +114,121 @@ class DatabricksVolumesUploaderConfig(UploaderConfig):
117
114
  path = f"{path}/{self.volume_path}"
118
115
  return path
119
116
 
120
-
121
- @dataclass
122
- class DatabricksVolumesUploader(Uploader):
123
- connector_type: str = CONNECTOR_TYPE
124
- upload_config: DatabricksVolumesUploaderConfig
125
- connection_config: DatabricksVolumesConnectionConfig
126
-
127
117
  @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes")
128
118
  def get_client(self) -> "WorkspaceClient":
129
119
  from databricks.sdk import WorkspaceClient
130
120
 
131
121
  return WorkspaceClient(
132
- host=self.connection_config.host,
133
- **self.connection_config.access_config.get_secret_value().model_dump(),
122
+ host=self.host,
123
+ **self.access_config.get_secret_value().model_dump(),
134
124
  )
135
125
 
126
+
127
+ @dataclass
128
+ class DatabricksVolumesIndexerConfig(IndexerConfig):
129
+ recursive: bool = False
130
+
131
+
132
+ @dataclass
133
+ class DatabricksVolumesIndexer(Indexer):
134
+ index_config: DatabricksVolumesIndexerConfig
135
+ connection_config: DatabricksVolumesConnectionConfig
136
+ connector_type: str = CONNECTOR_TYPE
137
+
136
138
  def precheck(self) -> None:
137
139
  try:
138
- assert self.get_client().current_user.me().active
140
+ self.connection_config.get_client()
141
+ except Exception as e:
142
+ logger.error(f"failed to validate connection: {e}", exc_info=True)
143
+ raise SourceConnectionError(f"failed to validate connection: {e}")
144
+
145
+ def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
146
+ for file_info in self.connection_config.get_client().dbfs.list(
147
+ path=self.connection_config.path, recursive=self.index_config.recursive
148
+ ):
149
+ if file_info.is_dir:
150
+ continue
151
+ rel_path = file_info.path.replace(self.connection_config.path, "")
152
+ if rel_path.startswith("/"):
153
+ rel_path = rel_path[1:]
154
+ filename = Path(file_info.path).name
155
+ yield FileData(
156
+ identifier=file_info.path,
157
+ connector_type=CONNECTOR_TYPE,
158
+ source_identifiers=SourceIdentifiers(
159
+ filename=filename,
160
+ rel_path=rel_path,
161
+ fullpath=file_info.path,
162
+ ),
163
+ additional_metadata={
164
+ "catalog": self.connection_config.catalog,
165
+ },
166
+ metadata=FileDataSourceMetadata(
167
+ url=file_info.path, date_modified=str(file_info.modification_time)
168
+ ),
169
+ )
170
+
171
+
172
+ @dataclass
173
+ class DatabricksVolumesDownloaderConfig(DownloaderConfig):
174
+ pass
175
+
176
+
177
+ @dataclass
178
+ class DatabricksVolumesDownloader(Downloader):
179
+ download_config: DatabricksVolumesDownloaderConfig
180
+ connection_config: DatabricksVolumesConnectionConfig
181
+ connector_type: str = CONNECTOR_TYPE
182
+
183
+ def precheck(self) -> None:
184
+ try:
185
+ self.connection_config.get_client()
186
+ except Exception as e:
187
+ logger.error(f"failed to validate connection: {e}", exc_info=True)
188
+ raise SourceConnectionError(f"failed to validate connection: {e}")
189
+
190
+ def get_download_path(self, file_data: FileData) -> Path:
191
+ return self.download_config.download_dir / Path(file_data.source_identifiers.relative_path)
192
+
193
+ def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
194
+ download_path = self.get_download_path(file_data=file_data)
195
+ download_path.parent.mkdir(parents=True, exist_ok=True)
196
+ logger.info(f"Writing {file_data.identifier} to {download_path}")
197
+ try:
198
+ with self.connection_config.get_client().dbfs.download(path=file_data.identifier) as c:
199
+ read_content = c._read_handle.read()
200
+ with open(download_path, "wb") as f:
201
+ f.write(read_content)
202
+ except Exception as e:
203
+ logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True)
204
+ raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
205
+
206
+ return self.generate_download_response(file_data=file_data, download_path=download_path)
207
+
208
+
209
+ class DatabricksVolumesUploaderConfig(UploaderConfig):
210
+ overwrite: bool = Field(
211
+ default=False, description="If true, an existing file will be overwritten."
212
+ )
213
+
214
+
215
+ @dataclass
216
+ class DatabricksVolumesUploader(Uploader):
217
+ upload_config: DatabricksVolumesUploaderConfig
218
+ connection_config: DatabricksVolumesConnectionConfig
219
+ connector_type: str = CONNECTOR_TYPE
220
+
221
+ def precheck(self) -> None:
222
+ try:
223
+ assert self.connection_config.get_client().current_user.me().active
139
224
  except Exception as e:
140
225
  logger.error(f"failed to validate connection: {e}", exc_info=True)
141
226
  raise DestinationConnectionError(f"failed to validate connection: {e}")
142
227
 
143
228
  def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
144
- output_path = os.path.join(self.upload_config.path, path.name)
229
+ output_path = os.path.join(self.connection_config.path, path.name)
145
230
  with open(path, "rb") as elements_file:
146
- self.get_client().files.upload(
231
+ self.connection_config.get_client().files.upload(
147
232
  file_path=output_path,
148
233
  contents=elements_file,
149
234
  overwrite=self.upload_config.overwrite,
@@ -155,3 +240,11 @@ databricks_volumes_destination_entry = DestinationRegistryEntry(
155
240
  uploader=DatabricksVolumesUploader,
156
241
  uploader_config=DatabricksVolumesUploaderConfig,
157
242
  )
243
+
244
+ databricks_volumes_source_entry = SourceRegistryEntry(
245
+ connection_config=DatabricksVolumesConnectionConfig,
246
+ indexer=DatabricksVolumesIndexer,
247
+ indexer_config=DatabricksVolumesIndexerConfig,
248
+ downloader=DatabricksVolumesDownloader,
249
+ downloader_config=DatabricksVolumesDownloaderConfig,
250
+ )
@@ -0,0 +1,239 @@
1
+ import hashlib
2
+ import time
3
+ from dataclasses import dataclass, field
4
+ from datetime import timezone
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Any, Coroutine, Generator
7
+
8
+ from pydantic import Field, Secret
9
+
10
+ from unstructured_ingest.error import SourceConnectionError
11
+ from unstructured_ingest.logger import logger
12
+ from unstructured_ingest.utils.dep_check import requires_dependencies
13
+ from unstructured_ingest.v2.interfaces import (
14
+ AccessConfig,
15
+ ConnectionConfig,
16
+ Downloader,
17
+ DownloaderConfig,
18
+ FileData,
19
+ Indexer,
20
+ IndexerConfig,
21
+ download_responses,
22
+ )
23
+ from unstructured_ingest.v2.interfaces.file_data import FileDataSourceMetadata, SourceIdentifiers
24
+ from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
25
+
26
+ MAX_EMAILS_PER_FOLDER = 1_000_000 # Maximum number of emails per folder
27
+
28
+ if TYPE_CHECKING:
29
+ from office365.graph_client import GraphClient
30
+ from office365.outlook.mail.folders.folder import MailFolder
31
+ from office365.outlook.mail.messages.message import Message
32
+
33
+
34
+ CONNECTOR_TYPE = "outlook"
35
+
36
+
37
+ class OutlookAccessConfig(AccessConfig):
38
+ client_credential: str = Field(description="Azure AD App client secret", alias="client_cred")
39
+
40
+
41
+ class OutlookConnectionConfig(ConnectionConfig):
42
+ access_config: Secret[OutlookAccessConfig]
43
+ client_id: str = Field(description="Azure AD App client ID")
44
+ tenant: str = Field(
45
+ default="common", description="ID or domain name associated with your Azure AD instance"
46
+ )
47
+ authority_url: str = Field(
48
+ default="https://login.microsoftonline.com",
49
+ description="Authentication token provider for Microsoft apps",
50
+ )
51
+
52
+ @requires_dependencies(["msal"], extras="outlook")
53
+ def _acquire_token(self):
54
+ """Acquire token via MSAL"""
55
+ from msal import ConfidentialClientApplication
56
+
57
+ # NOTE: It'd be nice to use `msal.authority.AuthorityBuilder` here paired with AZURE_PUBLIC
58
+ # constant as default in the future but they do not fit well with `authority_url` right now
59
+ authority_url = f"{self.authority_url.rstrip('/')}/{self.tenant}"
60
+ app = ConfidentialClientApplication(
61
+ authority=authority_url,
62
+ client_id=self.client_id,
63
+ client_credential=self.access_config.get_secret_value().client_credential,
64
+ )
65
+ token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
66
+ return token
67
+
68
+ @requires_dependencies(["office365"], extras="outlook")
69
+ @SourceConnectionError.wrap
70
+ def get_client(self) -> "GraphClient":
71
+ from office365.graph_client import GraphClient
72
+
73
+ return GraphClient(self._acquire_token)
74
+
75
+
76
+ class OutlookIndexerConfig(IndexerConfig):
77
+ outlook_folders: list[str] = Field(
78
+ description="Folders to download email messages from. Do not specify subfolders. "
79
+ "Use quotes if there are spaces in folder names."
80
+ )
81
+ recursive: bool = Field(
82
+ default=False,
83
+ description="Recursively download files in their respective folders otherwise stop at the"
84
+ " files in provided folder level.",
85
+ )
86
+ user_email: str = Field(description="Outlook email to download messages from.")
87
+
88
+
89
@dataclass
class OutlookIndexer(Indexer):
    """Enumerate messages in the configured Outlook folders of one user.

    Each message found is converted to a ``FileData`` record carrying the
    record locator (``message_id`` and ``user_email``) alongside source metadata.
    """

    index_config: OutlookIndexerConfig
    connection_config: OutlookConnectionConfig
    connector_type: str = CONNECTOR_TYPE

    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
        """Yield one ``FileData`` per message found in the selected folders."""
        messages = self._list_messages(recursive=self.index_config.recursive)

        for message in messages:
            yield self._message_to_file_data(message)

    def run_async(self, **kwargs: Any) -> Coroutine[Any, Any, Any]:
        """Async indexing is not supported by this connector."""
        raise NotImplementedError

    @SourceConnectionError.wrap
    def precheck(self) -> None:
        """Validate the connection by fetching the configured user."""
        client = self.connection_config.get_client()
        client.users[self.index_config.user_email].get().execute_query()

    def is_async(self) -> bool:
        return False

    def _list_messages(self, recursive: bool) -> list["Message"]:
        """Collect messages from the selected root folders.

        Args:
            recursive: When True, child folders of every visited folder are
                queued and visited as well.

        Returns:
            All messages gathered, requesting at most ``MAX_EMAILS_PER_FOLDER``
            per folder.
        """
        mail_folders = self._get_selected_root_folders()
        messages = []

        # `mail_folders` doubles as a stack of folders still to visit.
        while mail_folders:
            mail_folder = mail_folders.pop()
            messages += list(mail_folder.messages.get().top(MAX_EMAILS_PER_FOLDER).execute_query())

            if recursive:
                mail_folders += list(mail_folder.child_folders.get().execute_query())

        return messages

    def _get_selected_root_folders(self) -> list["MailFolder"]:
        """Return the user's root mail folders whose names match the configuration.

        Matching is case-insensitive on the folder display name.

        Raises:
            ValueError: If none of the configured folders exist for the user.
        """
        client_user = self.connection_config.get_client().users[self.index_config.user_email]
        root_mail_folders = client_user.mail_folders.get().execute_query()

        selected_names_normalized = {
            folder_name.lower() for folder_name in self.index_config.outlook_folders
        }
        selected_root_mail_folders = [
            folder
            for folder in root_mail_folders
            if folder.display_name.lower() in selected_names_normalized
        ]

        if not selected_root_mail_folders:
            logger.error(
                f"Root folders selected in configuration: {self.index_config.outlook_folders} "
                f"not found for user email {self.index_config.user_email}. Aborting."
            )
            raise ValueError("Root folders selected in configuration not found.")

        return selected_root_mail_folders

    def _message_to_file_data(self, message: "Message") -> FileData:
        """Convert a Graph message into a ``FileData`` record."""
        fullpath = self._generate_fullpath(message)

        return FileData(
            identifier=message.id,
            connector_type=CONNECTOR_TYPE,
            source_identifiers=SourceIdentifiers(filename=fullpath.name, fullpath=str(fullpath)),
            metadata=FileDataSourceMetadata(
                url=message.resource_url,
                version=message.change_key,
                # Timestamps are stamped as UTC before conversion to epoch seconds.
                date_modified=str(
                    message.last_modified_datetime.replace(tzinfo=timezone.utc).timestamp()
                ),
                date_created=str(message.created_datetime.replace(tzinfo=timezone.utc).timestamp()),
                date_processed=str(time.time()),
                # The downloader relies on both keys to fetch the message.
                record_locator={
                    "message_id": message.id,
                    "user_email": self.index_config.user_email,
                },
            ),
            additional_metadata={
                "sent_from": str(message.sent_from),
                "to_recipients": [str(recipient) for recipient in message.to_recipients],
                # Previously this was populated from `to_recipients` by mistake.
                "bcc_recipients": [str(recipient) for recipient in message.bcc_recipients],
                "subject": message.subject,
                "conversation_id": message.conversation_id,
                "is_draft": message.is_draft,
                "is_read": message.is_read,
                "has_attachments": message.has_attachments,
                "importance": message.importance,
            },
        )

    def _generate_fullpath(self, message: "Message") -> Path:
        """Derive a short ``.eml`` filename from the SHA-256 of the message id."""
        return Path(hashlib.sha256(message.id.encode("utf-8")).hexdigest()[:16] + ".eml")
182
+
183
+
184
# The Outlook downloader adds no options of its own beyond the base config.
class OutlookDownloaderConfig(DownloaderConfig):
    pass
186
+
187
+
188
@dataclass
class OutlookDownloader(Downloader):
    """Download a single Outlook message to the local download path.

    The message is located through the ``user_email`` and ``message_id`` keys
    of ``file_data.metadata.record_locator``.
    """

    connector_type: str = CONNECTOR_TYPE
    connection_config: OutlookConnectionConfig
    download_config: OutlookDownloaderConfig = field(default_factory=OutlookDownloaderConfig)

    def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
        """Download the message described by ``file_data``.

        Raises:
            ValueError: If no download path can be generated, or the record
                locator is missing or incomplete.
        """
        # NOTE: Indexer should provide source identifiers required to generate the download path
        download_path = self.get_download_path(file_data)
        if download_path is None:
            logger.error(
                "Generated download path is None, source_identifiers might be missing "
                "from FileData."
            )
            raise ValueError("Generated invalid download path.")

        self._download_message(file_data, download_path)
        return self.generate_download_response(file_data, download_path)

    def is_async(self) -> bool:
        return False

    def _download_message(self, file_data: FileData, download_path: Path) -> None:
        """Fetch the message through the Graph client and write it to ``download_path``.

        Raises:
            ValueError: If the record locator lacks ``user_email`` or ``message_id``.
        """
        # NOTE: Indexer should supply the record locator in metadata
        record_locator = file_data.metadata.record_locator
        if (
            record_locator is None
            or "user_email" not in record_locator
            or "message_id" not in record_locator
        ):
            logger.error(
                f"Invalid record locator in metadata: {record_locator}. "
                "Keys 'user_email' and 'message_id' must be present."
            )
            raise ValueError("Invalid record locator.")

        user_email = record_locator["user_email"]
        message_id = record_locator["message_id"]

        message = self.connection_config.get_client().users[user_email].messages[message_id]
        # Make sure the destination directory exists before opening the file.
        download_path.parent.mkdir(exist_ok=True, parents=True)

        with open(download_path, "wb") as file:
            message.download(file).execute_query()
231
+
232
+
233
# Registry entry bundling the Outlook connection, indexer and downloader
# classes together with their respective config classes.
outlook_source_entry = SourceRegistryEntry(
    connection_config=OutlookConnectionConfig,
    indexer_config=OutlookIndexerConfig,
    indexer=OutlookIndexer,
    downloader_config=OutlookDownloaderConfig,
    downloader=OutlookDownloader,
)
@@ -94,6 +94,10 @@ class PineconeUploaderConfig(UploaderConfig):
94
94
  pool_threads: Optional[int] = Field(
95
95
  default=1, description="Optional limit on number of threads to use for upload"
96
96
  )
97
+ namespace: Optional[str] = Field(
98
+ default=None,
99
+ description="The namespace to write to. If not specified, the default namespace is used",
100
+ )
97
101
 
98
102
 
99
103
  @dataclass
@@ -183,7 +187,11 @@ class PineconeUploader(Uploader):
183
187
  pool_threads = max_pool_threads
184
188
  index = self.connection_config.get_index(pool_threads=pool_threads)
185
189
  with index:
186
- async_results = [index.upsert(vectors=chunk, async_req=True) for chunk in chunks]
190
+ upsert_kwargs = [{"vectors": chunk, "async_req": True} for chunk in chunks]
191
+ if namespace := self.upload_config.namespace:
192
+ for kwargs in upsert_kwargs:
193
+ kwargs["namespace"] = namespace
194
+ async_results = [index.upsert(**kwarg) for kwarg in upsert_kwargs]
187
195
  # Wait for and retrieve responses (this raises in case of error)
188
196
  try:
189
197
  results = [async_result.get() for async_result in async_results]