unstructured-ingest 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. More details are available from the original diff page (the link was not preserved in this text capture).

Files changed (385)
  1. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/PKG-INFO +5 -2
  2. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/setup.py +6 -2
  3. unstructured-ingest-0.0.4/test/test_chunking_utils.py +36 -0
  4. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/test/test_interfaces.py +2 -2
  5. unstructured-ingest-0.0.4/test/test_utils_v2.py +82 -0
  6. unstructured-ingest-0.0.4/unstructured_ingest/__version__.py +1 -0
  7. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cli.py +6 -1
  8. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/__init__.py +4 -4
  9. unstructured-ingest-0.0.3/unstructured_ingest/cli/cmds/astra.py → unstructured-ingest-0.0.4/unstructured_ingest/cli/cmds/astradb.py +9 -9
  10. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/interfaces.py +13 -6
  11. unstructured-ingest-0.0.3/unstructured_ingest/connector/astra.py → unstructured-ingest-0.0.4/unstructured_ingest/connector/astradb.py +29 -29
  12. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/biomed.py +12 -5
  13. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/confluence.py +3 -3
  14. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/github.py +3 -2
  15. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/google_drive.py +1 -2
  16. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/mongodb.py +1 -2
  17. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/client.py +31 -16
  18. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/connector.py +3 -2
  19. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/registry.py +2 -2
  20. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/vectara.py +7 -2
  21. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/interfaces.py +13 -9
  22. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/interfaces.py +8 -3
  23. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/reformat/chunking.py +13 -9
  24. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/reformat/embedding.py +3 -3
  25. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/__init__.py +2 -2
  26. unstructured-ingest-0.0.3/unstructured_ingest/runner/astra.py → unstructured-ingest-0.0.4/unstructured_ingest/runner/astradb.py +7 -7
  27. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/__init__.py +2 -2
  28. unstructured-ingest-0.0.3/unstructured_ingest/runner/writers/astra.py → unstructured-ingest-0.0.4/unstructured_ingest/runner/writers/astradb.py +7 -7
  29. unstructured-ingest-0.0.4/unstructured_ingest/utils/chunking.py +45 -0
  30. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/utils/dep_check.py +1 -1
  31. unstructured-ingest-0.0.4/unstructured_ingest/utils/google_filetype.py +9 -0
  32. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/base/cmd.py +57 -13
  33. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/base/dest.py +21 -12
  34. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/base/src.py +35 -23
  35. unstructured-ingest-0.0.4/unstructured_ingest/v2/cli/cmds.py +14 -0
  36. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/utils.py → unstructured-ingest-0.0.4/unstructured_ingest/v2/cli/utils/click.py +36 -89
  37. unstructured-ingest-0.0.4/unstructured_ingest/v2/cli/utils/model_conversion.py +199 -0
  38. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/connector.py +5 -7
  39. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/downloader.py +8 -5
  40. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/file_data.py +8 -2
  41. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/indexer.py +3 -4
  42. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/processor.py +10 -10
  43. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/upload_stager.py +3 -3
  44. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/uploader.py +3 -3
  45. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/pipeline.py +1 -5
  46. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/chunk.py +5 -11
  47. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/download.py +13 -11
  48. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/embed.py +5 -11
  49. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/filter.py +1 -6
  50. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/index.py +14 -10
  51. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/partition.py +5 -5
  52. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/stage.py +4 -7
  53. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -6
  54. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/upload.py +2 -9
  55. unstructured-ingest-0.0.4/unstructured_ingest/v2/processes/__init__.py +18 -0
  56. unstructured-ingest-0.0.4/unstructured_ingest/v2/processes/chunker.py +143 -0
  57. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connector_registry.py +8 -2
  58. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/__init__.py +13 -3
  59. unstructured-ingest-0.0.3/unstructured_ingest/v2/processes/connectors/astra.py → unstructured-ingest-0.0.4/unstructured_ingest/v2/processes/connectors/astradb.py +45 -35
  60. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +30 -27
  61. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/chroma.py +30 -21
  62. unstructured-ingest-0.0.4/unstructured_ingest/v2/processes/connectors/couchbase.py +151 -0
  63. unstructured-ingest-0.0.4/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +160 -0
  64. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +70 -45
  65. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +39 -16
  66. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +15 -13
  67. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +10 -11
  68. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +20 -34
  69. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +38 -13
  70. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +31 -17
  71. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -28
  72. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/google_drive.py +40 -34
  73. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/local.py +22 -14
  74. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/milvus.py +22 -18
  75. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/mongodb.py +22 -18
  76. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/onedrive.py +17 -14
  77. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/opensearch.py +66 -56
  78. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/pinecone.py +23 -20
  79. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/salesforce.py +26 -18
  80. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/sharepoint.py +51 -26
  81. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/singlestore.py +11 -15
  82. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/sql.py +29 -31
  83. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/weaviate.py +22 -13
  84. unstructured-ingest-0.0.4/unstructured_ingest/v2/processes/embedder.py +135 -0
  85. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/filter.py +11 -5
  86. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/partitioner.py +79 -33
  87. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/uncompress.py +3 -3
  88. unstructured-ingest-0.0.4/unstructured_ingest/v2/utils.py +45 -0
  89. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest.egg-info/PKG-INFO +5 -2
  90. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest.egg-info/SOURCES.txt +15 -40
  91. unstructured-ingest-0.0.4/unstructured_ingest.egg-info/requires.txt +607 -0
  92. unstructured-ingest-0.0.3/unstructured_ingest/__version__.py +0 -1
  93. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/__init__.py +0 -89
  94. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/astra.py +0 -85
  95. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +0 -72
  96. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/chroma.py +0 -108
  97. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/databricks_volumes.py +0 -161
  98. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/elasticsearch.py +0 -159
  99. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/azure.py +0 -84
  100. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/box.py +0 -58
  101. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +0 -58
  102. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +0 -69
  103. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +0 -81
  104. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/s3.py +0 -84
  105. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +0 -80
  106. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/google_drive.py +0 -74
  107. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/local.py +0 -52
  108. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/milvus.py +0 -72
  109. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/mongodb.py +0 -62
  110. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/onedrive.py +0 -91
  111. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/opensearch.py +0 -93
  112. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/pinecone.py +0 -62
  113. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/salesforce.py +0 -79
  114. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/sharepoint.py +0 -112
  115. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/singlestore.py +0 -96
  116. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/sql.py +0 -84
  117. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/weaviate.py +0 -100
  118. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/configs/__init__.py +0 -13
  119. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/configs/chunk.py +0 -89
  120. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/configs/embed.py +0 -74
  121. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/configs/filter.py +0 -28
  122. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/configs/partition.py +0 -99
  123. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/configs/processor.py +0 -88
  124. unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/interfaces.py +0 -27
  125. unstructured-ingest-0.0.3/unstructured_ingest/v2/pipeline/utils.py +0 -15
  126. unstructured-ingest-0.0.3/unstructured_ingest/v2/processes/__init__.py +0 -0
  127. unstructured-ingest-0.0.3/unstructured_ingest/v2/processes/chunker.py +0 -97
  128. unstructured-ingest-0.0.3/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -105
  129. unstructured-ingest-0.0.3/unstructured_ingest/v2/processes/embedder.py +0 -76
  130. unstructured-ingest-0.0.3/unstructured_ingest.egg-info/requires.txt +0 -208
  131. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/README.md +0 -0
  132. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/pyproject.toml +0 -0
  133. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/setup.cfg +0 -0
  134. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/test/test_error.py +0 -0
  135. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/test/test_logger.py +0 -0
  136. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/test/test_utils.py +0 -0
  137. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/__init__.py +0 -0
  138. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/__init__.py +0 -0
  139. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/base/__init__.py +0 -0
  140. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/base/cmd.py +0 -0
  141. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/base/dest.py +0 -0
  142. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/base/src.py +0 -0
  143. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmd_factory.py +0 -0
  144. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  145. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
  146. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  147. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  148. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  149. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  150. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  151. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  152. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/discord.py +0 -0
  153. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  154. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  155. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  156. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  157. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  158. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  159. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  160. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  161. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  162. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/github.py +0 -0
  163. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  164. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  165. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  166. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/jira.py +0 -0
  167. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  168. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/local.py +0 -0
  169. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  170. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/notion.py +0 -0
  171. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  172. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  173. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  174. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  175. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  176. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  177. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  178. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  179. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/slack.py +0 -0
  180. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/sql.py +0 -0
  181. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  182. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  183. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  184. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/common.py +0 -0
  185. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/cli/utils.py +0 -0
  186. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/__init__.py +0 -0
  187. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/airtable.py +0 -0
  188. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
  189. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/chroma.py +0 -0
  190. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/clarifai.py +0 -0
  191. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/databricks_volumes.py +0 -0
  192. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/delta_table.py +0 -0
  193. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/discord.py +0 -0
  194. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/elasticsearch.py +0 -0
  195. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  196. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  197. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/box.py +0 -0
  198. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  199. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
  200. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  201. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  202. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  203. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/git.py +0 -0
  204. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/gitlab.py +0 -0
  205. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/hubspot.py +0 -0
  206. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/jira.py +0 -0
  207. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/kafka.py +0 -0
  208. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/local.py +0 -0
  209. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/__init__.py +0 -0
  210. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/helpers.py +0 -0
  211. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  212. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  213. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/block.py +0 -0
  214. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  215. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  216. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  217. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  218. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  219. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  220. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  221. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  222. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  223. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  224. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  225. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  226. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  227. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  228. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  229. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  230. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  231. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  232. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  233. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  234. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  235. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  236. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  237. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  238. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  239. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  240. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  241. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  242. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  243. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database.py +0 -0
  244. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  245. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  246. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  247. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  248. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  249. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  250. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  251. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  252. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  253. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  254. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  255. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  256. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  257. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  258. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  259. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  260. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  261. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  262. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  263. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  264. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  265. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  266. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  267. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/date.py +0 -0
  268. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/file.py +0 -0
  269. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/page.py +0 -0
  270. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  271. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  272. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/notion/types/user.py +0 -0
  273. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/onedrive.py +0 -0
  274. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/opensearch.py +0 -0
  275. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/outlook.py +0 -0
  276. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/pinecone.py +0 -0
  277. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/qdrant.py +0 -0
  278. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/reddit.py +0 -0
  279. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/salesforce.py +0 -0
  280. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/sharepoint.py +0 -0
  281. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/slack.py +0 -0
  282. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/sql.py +0 -0
  283. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/weaviate.py +0 -0
  284. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/connector/wikipedia.py +0 -0
  285. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  286. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  287. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  288. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  289. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/error.py +0 -0
  290. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/evaluate.py +0 -0
  291. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  292. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  293. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  294. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/logger.py +0 -0
  295. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/main.py +0 -0
  296. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/__init__.py +0 -0
  297. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/copy.py +0 -0
  298. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  299. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/partition.py +0 -0
  300. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/permissions.py +0 -0
  301. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/pipeline.py +0 -0
  302. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  303. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/source.py +0 -0
  304. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/utils.py +0 -0
  305. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/pipeline/write.py +0 -0
  306. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/processor.py +0 -0
  307. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/airtable.py +0 -0
  308. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/base_runner.py +0 -0
  309. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/biomed.py +0 -0
  310. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/confluence.py +0 -0
  311. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/delta_table.py +0 -0
  312. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/discord.py +0 -0
  313. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/elasticsearch.py +0 -0
  314. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  315. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  316. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/box.py +0 -0
  317. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  318. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  319. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  320. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  321. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  322. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/github.py +0 -0
  323. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/gitlab.py +0 -0
  324. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/google_drive.py +0 -0
  325. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/hubspot.py +0 -0
  326. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/jira.py +0 -0
  327. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/kafka.py +0 -0
  328. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/local.py +0 -0
  329. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/mongodb.py +0 -0
  330. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/notion.py +0 -0
  331. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/onedrive.py +0 -0
  332. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/opensearch.py +0 -0
  333. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/outlook.py +0 -0
  334. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/reddit.py +0 -0
  335. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/salesforce.py +0 -0
  336. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/sharepoint.py +0 -0
  337. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/slack.py +0 -0
  338. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/utils.py +0 -0
  339. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/wikipedia.py +0 -0
  340. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
  341. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  342. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/chroma.py +0 -0
  343. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  344. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  345. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  346. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  347. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  348. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  349. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  350. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  351. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  352. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  353. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/kafka.py +0 -0
  354. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  355. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  356. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  357. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  358. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/sql.py +0 -0
  359. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/vectara.py +0 -0
  360. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  361. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/utils/__init__.py +0 -0
  362. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/utils/compression.py +0 -0
  363. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/utils/data_prep.py +0 -0
  364. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  365. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/utils/table.py +0 -0
  366. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/__init__.py +0 -0
  367. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/__init__.py +0 -0
  368. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  369. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  370. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/cli/cli.py +0 -0
  371. {unstructured-ingest-0.0.3/unstructured_ingest/v2/cli/cmds/fsspec → unstructured-ingest-0.0.4/unstructured_ingest/v2/cli/utils}/__init__.py +0 -0
  372. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/example.py +0 -0
  373. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
  374. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/interfaces/process.py +0 -0
  375. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/logger.py +0 -0
  376. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/main.py +0 -0
  377. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  378. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
  379. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  380. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  381. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  382. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
  383. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  384. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  385. {unstructured-ingest-0.0.3 → unstructured-ingest-0.0.4}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
+ Provides-Extra: remote
24
25
  Provides-Extra: csv
25
26
  Provides-Extra: doc
26
27
  Provides-Extra: docx
@@ -37,7 +38,7 @@ Provides-Extra: rst
37
38
  Provides-Extra: tsv
38
39
  Provides-Extra: xlsx
39
40
  Provides-Extra: airtable
40
- Provides-Extra: astra
41
+ Provides-Extra: astradb
41
42
  Provides-Extra: azure
42
43
  Provides-Extra: azure-cognitive-search
43
44
  Provides-Extra: biomed
@@ -45,6 +46,7 @@ Provides-Extra: box
45
46
  Provides-Extra: chroma
46
47
  Provides-Extra: clarifai
47
48
  Provides-Extra: confluence
49
+ Provides-Extra: couchbase
48
50
  Provides-Extra: delta-table
49
51
  Provides-Extra: discord
50
52
  Provides-Extra: dropbox
@@ -75,6 +77,7 @@ Provides-Extra: wikipedia
75
77
  Provides-Extra: weaviate
76
78
  Provides-Extra: databricks-volumes
77
79
  Provides-Extra: singlestore
80
+ Provides-Extra: vectara
78
81
  Provides-Extra: embed-huggingface
79
82
  Provides-Extra: embed-octoai
80
83
  Provides-Extra: embed-vertexai
@@ -81,7 +81,7 @@ all_doc_reqs = list(
81
81
  )
82
82
  connectors_reqs = {
83
83
  "airtable": load_requirements("requirements/connectors/airtable.in"),
84
- "astra": load_requirements("requirements/connectors/astra.in"),
84
+ "astradb": load_requirements("requirements/connectors/astradb.in"),
85
85
  "azure": load_requirements("requirements/connectors/azure.in"),
86
86
  "azure-cognitive-search": load_requirements(
87
87
  "requirements/connectors/azure-cognitive-search.in",
@@ -91,6 +91,7 @@ connectors_reqs = {
91
91
  "chroma": load_requirements("requirements/connectors/chroma.in"),
92
92
  "clarifai": load_requirements("requirements/connectors/clarifai.in"),
93
93
  "confluence": load_requirements("requirements/connectors/confluence.in"),
94
+ "couchbase": load_requirements("requirements/connectors/couchbase.in"),
94
95
  "delta-table": load_requirements("requirements/connectors/delta-table.in"),
95
96
  "discord": load_requirements("requirements/connectors/discord.in"),
96
97
  "dropbox": load_requirements("requirements/connectors/dropbox.in"),
@@ -121,6 +122,7 @@ connectors_reqs = {
121
122
  "weaviate": load_requirements("requirements/connectors/weaviate.in"),
122
123
  "databricks-volumes": load_requirements("requirements/connectors/databricks-volumes.in"),
123
124
  "singlestore": load_requirements("requirements/connectors/singlestore.in"),
125
+ "vectara": load_requirements("requirements/connectors/vectara.in"),
124
126
  }
125
127
 
126
128
  embed_reqs = {
@@ -150,7 +152,9 @@ docs_reqs = {
150
152
  "xlsx": xlsx_reqs,
151
153
  }
152
154
 
153
- extras_require = {}
155
+ extras_require = {
156
+ "remote": load_requirements("requirements/remote/client.in"),
157
+ }
154
158
  for d in [docs_reqs, connectors_reqs, embed_reqs]:
155
159
  extras_require.update(d)
156
160
 
@@ -0,0 +1,36 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+ from unstructured.chunking import dispatch
5
+ from unstructured.documents.elements import assign_and_map_hash_ids
6
+ from unstructured.partition.auto import partition
7
+
8
+ from unstructured_ingest.utils.chunking import (
9
+ assign_and_map_hash_ids as new_assign_and_map_hash_ids,
10
+ )
11
+
12
+ test_file_path = Path(__file__).resolve()
13
+ project_root = test_file_path.parent.parent
14
+ docs_path = project_root / "example-docs"
15
+
16
+
17
+ @pytest.mark.parametrize(
18
+ "chunking_strategy",
19
+ ["basic", "by_title"],
20
+ )
21
+ def test_assign_and_map_hash_ids(chunking_strategy):
22
+ # Make sure the new logic working on dict content matches the
23
+ # results if using the unstructured version
24
+ file_path = docs_path / "book-war-and-peace-1p.txt"
25
+ elements = partition(filename=str(file_path.resolve()), strategy="fast")
26
+ chunked_elements = dispatch.chunk(elements=elements, chunking_strategy=chunking_strategy)
27
+ chunked_elements_copy = chunked_elements.copy()
28
+
29
+ hashed_chunked_elements = assign_and_map_hash_ids(chunked_elements)
30
+ og_chunked_elements_dicts = [e.to_dict() for e in hashed_chunked_elements]
31
+
32
+ new_chunked_elements_dicts = [e.to_dict() for e in chunked_elements_copy]
33
+ new_chunked_elements_dicts = new_assign_and_map_hash_ids(new_chunked_elements_dicts)
34
+
35
+ for e1, e2 in zip(og_chunked_elements_dicts, new_chunked_elements_dicts):
36
+ assert e1 == e2
@@ -90,7 +90,7 @@ class ExampleIngestDoc(BaseSingleIngestDoc):
90
90
  pass
91
91
 
92
92
 
93
- @pytest.fixture()
93
+ @pytest.fixture
94
94
  def partition_test_results():
95
95
  # Reusable partition test results, calculated only once
96
96
  result = partition(
@@ -107,7 +107,7 @@ def partition_test_results():
107
107
  return result
108
108
 
109
109
 
110
- @pytest.fixture()
110
+ @pytest.fixture
111
111
  def partition_file_test_results(partition_test_results):
112
112
  # Reusable partition_file test results, calculated only once
113
113
  return elements_to_dicts(partition_test_results)
@@ -0,0 +1,82 @@
1
+ import json
2
+ from typing import Any
3
+
4
+ from pydantic import BaseModel, Field, Secret, SecretStr
5
+ from pydantic.types import _SecretBase
6
+
7
+ from unstructured_ingest.v2.utils import serialize_base_model, serialize_base_model_json
8
+
9
+
10
+ class MockChildBaseModel(BaseModel):
11
+ child_secret_str: SecretStr
12
+ child_secret_float: Secret[float]
13
+ child_not_secret_dict: dict[str, Any] = Field(default_factory=dict)
14
+
15
+
16
+ class MockBaseModel(BaseModel):
17
+ secret_str: SecretStr
18
+ not_secret_bool: bool
19
+ secret_child_base: Secret[MockChildBaseModel]
20
+ not_secret_list: list[int] = Field(default_factory=list)
21
+
22
+
23
+ model = MockBaseModel(
24
+ secret_str="secret string",
25
+ not_secret_bool=False,
26
+ secret_child_base=MockChildBaseModel(
27
+ child_secret_str="child secret string",
28
+ child_secret_float=3.14,
29
+ child_not_secret_dict={"key": "value"},
30
+ ),
31
+ not_secret_list=[1, 2, 3],
32
+ )
33
+
34
+
35
+ def test_serialize_base_model():
36
+
37
+ serialized_dict = model.dict()
38
+ assert isinstance(serialized_dict["secret_str"], _SecretBase)
39
+ assert isinstance(serialized_dict["secret_child_base"], _SecretBase)
40
+
41
+ serialized_dict_w_secrets = serialize_base_model(model=model)
42
+ assert not isinstance(serialized_dict_w_secrets["secret_str"], _SecretBase)
43
+ assert not isinstance(serialized_dict_w_secrets["secret_child_base"], _SecretBase)
44
+
45
+ expected_dict = {
46
+ "secret_str": "secret string",
47
+ "not_secret_bool": False,
48
+ "secret_child_base": {
49
+ "child_secret_str": "child secret string",
50
+ "child_secret_float": 3.14,
51
+ "child_not_secret_dict": {"key": "value"},
52
+ },
53
+ "not_secret_list": [1, 2, 3],
54
+ }
55
+
56
+ assert serialized_dict_w_secrets == expected_dict
57
+
58
+
59
+ def test_serialize_base_model_json():
60
+ serialized_json = model.json()
61
+ serialized_dict = json.loads(serialized_json)
62
+ expected_dict = {
63
+ "secret_str": "**********",
64
+ "not_secret_bool": False,
65
+ "secret_child_base": "**********",
66
+ "not_secret_list": [1, 2, 3],
67
+ }
68
+ assert expected_dict == serialized_dict
69
+
70
+ serialized_json_w_secrets = serialize_base_model_json(model=model)
71
+ serialized_dict_w_secrets = json.loads(serialized_json_w_secrets)
72
+ expected_dict_w_secrets = {
73
+ "secret_str": "secret string",
74
+ "not_secret_bool": False,
75
+ "secret_child_base": {
76
+ "child_secret_str": "child secret string",
77
+ "child_secret_float": 3.14,
78
+ "child_not_secret_dict": {"key": "value"},
79
+ },
80
+ "not_secret_list": [1, 2, 3],
81
+ }
82
+ assert expected_dict_w_secrets == serialized_dict_w_secrets
@@ -0,0 +1 @@
1
+ __version__ = "0.0.4" # pragma: no cover
@@ -1,16 +1,21 @@
1
+ from typing import TYPE_CHECKING
2
+
1
3
  import click
2
4
 
3
5
  from unstructured_ingest.cli import dest, src
4
6
  from unstructured_ingest.v2.cli.cmds import dest as dest_v2
5
7
  from unstructured_ingest.v2.cli.cmds import src as src_v2
6
8
 
9
+ if TYPE_CHECKING:
10
+ from click import Command
11
+
7
12
 
8
13
  @click.group()
9
14
  def ingest():
10
15
  pass
11
16
 
12
17
 
13
- def get_cmd() -> click.Command:
18
+ def get_cmd() -> "Command":
14
19
  """Construct and return a Click command object representing the main command for the CLI.
15
20
 
16
21
  This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those
@@ -7,8 +7,8 @@ from unstructured_ingest.cli.base.src import BaseSrcCmd
7
7
  from unstructured_ingest.cli.cmds.fsspec.sftp import get_base_src_cmd as sftp_base_src_cmd
8
8
 
9
9
  from .airtable import get_base_src_cmd as airtable_base_src_cmd
10
- from .astra import get_base_dest_cmd as astra_base_dest_cmd
11
- from .astra import get_base_src_cmd as astra_base_src_cmd
10
+ from .astradb import get_base_dest_cmd as astradb_base_dest_cmd
11
+ from .astradb import get_base_src_cmd as astradb_base_src_cmd
12
12
  from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd
13
13
  from .biomed import get_base_src_cmd as biomed_base_src_cmd
14
14
  from .chroma import get_base_dest_cmd as chroma_base_dest_cmd
@@ -63,7 +63,7 @@ if t.TYPE_CHECKING:
63
63
 
64
64
  base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [
65
65
  airtable_base_src_cmd,
66
- astra_base_src_cmd,
66
+ astradb_base_src_cmd,
67
67
  azure_base_src_cmd,
68
68
  biomed_base_src_cmd,
69
69
  box_base_src_cmd,
@@ -106,7 +106,7 @@ if src_duplicates:
106
106
  )
107
107
 
108
108
  base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [
109
- astra_base_dest_cmd,
109
+ astradb_base_dest_cmd,
110
110
  azure_base_dest_cmd,
111
111
  box_base_dest_cmd,
112
112
  chroma_base_dest_cmd,
@@ -4,11 +4,11 @@ from dataclasses import dataclass
4
4
  import click
5
5
 
6
6
  from unstructured_ingest.cli.interfaces import CliConfig, Dict
7
- from unstructured_ingest.connector.astra import AstraWriteConfig, SimpleAstraConfig
7
+ from unstructured_ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig
8
8
 
9
9
 
10
10
  @dataclass
11
- class AstraCliConfig(SimpleAstraConfig, CliConfig):
11
+ class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig):
12
12
  @staticmethod
13
13
  def get_cli_options() -> t.List[click.Option]:
14
14
  options = [
@@ -48,7 +48,7 @@ class AstraCliConfig(SimpleAstraConfig, CliConfig):
48
48
 
49
49
 
50
50
  @dataclass
51
- class AstraCliWriteConfig(AstraWriteConfig, CliConfig):
51
+ class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig):
52
52
  @staticmethod
53
53
  def get_cli_options() -> t.List[click.Option]:
54
54
  options = [
@@ -81,8 +81,8 @@ def get_base_src_cmd():
81
81
  from unstructured_ingest.cli.base.src import BaseSrcCmd
82
82
 
83
83
  cmd_cls = BaseSrcCmd(
84
- cmd_name="astra",
85
- cli_config=AstraCliConfig,
84
+ cmd_name="astradb",
85
+ cli_config=AstraDBCliConfig,
86
86
  )
87
87
  return cmd_cls
88
88
 
@@ -91,9 +91,9 @@ def get_base_dest_cmd():
91
91
  from unstructured_ingest.cli.base.dest import BaseDestCmd
92
92
 
93
93
  cmd_cls = BaseDestCmd(
94
- cmd_name="astra",
95
- cli_config=AstraCliConfig,
96
- additional_cli_options=[AstraCliWriteConfig],
97
- write_config=AstraWriteConfig,
94
+ cmd_name="astradb",
95
+ cli_config=AstraDBCliConfig,
96
+ additional_cli_options=[AstraDBCliWriteConfig],
97
+ write_config=AstraDBWriteConfig,
98
98
  )
99
99
  return cmd_cls
@@ -11,7 +11,6 @@ from pathlib import Path
11
11
  import click
12
12
  from dataclasses_json.core import Json
13
13
  from typing_extensions import Self
14
- from unstructured.chunking import CHUNK_MAX_CHARS_DEFAULT, CHUNK_MULTI_PAGE_DEFAULT
15
14
 
16
15
  from unstructured_ingest.interfaces import (
17
16
  BaseConfig,
@@ -25,6 +24,9 @@ from unstructured_ingest.interfaces import (
25
24
  RetryStrategyConfig,
26
25
  )
27
26
 
27
+ CHUNK_MAX_CHARS_DEFAULT: int = 500
28
+ CHUNK_MULTI_PAGE_DEFAULT: bool = True
29
+
28
30
 
29
31
  class Dict(click.ParamType):
30
32
  name = "dict"
@@ -412,14 +414,19 @@ class CliFilesStorageConfig(FileStorageConfig, CliMixin):
412
414
  class CliEmbeddingConfig(EmbeddingConfig, CliMixin):
413
415
  @staticmethod
414
416
  def get_cli_options() -> t.List[click.Option]:
415
- from unstructured.embed import EMBEDDING_PROVIDER_TO_CLASS_MAP
416
-
417
+ embed_providers = [
418
+ "langchain-openai",
419
+ "langchain-huggingface",
420
+ "langchain-aws-bedrock",
421
+ "langchain-vertexai",
422
+ "langchain-voyageai",
423
+ "octoai",
424
+ ]
417
425
  options = [
418
426
  click.Option(
419
427
  ["--embedding-provider"],
420
- help="Type of the embedding class to be used. Can be one of: "
421
- f"{list(EMBEDDING_PROVIDER_TO_CLASS_MAP)}",
422
- type=click.Choice(list(EMBEDDING_PROVIDER_TO_CLASS_MAP)),
428
+ help="Type of the embedding class to be used.",
429
+ type=click.Choice(embed_providers),
423
430
  ),
424
431
  click.Option(
425
432
  ["--embedding-api-key"],
@@ -30,23 +30,23 @@ NON_INDEXED_FIELDS = ["metadata._node_content", "content"]
30
30
 
31
31
 
32
32
  @dataclass
33
- class AstraAccessConfig(AccessConfig):
33
+ class AstraDBAccessConfig(AccessConfig):
34
34
  token: str = enhanced_field(sensitive=True)
35
35
  api_endpoint: str = enhanced_field(sensitive=True)
36
36
 
37
37
 
38
38
  @dataclass
39
- class SimpleAstraConfig(BaseConnectorConfig):
40
- access_config: AstraAccessConfig
39
+ class SimpleAstraDBConfig(BaseConnectorConfig):
40
+ access_config: AstraDBAccessConfig
41
41
  collection_name: str
42
42
  namespace: t.Optional[str] = None
43
43
 
44
44
 
45
45
  @dataclass
46
- class AstraIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
47
- connector_config: SimpleAstraConfig
46
+ class AstraDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
47
+ connector_config: SimpleAstraDBConfig
48
48
  metadata: t.Dict[str, str] = field(default_factory=dict)
49
- registry_name: str = "astra"
49
+ registry_name: str = "astradb"
50
50
 
51
51
  @property
52
52
  def filename(self):
@@ -75,7 +75,7 @@ class AstraIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
75
75
  )
76
76
 
77
77
  @SourceConnectionError.wrap
78
- @requires_dependencies(["astrapy"], extras="astra")
78
+ @requires_dependencies(["astrapy"], extras="astradb")
79
79
  @BaseSingleIngestDoc.skip_if_file_exists
80
80
  def get_file(self):
81
81
  self.filename.parent.mkdir(parents=True, exist_ok=True)
@@ -89,19 +89,19 @@ class AstraIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
89
89
 
90
90
 
91
91
  @dataclass
92
- class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
93
- connector_config: SimpleAstraConfig
92
+ class AstraDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
93
+ connector_config: SimpleAstraDBConfig
94
94
  _astra_db: t.Optional["AstraDB"] = field(init=False, default=None)
95
95
  _astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None)
96
96
 
97
97
  @property
98
- @requires_dependencies(["astrapy"], extras="astra")
98
+ @requires_dependencies(["astrapy"], extras="astradb")
99
99
  def astra_db_collection(self) -> "AstraDBCollection":
100
100
  if self._astra_db_collection is None:
101
101
  from astrapy.db import AstraDB
102
102
 
103
103
  # Build the Astra DB object.
104
- # caller_name/version for AstraDB tracking
104
+ # caller_name/version for Astra DB tracking
105
105
  self._astra_db = AstraDB(
106
106
  api_endpoint=self.connector_config.access_config.api_endpoint,
107
107
  token=self.connector_config.access_config.token,
@@ -116,12 +116,12 @@ class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
116
116
  )
117
117
  return self._astra_db_collection # type: ignore
118
118
 
119
- @requires_dependencies(["astrapy"], extras="astra")
119
+ @requires_dependencies(["astrapy"], extras="astradb")
120
120
  @SourceConnectionError.wrap # type: ignore
121
121
  def initialize(self):
122
122
  _ = self.astra_db_collection
123
123
 
124
- @requires_dependencies(["astrapy"], extras="astra")
124
+ @requires_dependencies(["astrapy"], extras="astradb")
125
125
  def check_connection(self):
126
126
  try:
127
127
  _ = self.astra_db_collection
@@ -129,14 +129,14 @@ class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
129
129
  logger.error(f"Failed to validate connection {e}", exc_info=True)
130
130
  raise SourceConnectionError(f"failed to validate connection: {e}")
131
131
 
132
- @requires_dependencies(["astrapy"], extras="astra")
132
+ @requires_dependencies(["astrapy"], extras="astradb")
133
133
  def get_ingest_docs(self): # type: ignore
134
134
  # Perform the find operation
135
- astra_docs = list(self.astra_db_collection.paginated_find())
135
+ astra_db_docs = list(self.astra_db_collection.paginated_find())
136
136
 
137
137
  doc_list = []
138
- for record in astra_docs:
139
- doc = AstraIngestDoc(
138
+ for record in astra_db_docs:
139
+ doc = AstraDBIngestDoc(
140
140
  connector_config=self.connector_config,
141
141
  processor_config=self.processor_config,
142
142
  read_config=self.read_config,
@@ -151,16 +151,16 @@ class AstraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
151
151
 
152
152
 
153
153
  @dataclass
154
- class AstraWriteConfig(WriteConfig):
154
+ class AstraDBWriteConfig(WriteConfig):
155
155
  embedding_dimension: int
156
156
  requested_indexing_policy: t.Optional[t.Dict[str, t.Any]] = None
157
157
  batch_size: int = 20
158
158
 
159
159
 
160
160
  @dataclass
161
- class AstraDestinationConnector(BaseDestinationConnector):
162
- write_config: AstraWriteConfig
163
- connector_config: SimpleAstraConfig
161
+ class AstraDBDestinationConnector(BaseDestinationConnector):
162
+ write_config: AstraDBWriteConfig
163
+ connector_config: SimpleAstraDBConfig
164
164
  _astra_db: t.Optional["AstraDB"] = field(init=False, default=None)
165
165
  _astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None)
166
166
 
@@ -179,7 +179,7 @@ class AstraDestinationConnector(BaseDestinationConnector):
179
179
  return _asdict(self_cp, **kwargs)
180
180
 
181
181
  @property
182
- @requires_dependencies(["astrapy"], extras="astra")
182
+ @requires_dependencies(["astrapy"], extras="astradb")
183
183
  def astra_db_collection(self) -> "AstraDBCollection":
184
184
  if self._astra_db_collection is None:
185
185
  from astrapy.db import AstraDB
@@ -187,11 +187,11 @@ class AstraDestinationConnector(BaseDestinationConnector):
187
187
  collection_name = self.connector_config.collection_name
188
188
  embedding_dimension = self.write_config.embedding_dimension
189
189
 
190
- # If the user has requested an indexing policy, pass it to the AstraDB
190
+ # If the user has requested an indexing policy, pass it to the Astra DB
191
191
  requested_indexing_policy = self.write_config.requested_indexing_policy
192
192
  options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None
193
193
 
194
- # caller_name/version for AstraDB tracking
194
+ # caller_name/version for Astra DB tracking
195
195
  self._astra_db = AstraDB(
196
196
  api_endpoint=self.connector_config.access_config.api_endpoint,
197
197
  token=self.connector_config.access_config.token,
@@ -208,12 +208,12 @@ class AstraDestinationConnector(BaseDestinationConnector):
208
208
  )
209
209
  return self._astra_db_collection
210
210
 
211
- @requires_dependencies(["astrapy"], extras="astra")
211
+ @requires_dependencies(["astrapy"], extras="astradb")
212
212
  @DestinationConnectionError.wrap
213
213
  def initialize(self):
214
214
  _ = self.astra_db_collection
215
215
 
216
- @requires_dependencies(["astrapy"], extras="astra")
216
+ @requires_dependencies(["astrapy"], extras="astradb")
217
217
  def check_connection(self):
218
218
  try:
219
219
  _ = self.astra_db_collection
@@ -222,11 +222,11 @@ class AstraDestinationConnector(BaseDestinationConnector):
222
222
  raise DestinationConnectionError(f"failed to validate connection: {e}")
223
223
 
224
224
  def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
225
- logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra.")
225
+ logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra DB.")
226
226
 
227
- astra_batch_size = self.write_config.batch_size
227
+ astra_db_batch_size = self.write_config.batch_size
228
228
 
229
- for batch in batch_generator(elements_dict, astra_batch_size):
229
+ for batch in batch_generator(elements_dict, astra_db_batch_size):
230
230
  self._astra_db_collection.insert_many(batch)
231
231
 
232
232
  def normalize_dict(self, element_dict: dict) -> dict:
@@ -5,9 +5,6 @@ from dataclasses import dataclass
5
5
  from ftplib import FTP, error_perm
6
6
  from pathlib import Path
7
7
 
8
- import requests
9
- from requests.adapters import HTTPAdapter
10
-
11
8
  from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
12
9
  from unstructured_ingest.interfaces import (
13
10
  BaseConnectorConfig,
@@ -20,6 +17,10 @@ from unstructured_ingest.logger import logger
20
17
  from unstructured_ingest.utils.data_prep import (
21
18
  validate_date_args,
22
19
  )
20
+ from unstructured_ingest.utils.dep_check import requires_dependencies
21
+
22
+ if t.TYPE_CHECKING:
23
+ from requests import Response, Session
23
24
 
24
25
  DOMAIN = "ftp.ncbi.nlm.nih.gov"
25
26
  FTP_DOMAIN = f"ftp://{DOMAIN}"
@@ -165,8 +166,11 @@ class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
165
166
 
166
167
  return endpoint_url
167
168
 
169
+ @requires_dependencies(["requests"], extras="biomed")
168
170
  def _list_objects_api(self) -> t.List[BiomedFileMeta]:
169
171
  from bs4 import BeautifulSoup
172
+ from requests import Session
173
+ from requests.adapters import HTTPAdapter
170
174
 
171
175
  def urls_to_metadata(urls):
172
176
  files = []
@@ -193,7 +197,7 @@ class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
193
197
  endpoint_url = self.get_base_endpoints_url()
194
198
 
195
199
  while endpoint_url:
196
- session = requests.Session()
200
+ session = Session()
197
201
  adapter = HTTPAdapter()
198
202
  session.mount("http://", adapter)
199
203
  session.mount("https://", adapter)
@@ -213,7 +217,7 @@ class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
213
217
  return files
214
218
 
215
219
  @SourceConnectionNetworkError.wrap
216
- def _get_request(self, session: requests.Session, endpoint_url: str) -> requests.Response:
220
+ def _get_request(self, session: "Session", endpoint_url: str) -> "Response":
217
221
  return session.get(endpoint_url, timeout=self.connector_config.max_request_time)
218
222
 
219
223
  def _list_objects(self) -> t.List[BiomedFileMeta]:
@@ -293,7 +297,10 @@ class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
293
297
  def initialize(self):
294
298
  pass
295
299
 
300
+ @requires_dependencies(["requests"], extras="biomed")
296
301
  def check_connection(self):
302
+ import requests
303
+
297
304
  resp = requests.head(self.get_base_endpoints_url())
298
305
  try:
299
306
  resp.raise_for_status()
@@ -4,8 +4,6 @@ from dataclasses import dataclass, field
4
4
  from datetime import datetime
5
5
  from pathlib import Path
6
6
 
7
- import requests
8
-
9
7
  from unstructured_ingest.enhanced_dataclass import enhanced_field
10
8
  from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
11
9
  from unstructured_ingest.interfaces import (
@@ -208,8 +206,10 @@ class ConfluenceSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector
208
206
  )
209
207
  return self._confluence
210
208
 
211
- @requires_dependencies(["atlassian"], extras="Confluence")
209
+ @requires_dependencies(["atlassian", "requests"], extras="Confluence")
212
210
  def check_connection(self):
211
+ import requests
212
+
213
213
  url = "rest/api/space"
214
214
  try:
215
215
  self.confluence.request(method="HEAD", path=url)
@@ -3,8 +3,6 @@ from dataclasses import dataclass
3
3
  from datetime import datetime
4
4
  from urllib.parse import urlparse
5
5
 
6
- import requests
7
-
8
6
  from unstructured_ingest.connector.git import (
9
7
  GitIngestDoc,
10
8
  GitSourceConnector,
@@ -71,7 +69,10 @@ class GitHubIngestDoc(GitIngestDoc):
71
69
  return content_file
72
70
 
73
71
  @SourceConnectionNetworkError.wrap
72
+ @requires_dependencies(["requests"], extras="github")
74
73
  def _fetch_content(self, content_file):
74
+ import requests
75
+
75
76
  contents = b""
76
77
  if (
77
78
  not content_file.content # type: ignore
@@ -7,8 +7,6 @@ from datetime import datetime
7
7
  from mimetypes import guess_extension
8
8
  from pathlib import Path
9
9
 
10
- from unstructured.file_utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES
11
-
12
10
  from unstructured_ingest.enhanced_dataclass import enhanced_field
13
11
  from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
14
12
  from unstructured_ingest.interfaces import (
@@ -25,6 +23,7 @@ from unstructured_ingest.interfaces import (
25
23
  )
26
24
  from unstructured_ingest.logger import logger
27
25
  from unstructured_ingest.utils.dep_check import requires_dependencies
26
+ from unstructured_ingest.utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES
28
27
  from unstructured_ingest.utils.string_and_date_utils import json_to_dict
29
28
 
30
29
  if t.TYPE_CHECKING:
@@ -3,8 +3,7 @@ import typing as t
3
3
  from dataclasses import dataclass, field
4
4
  from pathlib import Path
5
5
 
6
- from unstructured.__version__ import __version__ as unstructured_version
7
-
6
+ from unstructured_ingest.__version__ import __version__ as unstructured_version
8
7
  from unstructured_ingest.enhanced_dataclass import enhanced_field
9
8
  from unstructured_ingest.enhanced_dataclass.core import _asdict
10
9
  from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError, WriteError