unstructured-ingest 0.0.7__tar.gz → 0.0.9__tar.gz

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (351)
  1. {unstructured-ingest-0.0.7/unstructured_ingest.egg-info → unstructured-ingest-0.0.9}/PKG-INFO +1 -1
  2. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/setup.py +2 -2
  3. unstructured-ingest-0.0.9/unstructured_ingest/__version__.py +1 -0
  4. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/utils/model_conversion.py +17 -1
  5. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/file_data.py +1 -0
  6. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/download.py +4 -7
  7. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/uncompress.py +3 -16
  8. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/chroma.py +7 -6
  9. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/google_drive.py +6 -5
  10. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/milvus.py +30 -13
  11. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/opensearch.py +29 -28
  12. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/utils.py +11 -1
  13. unstructured-ingest-0.0.9/unstructured_ingest/v2/processes/uncompress.py +61 -0
  14. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9/unstructured_ingest.egg-info}/PKG-INFO +1 -1
  15. unstructured-ingest-0.0.7/unstructured_ingest/__version__.py +0 -1
  16. unstructured-ingest-0.0.7/unstructured_ingest/v2/processes/uncompress.py +0 -43
  17. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/LICENSE.md +0 -0
  18. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/README.md +0 -0
  19. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/pyproject.toml +0 -0
  20. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/setup.cfg +0 -0
  21. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/test/test_chunking_utils.py +0 -0
  22. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/test/test_error.py +0 -0
  23. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/test/test_interfaces.py +0 -0
  24. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/test/test_logger.py +0 -0
  25. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/test/test_utils.py +0 -0
  26. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/test/test_utils_v2.py +0 -0
  27. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/__init__.py +0 -0
  28. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/__init__.py +0 -0
  29. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/base/__init__.py +0 -0
  30. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/base/cmd.py +0 -0
  31. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/base/dest.py +0 -0
  32. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/base/src.py +0 -0
  33. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cli.py +0 -0
  34. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmd_factory.py +0 -0
  35. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/__init__.py +0 -0
  36. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  37. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/astradb.py +0 -0
  38. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
  39. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  40. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  41. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  42. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  43. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  44. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  45. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/discord.py +0 -0
  46. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  47. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  48. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  49. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  50. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  51. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  52. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  53. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  54. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  55. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/github.py +0 -0
  56. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  57. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  58. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  59. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/jira.py +0 -0
  60. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  61. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/local.py +0 -0
  62. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  63. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/notion.py +0 -0
  64. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  65. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  66. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  67. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  68. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  69. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  70. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  71. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  72. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/slack.py +0 -0
  73. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/sql.py +0 -0
  74. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  75. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  76. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  77. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/common.py +0 -0
  78. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/interfaces.py +0 -0
  79. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/cli/utils.py +0 -0
  80. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/__init__.py +0 -0
  81. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/airtable.py +0 -0
  82. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/astradb.py +0 -0
  83. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
  84. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/biomed.py +0 -0
  85. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/chroma.py +0 -0
  86. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/clarifai.py +0 -0
  87. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/confluence.py +0 -0
  88. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/databricks_volumes.py +0 -0
  89. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/delta_table.py +0 -0
  90. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/discord.py +0 -0
  91. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/elasticsearch.py +0 -0
  92. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  93. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  94. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/box.py +0 -0
  95. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  96. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
  97. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  98. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  99. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  100. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/git.py +0 -0
  101. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/github.py +0 -0
  102. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/gitlab.py +0 -0
  103. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/google_drive.py +0 -0
  104. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/hubspot.py +0 -0
  105. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/jira.py +0 -0
  106. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/kafka.py +0 -0
  107. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/local.py +0 -0
  108. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/mongodb.py +0 -0
  109. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/__init__.py +0 -0
  110. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/client.py +0 -0
  111. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/connector.py +0 -0
  112. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/helpers.py +0 -0
  113. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  114. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  115. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/block.py +0 -0
  116. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  117. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  118. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  119. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  120. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  121. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  122. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  123. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  124. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  125. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  126. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  127. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  128. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  129. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  130. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  131. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  132. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  133. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  134. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  135. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  136. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  137. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  138. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  139. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  140. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  141. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  142. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  143. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  144. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  145. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database.py +0 -0
  146. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  147. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  148. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  149. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  150. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  151. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  152. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  153. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  154. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  155. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  156. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  157. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  158. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  159. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  160. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  161. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  162. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  163. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  164. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  165. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  166. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  167. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  168. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  169. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/date.py +0 -0
  170. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/file.py +0 -0
  171. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/page.py +0 -0
  172. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  173. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  174. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/notion/types/user.py +0 -0
  175. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/onedrive.py +0 -0
  176. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/opensearch.py +0 -0
  177. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/outlook.py +0 -0
  178. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/pinecone.py +0 -0
  179. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/qdrant.py +0 -0
  180. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/reddit.py +0 -0
  181. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/registry.py +0 -0
  182. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/salesforce.py +0 -0
  183. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/sharepoint.py +0 -0
  184. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/slack.py +0 -0
  185. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/sql.py +0 -0
  186. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/vectara.py +0 -0
  187. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/weaviate.py +0 -0
  188. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/connector/wikipedia.py +0 -0
  189. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  190. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  191. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  192. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  193. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/error.py +0 -0
  194. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/evaluate.py +0 -0
  195. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  196. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  197. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  198. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/interfaces.py +0 -0
  199. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/logger.py +0 -0
  200. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/main.py +0 -0
  201. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/__init__.py +0 -0
  202. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/copy.py +0 -0
  203. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  204. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/interfaces.py +0 -0
  205. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/partition.py +0 -0
  206. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/permissions.py +0 -0
  207. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/pipeline.py +0 -0
  208. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  209. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
  210. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
  211. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/source.py +0 -0
  212. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/utils.py +0 -0
  213. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/pipeline/write.py +0 -0
  214. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/processor.py +0 -0
  215. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/__init__.py +0 -0
  216. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/airtable.py +0 -0
  217. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/astradb.py +0 -0
  218. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/base_runner.py +0 -0
  219. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/biomed.py +0 -0
  220. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/confluence.py +0 -0
  221. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/delta_table.py +0 -0
  222. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/discord.py +0 -0
  223. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/elasticsearch.py +0 -0
  224. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  225. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  226. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/box.py +0 -0
  227. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  228. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  229. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  230. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  231. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  232. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/github.py +0 -0
  233. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/gitlab.py +0 -0
  234. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/google_drive.py +0 -0
  235. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/hubspot.py +0 -0
  236. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/jira.py +0 -0
  237. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/kafka.py +0 -0
  238. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/local.py +0 -0
  239. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/mongodb.py +0 -0
  240. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/notion.py +0 -0
  241. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/onedrive.py +0 -0
  242. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/opensearch.py +0 -0
  243. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/outlook.py +0 -0
  244. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/reddit.py +0 -0
  245. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/salesforce.py +0 -0
  246. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/sharepoint.py +0 -0
  247. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/slack.py +0 -0
  248. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/utils.py +0 -0
  249. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/wikipedia.py +0 -0
  250. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/__init__.py +0 -0
  251. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/astradb.py +0 -0
  252. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
  253. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  254. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/chroma.py +0 -0
  255. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  256. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  257. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  258. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  259. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  260. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  261. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  262. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  263. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  264. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  265. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/kafka.py +0 -0
  266. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  267. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  268. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  269. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  270. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/sql.py +0 -0
  271. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/vectara.py +0 -0
  272. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  273. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/__init__.py +0 -0
  274. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/chunking.py +0 -0
  275. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/compression.py +0 -0
  276. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/data_prep.py +0 -0
  277. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/dep_check.py +0 -0
  278. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/google_filetype.py +0 -0
  279. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  280. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/utils/table.py +0 -0
  281. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/__init__.py +0 -0
  282. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/__init__.py +0 -0
  283. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  284. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/base/cmd.py +0 -0
  285. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/base/dest.py +0 -0
  286. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  287. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/base/src.py +0 -0
  288. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/cli.py +0 -0
  289. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/cmds.py +0 -0
  290. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  291. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/cli/utils/click.py +0 -0
  292. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
  293. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/connector.py +0 -0
  294. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
  295. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
  296. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/process.py +0 -0
  297. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/processor.py +0 -0
  298. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
  299. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
  300. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/logger.py +0 -0
  301. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/main.py +0 -0
  302. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/otel.py +0 -0
  303. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  304. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
  305. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/otel.py +0 -0
  306. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/pipeline.py +0 -0
  307. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  308. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/chunk.py +0 -0
  309. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/embed.py +0 -0
  310. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/filter.py +0 -0
  311. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/index.py +0 -0
  312. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/partition.py +0 -0
  313. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/stage.py +0 -0
  314. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/pipeline/steps/upload.py +0 -0
  315. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/__init__.py +0 -0
  316. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/chunker.py +0 -0
  317. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
  318. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/__init__.py +0 -0
  319. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/astradb.py +0 -0
  320. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +0 -0
  321. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/couchbase.py +0 -0
  322. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -0
  323. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +0 -0
  324. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  325. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +0 -0
  326. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +0 -0
  327. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +0 -0
  328. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +0 -0
  329. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +0 -0
  330. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +0 -0
  331. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +0 -0
  332. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  333. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
  334. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/local.py +0 -0
  335. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/mongodb.py +0 -0
  336. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/onedrive.py +0 -0
  337. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/pinecone.py +0 -0
  338. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
  339. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
  340. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/singlestore.py +0 -0
  341. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/sql.py +0 -0
  342. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/connectors/weaviate.py +0 -0
  343. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/embedder.py +0 -0
  344. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/filter.py +0 -0
  345. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/processes/partitioner.py +0 -0
  346. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest/v2/utils.py +0 -0
  347. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest.egg-info/SOURCES.txt +0 -0
  348. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  349. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  350. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest.egg-info/requires.txt +275 -275
  351. {unstructured-ingest-0.0.7 → unstructured-ingest-0.0.9}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -191,6 +191,6 @@ setup(
191
191
  },
192
192
  install_requires=load_requirements("requirements/common/base.in"),
193
193
  extras_require=extras_require,
194
- package_dir={"unstructured": "unstructured"},
195
- package_data={"unstructured": ["nlp/*.txt", "py.typed"]},
194
+ package_dir={"unstructured_ingest": "unstructured_ingest"},
195
+ package_data={"unstructured_ingest": ["py.typed"]},
196
196
  )
@@ -0,0 +1 @@
1
+ __version__ = "0.0.9" # pragma: no cover
@@ -3,7 +3,18 @@ import datetime
3
3
  from collections import Counter
4
4
  from enum import EnumMeta
5
5
  from pathlib import Path
6
- from typing import Any, Callable, Literal, Optional, Type, TypedDict, Union, get_args, get_origin
6
+ from typing import (
7
+ Annotated,
8
+ Any,
9
+ Callable,
10
+ Literal,
11
+ Optional,
12
+ Type,
13
+ TypedDict,
14
+ Union,
15
+ get_args,
16
+ get_origin,
17
+ )
7
18
  from uuid import UUID
8
19
 
9
20
  import click
@@ -102,6 +113,11 @@ def get_type_from_annotation(field_type: Any) -> click.ParamType:
102
113
  if field_origin is Union and len(field_args) == 2 and NoneType in field_args:
103
114
  field_type = next(field_arg for field_arg in field_args if field_arg is not None)
104
115
  return get_type_from_annotation(field_type=field_type)
116
+ if field_origin is Annotated:
117
+ field_origin = field_args[0]
118
+ field_metadata = field_args[1]
119
+ if isinstance(field_metadata, click.ParamType):
120
+ return field_metadata
105
121
  if field_origin is Secret and len(field_args) == 1:
106
122
  field_type = next(field_arg for field_arg in field_args if field_arg is not None)
107
123
  return get_type_from_annotation(field_type=field_type)
@@ -42,6 +42,7 @@ class FileData(DataClassJsonMixin):
42
42
  metadata: FileDataSourceMetadata = field(default_factory=lambda: FileDataSourceMetadata())
43
43
  additional_metadata: dict[str, Any] = field(default_factory=dict)
44
44
  reprocess: bool = False
45
+ local_download_path: Optional[str] = None
45
46
 
46
47
  @classmethod
47
48
  def from_file(cls, path: str) -> "FileData":
@@ -68,10 +68,9 @@ class DownloadStep(PipelineStep):
68
68
  def update_file_data(
69
69
  self, file_data: FileData, file_data_path: Path, download_path: Path
70
70
  ) -> None:
71
+ file_data.local_download_path = str(download_path.resolve())
71
72
  file_size_bytes = download_path.stat().st_size
72
- changed = False
73
73
  if not file_data.metadata.filesize_bytes and file_size_bytes:
74
- changed = True
75
74
  file_data.metadata.filesize_bytes = file_size_bytes
76
75
  if (
77
76
  file_data.metadata.filesize_bytes
@@ -82,12 +81,10 @@ class DownloadStep(PipelineStep):
82
81
  f"({file_data.metadata.filesize_bytes}) doesn't "
83
82
  f"match size of local file: {file_size_bytes}, updating"
84
83
  )
85
- changed = True
86
84
  file_data.metadata.filesize_bytes = file_size_bytes
87
- if changed:
88
- logger.debug(f"Updating file data with new content: {file_data.to_dict()}")
89
- with file_data_path.open("w") as file:
90
- json.dump(file_data.to_dict(), file, indent=2)
85
+ logger.debug(f"Updating file data with new content: {file_data.to_dict()}")
86
+ with file_data_path.open("w") as file:
87
+ json.dump(file_data.to_dict(), file, indent=2)
91
88
 
92
89
  async def _run_async(self, fn: Callable, file_data_path: str) -> list[DownloadStepResponse]:
93
90
  file_data = FileData.from_file(path=file_data_path)
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ from dataclasses import dataclass
2
3
  from pathlib import Path
3
4
  from typing import Callable, TypedDict
4
5
 
@@ -15,6 +16,7 @@ class UncompressStepResponse(TypedDict):
15
16
  path: str
16
17
 
17
18
 
19
+ @dataclass
18
20
  class UncompressStep(PipelineStep):
19
21
  process: Uncompressor
20
22
  identifier: str = STEP_ID
@@ -23,21 +25,6 @@ class UncompressStep(PipelineStep):
23
25
  config = self.process.config.json() if self.process.config else None
24
26
  logger.info(f"Created {self.identifier} with configs: {config}")
25
27
 
26
- def _run(self, path: str, file_data_path: str) -> list[UncompressStepResponse]:
27
- file_data = FileData.from_file(path=file_data_path)
28
- new_file_data = self.process.run(file_data=file_data)
29
- responses = []
30
- for new_file in new_file_data:
31
- new_file_data_path = Path(file_data_path).parent / f"{new_file.identifier}.json"
32
- new_file.to_file(path=str(new_file_data_path.resolve()))
33
- responses.append(
34
- UncompressStepResponse(
35
- path=new_file.source_identifiers.fullpath,
36
- file_data_path=str(new_file_data_path),
37
- )
38
- )
39
- return responses
40
-
41
28
  async def _run_async(
42
29
  self, fn: Callable, path: str, file_data_path: str
43
30
  ) -> list[UncompressStepResponse]:
@@ -56,7 +43,7 @@ class UncompressStep(PipelineStep):
56
43
  new_file.to_file(path=str(new_file_data_path.resolve()))
57
44
  responses.append(
58
45
  UncompressStepResponse(
59
- path=new_file.source_identifiers.fullpath,
46
+ path=new_file.local_download_path,
60
47
  file_data_path=str(new_file_data_path),
61
48
  )
62
49
  )
@@ -3,10 +3,11 @@ import uuid
3
3
  from dataclasses import dataclass, field
4
4
  from datetime import date, datetime
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Optional
6
+ from typing import TYPE_CHECKING, Annotated, Any, Optional
7
7
 
8
8
  from dateutil import parser
9
9
  from pydantic import Field, Secret
10
+ from pydantic.functional_validators import BeforeValidator
10
11
 
11
12
  from unstructured_ingest.error import DestinationConnectionError
12
13
  from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
@@ -21,9 +22,9 @@ from unstructured_ingest.v2.interfaces import (
21
22
  UploadStagerConfig,
22
23
  )
23
24
  from unstructured_ingest.v2.logger import logger
24
- from unstructured_ingest.v2.processes.connector_registry import (
25
- DestinationRegistryEntry,
26
- )
25
+ from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
26
+
27
+ from .utils import conform_string_to_dict
27
28
 
28
29
  if TYPE_CHECKING:
29
30
  from chromadb import Client
@@ -32,10 +33,10 @@ CONNECTOR_TYPE = "chroma"
32
33
 
33
34
 
34
35
  class ChromaAccessConfig(AccessConfig):
35
- settings: Optional[dict[str, str]] = Field(
36
+ settings: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
36
37
  default=None, description="A dictionary of settings to communicate with the chroma server."
37
38
  )
38
- headers: Optional[dict[str, str]] = Field(
39
+ headers: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
39
40
  default=None, description="A dictionary of headers to send to the Chroma server."
40
41
  )
41
42
 
@@ -2,10 +2,11 @@ import io
2
2
  import json
3
3
  from dataclasses import dataclass, field
4
4
  from pathlib import Path
5
- from typing import TYPE_CHECKING, Any, Generator, Optional
5
+ from typing import TYPE_CHECKING, Annotated, Any, Generator, Optional
6
6
 
7
7
  from dateutil import parser
8
8
  from pydantic import Field, Secret
9
+ from pydantic.functional_validators import BeforeValidator
9
10
 
10
11
  from unstructured_ingest.error import (
11
12
  SourceConnectionError,
@@ -26,9 +27,9 @@ from unstructured_ingest.v2.interfaces import (
26
27
  download_responses,
27
28
  )
28
29
  from unstructured_ingest.v2.logger import logger
29
- from unstructured_ingest.v2.processes.connector_registry import (
30
- SourceRegistryEntry,
31
- )
30
+ from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
31
+
32
+ from .utils import conform_string_to_dict
32
33
 
33
34
  CONNECTOR_TYPE = "google_drive"
34
35
 
@@ -38,7 +39,7 @@ if TYPE_CHECKING:
38
39
 
39
40
 
40
41
  class GoogleDriveAccessConfig(AccessConfig):
41
- service_account_key: Optional[dict] = Field(
42
+ service_account_key: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
42
43
  default=None, description="Credentials values to use for authentication"
43
44
  )
44
45
  service_account_key_path: Optional[Path] = Field(
@@ -67,7 +67,15 @@ class MilvusConnectionConfig(ConnectionConfig):
67
67
 
68
68
 
69
69
  class MilvusUploadStagerConfig(UploadStagerConfig):
70
- pass
70
+
71
+ fields_to_include: Optional[list[str]] = None
72
+ """If set - list of fields to include in the output.
73
+ Unspecified fields are removed from the elements.
74
+ This action takse place after metadata flattening.
75
+ Missing fields will cause stager to throw KeyError."""
76
+
77
+ flatten_metadata: bool = True
78
+ """If set - flatten "metadata" key and put contents directly into data"""
71
79
 
72
80
 
73
81
  @dataclass
@@ -85,8 +93,26 @@ class MilvusUploadStager(UploadStager):
85
93
  pass
86
94
  return parser.parse(date_string).timestamp()
87
95
 
88
- @classmethod
89
- def conform_dict(cls, data: dict) -> None:
96
+ def conform_dict(self, data: dict) -> None:
97
+ if self.upload_stager_config.flatten_metadata and (metadata := data.pop("metadata", None)):
98
+ data.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"]))
99
+
100
+ # TODO: milvus sdk doesn't seem to support defaults via the schema yet,
101
+ # remove once that gets updated
102
+ defaults = {"is_continuation": False}
103
+ for default in defaults:
104
+ if default not in data:
105
+ data[default] = defaults[default]
106
+
107
+ if self.upload_stager_config.fields_to_include:
108
+ data_keys = set(data.keys())
109
+ for data_key in data_keys:
110
+ if data_key not in self.upload_stager_config.fields_to_include:
111
+ data.pop(data_key)
112
+ for field_include_key in self.upload_stager_config.fields_to_include:
113
+ if field_include_key not in data:
114
+ raise KeyError(f"Field '{field_include_key}' is missing in data!")
115
+
90
116
  datetime_columns = [
91
117
  "data_source_date_created",
92
118
  "data_source_date_modified",
@@ -96,21 +122,12 @@ class MilvusUploadStager(UploadStager):
96
122
 
97
123
  json_dumps_fields = ["languages", "data_source_permissions_data"]
98
124
 
99
- # TODO: milvus sdk doesn't seem to support defaults via the schema yet,
100
- # remove once that gets updated
101
- defaults = {"is_continuation": False}
102
-
103
- if metadata := data.pop("metadata", None):
104
- data.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"]))
105
125
  for datetime_column in datetime_columns:
106
126
  if datetime_column in data:
107
- data[datetime_column] = cls.parse_date_string(data[datetime_column])
127
+ data[datetime_column] = self.parse_date_string(data[datetime_column])
108
128
  for json_dumps_field in json_dumps_fields:
109
129
  if json_dumps_field in data:
110
130
  data[json_dumps_field] = json.dumps(data[json_dumps_field])
111
- for default in defaults:
112
- if default not in data:
113
- data[default] = defaults[default]
114
131
 
115
132
  def run(
116
133
  self,
@@ -39,22 +39,6 @@ heavily on the Elasticsearch connector code, inheriting the functionality as muc
39
39
 
40
40
  class OpenSearchAccessConfig(AccessConfig):
41
41
  password: Optional[str] = Field(default=None, description="password when using basic auth")
42
- use_ssl: bool = Field(default=False, description="use ssl for the connection")
43
- verify_certs: bool = Field(default=False, description="whether to verify SSL certificates")
44
- ssl_show_warn: bool = Field(
45
- default=False, description="show warning when verify certs is disabled"
46
- )
47
- ca_certs: Optional[Path] = Field(default=None, description="path to CA bundle")
48
- client_cert: Optional[Path] = Field(
49
- default=None,
50
- description="path to the file containing the private key and the certificate,"
51
- " or cert only if using client_key",
52
- )
53
- client_key: Optional[Path] = Field(
54
- default=None,
55
- description="path to the file containing the private key"
56
- " if using separate cert and key files",
57
- )
58
42
 
59
43
 
60
44
  class OpenSearchClientInput(BaseModel):
@@ -75,6 +59,23 @@ class OpenSearchConnectionConfig(ConnectionConfig):
75
59
  examples=["http://localhost:9200"],
76
60
  )
77
61
  username: Optional[str] = Field(default=None, description="username when using basic auth")
62
+ use_ssl: bool = Field(default=False, description="use ssl for the connection")
63
+ verify_certs: bool = Field(default=False, description="whether to verify SSL certificates")
64
+ ssl_show_warn: bool = Field(
65
+ default=False, description="show warning when verify certs is disabled"
66
+ )
67
+ ca_certs: Optional[Path] = Field(default=None, description="path to CA bundle")
68
+ client_cert: Optional[Path] = Field(
69
+ default=None,
70
+ description="path to the file containing the private key and the certificate,"
71
+ " or cert only if using client_key",
72
+ )
73
+ client_key: Optional[Path] = Field(
74
+ default=None,
75
+ description="path to the file containing the private key"
76
+ " if using separate cert and key files",
77
+ )
78
+
78
79
  access_config: Secret[OpenSearchAccessConfig]
79
80
 
80
81
  def get_client_kwargs(self) -> dict:
@@ -85,18 +86,18 @@ class OpenSearchConnectionConfig(ConnectionConfig):
85
86
  client_input_kwargs = {}
86
87
  if self.hosts:
87
88
  client_input_kwargs["hosts"] = self.hosts
88
- if access_config.use_ssl:
89
- client_input_kwargs["use_ssl"] = access_config.use_ssl
90
- if access_config.verify_certs:
91
- client_input_kwargs["verify_certs"] = access_config.verify_certs
92
- if access_config.ssl_show_warn:
93
- client_input_kwargs["ssl_show_warn"] = access_config.ssl_show_warn
94
- if access_config.ca_certs:
95
- client_input_kwargs["ca_certs"] = str(access_config.ca_certs)
96
- if access_config.client_cert:
97
- client_input_kwargs["client_cert"] = str(access_config.client_cert)
98
- if access_config.client_key:
99
- client_input_kwargs["client_key"] = str(access_config.client_key)
89
+ if self.use_ssl:
90
+ client_input_kwargs["use_ssl"] = self.use_ssl
91
+ if self.verify_certs:
92
+ client_input_kwargs["verify_certs"] = self.verify_certs
93
+ if self.ssl_show_warn:
94
+ client_input_kwargs["ssl_show_warn"] = self.ssl_show_warn
95
+ if self.ca_certs:
96
+ client_input_kwargs["ca_certs"] = str(self.ca_certs)
97
+ if self.client_cert:
98
+ client_input_kwargs["client_cert"] = str(self.client_cert)
99
+ if self.client_key:
100
+ client_input_kwargs["client_key"] = str(self.client_key)
100
101
  if self.username and access_config.password:
101
102
  client_input_kwargs["http_auth"] = (self.username, access_config.password)
102
103
  client_input = OpenSearchClientInput(**client_input_kwargs)
@@ -1,7 +1,9 @@
1
+ import json
1
2
  from datetime import datetime
2
- from typing import Union
3
+ from typing import Any, Union
3
4
 
4
5
  from dateutil import parser
6
+ from pydantic import ValidationError
5
7
 
6
8
 
7
9
  def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime:
@@ -17,3 +19,11 @@ def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime:
17
19
  return datetime.fromtimestamp(timestamp)
18
20
  except ValueError:
19
21
  return parser.parse(date_value)
22
+
23
+
24
+ def conform_string_to_dict(value: Any) -> dict:
25
+ if isinstance(value, dict):
26
+ return value
27
+ if isinstance(value, str):
28
+ return json.loads(value)
29
+ raise ValidationError(f"Input could not be mapped to a valid dict: {value}")
@@ -0,0 +1,61 @@
1
+ from abc import ABC
2
+ from copy import copy
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Any
6
+ from uuid import NAMESPACE_DNS, uuid5
7
+
8
+ from pydantic import BaseModel
9
+
10
+ from unstructured_ingest.utils.compression import TAR_FILE_EXT, ZIP_FILE_EXT, uncompress_file
11
+ from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
12
+ from unstructured_ingest.v2.interfaces.process import BaseProcess
13
+ from unstructured_ingest.v2.logger import logger
14
+
15
+
16
+ class UncompressConfig(BaseModel):
17
+ pass
18
+
19
+
20
+ @dataclass
21
+ class Uncompressor(BaseProcess, ABC):
22
+ config: UncompressConfig = field(default_factory=UncompressConfig)
23
+
24
+ def is_async(self) -> bool:
25
+ return True
26
+
27
+ def run(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
28
+ local_filepath = Path(file_data.local_download_path)
29
+ if local_filepath.suffix not in TAR_FILE_EXT + ZIP_FILE_EXT:
30
+ return [file_data]
31
+ new_path = uncompress_file(filename=str(local_filepath))
32
+ new_files = [i for i in Path(new_path).rglob("*") if i.is_file()]
33
+ responses = []
34
+ logger.debug(
35
+ "uncompressed {} files from original file {}: {}".format(
36
+ len(new_files), local_filepath, ", ".join([str(f) for f in new_files])
37
+ )
38
+ )
39
+ for f in new_files:
40
+ new_file_data = copy(file_data)
41
+ new_file_data.identifier = str(uuid5(NAMESPACE_DNS, str(f)))
42
+ new_file_data.local_download_path = str(f.resolve())
43
+ new_rel_download_path = str(f).replace(str(Path(local_filepath.parent)), "")[1:]
44
+ new_file_data.source_identifiers = SourceIdentifiers(
45
+ filename=f.name,
46
+ fullpath=file_data.source_identifiers.fullpath.replace(
47
+ file_data.source_identifiers.filename, new_rel_download_path
48
+ ),
49
+ rel_path=(
50
+ file_data.source_identifiers.rel_path.replace(
51
+ file_data.source_identifiers.filename, new_rel_download_path
52
+ )
53
+ if file_data.source_identifiers.rel_path
54
+ else None
55
+ ),
56
+ )
57
+ responses.append(new_file_data)
58
+ return responses
59
+
60
+ async def run_async(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
61
+ return self.run(file_data=file_data, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -1 +0,0 @@
1
- __version__ = "0.0.7" # pragma: no cover
@@ -1,43 +0,0 @@
1
- from abc import ABC
2
- from copy import copy
3
- from dataclasses import dataclass, field
4
- from pathlib import Path
5
- from typing import Any
6
-
7
- from pydantic import BaseModel
8
-
9
- from unstructured_ingest.utils.compression import TAR_FILE_EXT, ZIP_FILE_EXT, uncompress_file
10
- from unstructured_ingest.v2.interfaces import FileData
11
- from unstructured_ingest.v2.interfaces.process import BaseProcess
12
-
13
-
14
- class UncompressConfig(BaseModel):
15
- pass
16
-
17
-
18
- @dataclass
19
- class Uncompressor(BaseProcess, ABC):
20
- config: UncompressConfig = field(default_factory=UncompressConfig)
21
-
22
- def is_async(self) -> bool:
23
- return True
24
-
25
- def run(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
26
- local_filepath = Path(file_data.source_identifiers.fullpath)
27
- if local_filepath.suffix not in TAR_FILE_EXT + ZIP_FILE_EXT:
28
- return [file_data]
29
- new_path = uncompress_file(filename=str(local_filepath))
30
- new_files = [i for i in Path(new_path).rglob("*") if i.is_file()]
31
- responses = []
32
- for f in new_files:
33
- new_file_data = copy(file_data)
34
- new_file_data.source_identifiers.fullpath = str(f)
35
- if new_file_data.source_identifiers.rel_path:
36
- new_file_data.source_identifiers.rel_path = str(f).replace(
37
- str(local_filepath.parent), ""
38
- )[1:]
39
- responses.append(new_file_data)
40
- return responses
41
-
42
- async def run_async(self, file_data: FileData, **kwargs: Any) -> list[FileData]:
43
- return self.run(file_data=file_data, **kwargs)