unstructured-ingest 0.3.9__tar.gz → 0.3.10__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (493)
  1. {unstructured_ingest-0.3.9/unstructured_ingest.egg-info → unstructured_ingest-0.3.10}/PKG-INFO +18 -17
  2. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/sql/test_postgres.py +3 -3
  3. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/sql/test_singlestore.py +3 -3
  4. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/sql/test_sqlite.py +3 -3
  5. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_astradb.py +40 -0
  6. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_kafka.py +2 -2
  7. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_mongodb.py +4 -1
  8. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/validation/source.py +31 -11
  9. unstructured_ingest-0.3.10/unstructured_ingest/__version__.py +1 -0
  10. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/__init__.py +3 -1
  11. unstructured_ingest-0.3.10/unstructured_ingest/v2/interfaces/file_data.py +106 -0
  12. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/chunk.py +2 -1
  13. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/download.py +5 -4
  14. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/embed.py +2 -1
  15. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/filter.py +2 -2
  16. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/index.py +4 -4
  17. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/partition.py +3 -2
  18. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/stage.py +2 -2
  19. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/uncompress.py +2 -2
  20. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/upload.py +3 -3
  21. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
  22. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/astradb.py +35 -33
  23. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/couchbase.py +50 -41
  24. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +41 -45
  25. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +12 -35
  26. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +12 -35
  27. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +15 -42
  28. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +33 -29
  29. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +12 -34
  30. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -37
  31. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -33
  32. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/mongodb.py +95 -100
  33. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/neo4j.py +5 -3
  34. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/onedrive.py +1 -1
  35. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sql/postgres.py +5 -5
  36. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -5
  37. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
  38. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sql/sql.py +31 -26
  39. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -5
  40. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10/unstructured_ingest.egg-info}/PKG-INFO +18 -17
  41. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest.egg-info/requires.txt +17 -16
  42. unstructured_ingest-0.3.9/unstructured_ingest/__version__.py +0 -1
  43. unstructured_ingest-0.3.9/unstructured_ingest/v2/interfaces/file_data.py +0 -62
  44. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/LICENSE.md +0 -0
  45. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/README.md +0 -0
  46. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/pyproject.toml +0 -0
  47. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/setup.cfg +0 -0
  48. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/setup.py +0 -0
  49. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/__init__.py +0 -0
  50. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/__init__.py +0 -0
  51. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/chunkers/__init__.py +0 -0
  52. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/chunkers/test_chunkers.py +0 -0
  53. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/__init__.py +0 -0
  54. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/conftest.py +0 -0
  55. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/databricks_tests/__init__.py +0 -0
  56. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/databricks_tests/test_volumes_native.py +0 -0
  57. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/duckdb/__init__.py +0 -0
  58. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/duckdb/conftest.py +0 -0
  59. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/duckdb/test_duckdb.py +0 -0
  60. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/duckdb/test_motherduck.py +0 -0
  61. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/elasticsearch/__init__.py +0 -0
  62. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/elasticsearch/conftest.py +0 -0
  63. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -0
  64. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/elasticsearch/test_opensearch.py +0 -0
  65. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/sql/__init__.py +0 -0
  66. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/sql/test_snowflake.py +0 -0
  67. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_azure_ai_search.py +0 -0
  68. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_chroma.py +0 -0
  69. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_confluence.py +0 -0
  70. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_delta_table.py +0 -0
  71. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_lancedb.py +0 -0
  72. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_milvus.py +0 -0
  73. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_neo4j.py +0 -0
  74. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_onedrive.py +0 -0
  75. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_pinecone.py +0 -0
  76. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_qdrant.py +0 -0
  77. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/test_s3.py +0 -0
  78. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/__init__.py +0 -0
  79. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/constants.py +0 -0
  80. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/docker.py +0 -0
  81. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/docker_compose.py +0 -0
  82. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/validation/__init__.py +0 -0
  83. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/validation/destination.py +0 -0
  84. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/validation/equality.py +0 -0
  85. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/utils/validation/utils.py +0 -0
  86. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/weaviate/__init__.py +0 -0
  87. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/weaviate/conftest.py +0 -0
  88. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/weaviate/test_cloud.py +0 -0
  89. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/connectors/weaviate/test_local.py +0 -0
  90. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/__init__.py +0 -0
  91. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/conftest.py +0 -0
  92. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_azure_openai.py +0 -0
  93. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_bedrock.py +0 -0
  94. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_huggingface.py +0 -0
  95. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_mixedbread.py +0 -0
  96. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_octoai.py +0 -0
  97. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_openai.py +0 -0
  98. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_togetherai.py +0 -0
  99. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_vertexai.py +0 -0
  100. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/test_voyageai.py +0 -0
  101. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/embedders/utils.py +0 -0
  102. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/partitioners/__init__.py +0 -0
  103. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/partitioners/test_partitioner.py +0 -0
  104. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/integration/utils.py +0 -0
  105. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/__init__.py +0 -0
  106. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/embed/__init__.py +0 -0
  107. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/embed/test_mixedbreadai.py +0 -0
  108. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/embed/test_octoai.py +0 -0
  109. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/embed/test_openai.py +0 -0
  110. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/embed/test_vertexai.py +0 -0
  111. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/embed/test_voyageai.py +0 -0
  112. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/test_chunking_utils.py +0 -0
  113. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/test_error.py +0 -0
  114. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/test_interfaces.py +0 -0
  115. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/test_logger.py +0 -0
  116. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/test_utils.py +0 -0
  117. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/__init__.py +0 -0
  118. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/chunkers/__init__.py +0 -0
  119. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/chunkers/test_chunkers.py +0 -0
  120. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/connectors/__init__.py +0 -0
  121. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/connectors/test_confluence.py +0 -0
  122. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/__init__.py +0 -0
  123. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_bedrock.py +0 -0
  124. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_huggingface.py +0 -0
  125. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_mixedbread.py +0 -0
  126. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_octoai.py +0 -0
  127. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_openai.py +0 -0
  128. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_togetherai.py +0 -0
  129. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_vertexai.py +0 -0
  130. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/embedders/test_voyageai.py +0 -0
  131. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/partitioners/__init__.py +0 -0
  132. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/partitioners/test_partitioner.py +0 -0
  133. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/test_interfaces.py +0 -0
  134. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/test_utils.py +0 -0
  135. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/utils/__init__.py +0 -0
  136. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/test/unit/v2/utils/data_generator.py +0 -0
  137. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/__init__.py +0 -0
  138. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/__init__.py +0 -0
  139. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/base/__init__.py +0 -0
  140. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/base/cmd.py +0 -0
  141. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/base/dest.py +0 -0
  142. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/base/src.py +0 -0
  143. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cli.py +0 -0
  144. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmd_factory.py +0 -0
  145. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/__init__.py +0 -0
  146. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  147. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/astradb.py +0 -0
  148. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/azure_ai_search.py +0 -0
  149. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  150. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  151. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  152. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  153. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  154. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  155. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/discord.py +0 -0
  156. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  157. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  158. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  159. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  160. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  161. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  162. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  163. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  164. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  165. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/github.py +0 -0
  166. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  167. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  168. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  169. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/jira.py +0 -0
  170. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  171. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/local.py +0 -0
  172. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  173. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/notion.py +0 -0
  174. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  175. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  176. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  177. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  178. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  179. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  180. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  181. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  182. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/slack.py +0 -0
  183. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/sql.py +0 -0
  184. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  185. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  186. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  187. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/common.py +0 -0
  188. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/interfaces.py +0 -0
  189. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/cli/utils.py +0 -0
  190. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/__init__.py +0 -0
  191. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/airtable.py +0 -0
  192. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/astradb.py +0 -0
  193. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/azure_ai_search.py +0 -0
  194. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/biomed.py +0 -0
  195. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/chroma.py +0 -0
  196. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/clarifai.py +0 -0
  197. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/confluence.py +0 -0
  198. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/databricks_volumes.py +0 -0
  199. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/delta_table.py +0 -0
  200. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/discord.py +0 -0
  201. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/elasticsearch.py +0 -0
  202. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  203. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  204. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/box.py +0 -0
  205. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  206. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
  207. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  208. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  209. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  210. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/git.py +0 -0
  211. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/github.py +0 -0
  212. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/gitlab.py +0 -0
  213. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/google_drive.py +0 -0
  214. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/hubspot.py +0 -0
  215. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/jira.py +0 -0
  216. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/kafka.py +0 -0
  217. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/local.py +0 -0
  218. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/mongodb.py +0 -0
  219. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/__init__.py +0 -0
  220. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/client.py +0 -0
  221. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/connector.py +0 -0
  222. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/helpers.py +0 -0
  223. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  224. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  225. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/block.py +0 -0
  226. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  227. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  228. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  229. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  230. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  231. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  232. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  233. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  234. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  235. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  236. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  237. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  238. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  239. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  240. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  241. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  242. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  243. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  244. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  245. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  246. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  247. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  248. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  249. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  250. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  251. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  252. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  253. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  254. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  255. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database.py +0 -0
  256. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  257. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  258. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  259. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  260. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  261. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  262. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  263. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  264. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  265. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  266. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  267. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  268. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  269. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  270. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  271. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  272. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  273. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  274. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  275. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  276. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  277. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  278. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  279. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/date.py +0 -0
  280. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/file.py +0 -0
  281. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/page.py +0 -0
  282. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  283. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  284. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/notion/types/user.py +0 -0
  285. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/onedrive.py +0 -0
  286. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/opensearch.py +0 -0
  287. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/outlook.py +0 -0
  288. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/pinecone.py +0 -0
  289. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/qdrant.py +0 -0
  290. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/reddit.py +0 -0
  291. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/registry.py +0 -0
  292. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/salesforce.py +0 -0
  293. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/sharepoint.py +0 -0
  294. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/slack.py +0 -0
  295. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/sql.py +0 -0
  296. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/vectara.py +0 -0
  297. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/weaviate.py +0 -0
  298. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/connector/wikipedia.py +0 -0
  299. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/__init__.py +0 -0
  300. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/azure_openai.py +0 -0
  301. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/bedrock.py +0 -0
  302. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/huggingface.py +0 -0
  303. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/interfaces.py +0 -0
  304. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  305. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/octoai.py +0 -0
  306. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/openai.py +0 -0
  307. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/togetherai.py +0 -0
  308. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/vertexai.py +0 -0
  309. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/embed/voyageai.py +0 -0
  310. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  311. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  312. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  313. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  314. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/error.py +0 -0
  315. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  316. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  317. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  318. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/interfaces.py +0 -0
  319. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/logger.py +0 -0
  320. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/main.py +0 -0
  321. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/__init__.py +0 -0
  322. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/copy.py +0 -0
  323. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  324. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/interfaces.py +0 -0
  325. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/partition.py +0 -0
  326. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/permissions.py +0 -0
  327. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/pipeline.py +0 -0
  328. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  329. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
  330. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
  331. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/source.py +0 -0
  332. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/utils.py +0 -0
  333. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/pipeline/write.py +0 -0
  334. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/processor.py +0 -0
  335. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/__init__.py +0 -0
  336. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/airtable.py +0 -0
  337. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/astradb.py +0 -0
  338. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/base_runner.py +0 -0
  339. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/biomed.py +0 -0
  340. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/confluence.py +0 -0
  341. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/delta_table.py +0 -0
  342. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/discord.py +0 -0
  343. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/elasticsearch.py +0 -0
  344. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  345. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  346. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/box.py +0 -0
  347. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  348. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  349. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  350. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  351. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  352. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/github.py +0 -0
  353. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/gitlab.py +0 -0
  354. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/google_drive.py +0 -0
  355. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/hubspot.py +0 -0
  356. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/jira.py +0 -0
  357. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/kafka.py +0 -0
  358. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/local.py +0 -0
  359. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/mongodb.py +0 -0
  360. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/notion.py +0 -0
  361. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/onedrive.py +0 -0
  362. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/opensearch.py +0 -0
  363. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/outlook.py +0 -0
  364. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/reddit.py +0 -0
  365. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/salesforce.py +0 -0
  366. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/sharepoint.py +0 -0
  367. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/slack.py +0 -0
  368. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/utils.py +0 -0
  369. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/wikipedia.py +0 -0
  370. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/__init__.py +0 -0
  371. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/astradb.py +0 -0
  372. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/azure_ai_search.py +0 -0
  373. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  374. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/chroma.py +0 -0
  375. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  376. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  377. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  378. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  379. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  380. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  381. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  382. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  383. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  384. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  385. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/kafka.py +0 -0
  386. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  387. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  388. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  389. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  390. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/sql.py +0 -0
  391. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/vectara.py +0 -0
  392. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  393. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/__init__.py +0 -0
  394. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/chunking.py +0 -0
  395. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/compression.py +0 -0
  396. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/data_prep.py +0 -0
  397. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/dep_check.py +0 -0
  398. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/google_filetype.py +0 -0
  399. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  400. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/utils/table.py +0 -0
  401. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/__init__.py +0 -0
  402. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/__init__.py +0 -0
  403. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  404. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/base/cmd.py +0 -0
  405. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/base/dest.py +0 -0
  406. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  407. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/base/src.py +0 -0
  408. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/cli.py +0 -0
  409. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/cmds.py +0 -0
  410. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  411. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/utils/click.py +0 -0
  412. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/cli/utils/model_conversion.py +0 -0
  413. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/constants.py +0 -0
  414. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/connector.py +0 -0
  415. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
  416. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
  417. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/process.py +0 -0
  418. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/processor.py +0 -0
  419. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
  420. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
  421. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/logger.py +0 -0
  422. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/main.py +0 -0
  423. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/otel.py +0 -0
  424. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  425. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
  426. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/otel.py +0 -0
  427. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/pipeline.py +0 -0
  428. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  429. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/__init__.py +0 -0
  430. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/chunker.py +0 -0
  431. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
  432. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/airtable.py +0 -0
  433. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/azure_ai_search.py +0 -0
  434. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/chroma.py +0 -0
  435. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/confluence.py +0 -0
  436. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/databricks/__init__.py +0 -0
  437. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/databricks/volumes.py +0 -0
  438. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +0 -0
  439. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +0 -0
  440. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +0 -0
  441. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +0 -0
  442. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/delta_table.py +0 -0
  443. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +0 -0
  444. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/duckdb/base.py +0 -0
  445. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +0 -0
  446. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +0 -0
  447. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +0 -0
  448. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +0 -0
  449. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  450. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  451. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/gitlab.py +0 -0
  452. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -0
  453. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/kafka/__init__.py +0 -0
  454. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/kafka/cloud.py +0 -0
  455. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/kafka/kafka.py +0 -0
  456. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/kafka/local.py +0 -0
  457. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
  458. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +0 -0
  459. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/aws.py +0 -0
  460. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/azure.py +0 -0
  461. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +0 -0
  462. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +0 -0
  463. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +0 -0
  464. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/lancedb/local.py +0 -0
  465. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/local.py +0 -0
  466. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/milvus.py +0 -0
  467. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/outlook.py +0 -0
  468. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/pinecone.py +0 -0
  469. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +0 -0
  470. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +0 -0
  471. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/qdrant/local.py +0 -0
  472. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +0 -0
  473. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/qdrant/server.py +0 -0
  474. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
  475. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
  476. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/slack.py +0 -0
  477. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/sql/__init__.py +0 -0
  478. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
  479. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +0 -0
  480. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +0 -0
  481. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +0 -0
  482. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/weaviate/local.py +0 -0
  483. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +0 -0
  484. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/embedder.py +0 -0
  485. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/filter.py +0 -0
  486. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/partitioner.py +0 -0
  487. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/processes/uncompress.py +0 -0
  488. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/unstructured_api.py +0 -0
  489. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest/v2/utils.py +0 -0
  490. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest.egg-info/SOURCES.txt +0 -0
  491. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  492. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  493. {unstructured_ingest-0.3.9 → unstructured_ingest-0.3.10}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,14 +22,14 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: tqdm
26
- Requires-Dist: click
27
- Requires-Dist: python-dateutil
28
- Requires-Dist: ndjson
29
- Requires-Dist: pydantic>=2.7
30
25
  Requires-Dist: opentelemetry-sdk
26
+ Requires-Dist: click
31
27
  Requires-Dist: pandas
28
+ Requires-Dist: tqdm
29
+ Requires-Dist: ndjson
32
30
  Requires-Dist: dataclasses_json
31
+ Requires-Dist: python-dateutil
32
+ Requires-Dist: pydantic>=2.7
33
33
  Provides-Extra: remote
34
34
  Requires-Dist: unstructured-client>=0.26.1; extra == "remote"
35
35
  Provides-Extra: csv
@@ -67,8 +67,8 @@ Requires-Dist: pyairtable; extra == "airtable"
67
67
  Provides-Extra: astradb
68
68
  Requires-Dist: astrapy; extra == "astradb"
69
69
  Provides-Extra: azure
70
- Requires-Dist: adlfs; extra == "azure"
71
70
  Requires-Dist: fsspec; extra == "azure"
71
+ Requires-Dist: adlfs; extra == "azure"
72
72
  Provides-Extra: azure-ai-search
73
73
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
74
74
  Provides-Extra: biomed
@@ -87,24 +87,24 @@ Requires-Dist: requests; extra == "confluence"
87
87
  Provides-Extra: couchbase
88
88
  Requires-Dist: couchbase; extra == "couchbase"
89
89
  Provides-Extra: delta-table
90
- Requires-Dist: boto3; extra == "delta-table"
91
90
  Requires-Dist: deltalake; extra == "delta-table"
91
+ Requires-Dist: boto3; extra == "delta-table"
92
92
  Provides-Extra: discord
93
93
  Requires-Dist: discord-py; extra == "discord"
94
94
  Provides-Extra: dropbox
95
- Requires-Dist: dropboxdrivefs; extra == "dropbox"
96
95
  Requires-Dist: fsspec; extra == "dropbox"
96
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
97
97
  Provides-Extra: duckdb
98
98
  Requires-Dist: duckdb; extra == "duckdb"
99
99
  Provides-Extra: elasticsearch
100
100
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
101
101
  Provides-Extra: gcs
102
- Requires-Dist: gcsfs; extra == "gcs"
103
102
  Requires-Dist: fsspec; extra == "gcs"
103
+ Requires-Dist: gcsfs; extra == "gcs"
104
104
  Requires-Dist: bs4; extra == "gcs"
105
105
  Provides-Extra: github
106
- Requires-Dist: pygithub>1.58.0; extra == "github"
107
106
  Requires-Dist: requests; extra == "github"
107
+ Requires-Dist: pygithub>1.58.0; extra == "github"
108
108
  Provides-Extra: gitlab
109
109
  Requires-Dist: python-gitlab; extra == "gitlab"
110
110
  Provides-Extra: google-drive
@@ -127,11 +127,12 @@ Requires-Dist: pymongo; extra == "mongodb"
127
127
  Provides-Extra: neo4j
128
128
  Requires-Dist: cymple; extra == "neo4j"
129
129
  Requires-Dist: neo4j; extra == "neo4j"
130
+ Requires-Dist: networkx; extra == "neo4j"
130
131
  Provides-Extra: notion
132
+ Requires-Dist: htmlBuilder; extra == "notion"
131
133
  Requires-Dist: backoff; extra == "notion"
132
134
  Requires-Dist: notion-client; extra == "notion"
133
135
  Requires-Dist: httpx; extra == "notion"
134
- Requires-Dist: htmlBuilder; extra == "notion"
135
136
  Provides-Extra: onedrive
136
137
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
137
138
  Requires-Dist: msal; extra == "onedrive"
@@ -150,21 +151,21 @@ Requires-Dist: qdrant-client; extra == "qdrant"
150
151
  Provides-Extra: reddit
151
152
  Requires-Dist: praw; extra == "reddit"
152
153
  Provides-Extra: s3
153
- Requires-Dist: s3fs; extra == "s3"
154
154
  Requires-Dist: fsspec; extra == "s3"
155
+ Requires-Dist: s3fs; extra == "s3"
155
156
  Provides-Extra: sharepoint
156
157
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
157
158
  Requires-Dist: msal; extra == "sharepoint"
158
159
  Provides-Extra: salesforce
159
160
  Requires-Dist: simple-salesforce; extra == "salesforce"
160
161
  Provides-Extra: sftp
161
- Requires-Dist: paramiko; extra == "sftp"
162
162
  Requires-Dist: fsspec; extra == "sftp"
163
+ Requires-Dist: paramiko; extra == "sftp"
163
164
  Provides-Extra: slack
164
165
  Requires-Dist: slack_sdk[optional]; extra == "slack"
165
166
  Provides-Extra: snowflake
166
- Requires-Dist: psycopg2-binary; extra == "snowflake"
167
167
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
168
+ Requires-Dist: psycopg2-binary; extra == "snowflake"
168
169
  Provides-Extra: wikipedia
169
170
  Requires-Dist: wikipedia; extra == "wikipedia"
170
171
  Provides-Extra: weaviate
@@ -178,8 +179,8 @@ Requires-Dist: requests; extra == "vectara"
178
179
  Provides-Extra: embed-huggingface
179
180
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
180
181
  Provides-Extra: embed-octoai
181
- Requires-Dist: tiktoken; extra == "embed-octoai"
182
182
  Requires-Dist: openai; extra == "embed-octoai"
183
+ Requires-Dist: tiktoken; extra == "embed-octoai"
183
184
  Provides-Extra: embed-vertexai
184
185
  Requires-Dist: vertexai; extra == "embed-vertexai"
185
186
  Provides-Extra: embed-voyageai
@@ -187,8 +188,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
187
188
  Provides-Extra: embed-mixedbreadai
188
189
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
189
190
  Provides-Extra: openai
190
- Requires-Dist: tiktoken; extra == "openai"
191
191
  Requires-Dist: openai; extra == "openai"
192
+ Requires-Dist: tiktoken; extra == "openai"
192
193
  Provides-Extra: bedrock
193
194
  Requires-Dist: boto3; extra == "bedrock"
194
195
  Provides-Extra: togetherai
@@ -28,7 +28,7 @@ from unstructured_ingest.v2.processes.connectors.sql.postgres import (
28
28
  PostgresUploadStager,
29
29
  )
30
30
 
31
- SEED_DATA_ROWS = 20
31
+ SEED_DATA_ROWS = 10
32
32
 
33
33
 
34
34
  @pytest.fixture
@@ -69,7 +69,7 @@ async def test_postgres_source(temp_dir: Path, source_database_setup: str):
69
69
  )
70
70
  indexer = PostgresIndexer(
71
71
  connection_config=connection_config,
72
- index_config=PostgresIndexerConfig(table_name="cars", id_column="car_id", batch_size=5),
72
+ index_config=PostgresIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
73
73
  )
74
74
  downloader = PostgresDownloader(
75
75
  connection_config=connection_config,
@@ -81,7 +81,7 @@ async def test_postgres_source(temp_dir: Path, source_database_setup: str):
81
81
  configs=SourceValidationConfigs(
82
82
  test_id="postgres",
83
83
  expected_num_files=SEED_DATA_ROWS,
84
- expected_number_indexed_file_data=4,
84
+ expected_number_indexed_file_data=2,
85
85
  validate_downloaded_files=True,
86
86
  ),
87
87
  )
@@ -29,7 +29,7 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
29
29
  SingleStoreUploadStager,
30
30
  )
31
31
 
32
- SEED_DATA_ROWS = 20
32
+ SEED_DATA_ROWS = 10
33
33
 
34
34
 
35
35
  @pytest.fixture
@@ -66,7 +66,7 @@ async def test_singlestore_source(temp_dir: Path, source_database_setup: dict):
66
66
  )
67
67
  indexer = SingleStoreIndexer(
68
68
  connection_config=connection_config,
69
- index_config=SingleStoreIndexerConfig(table_name="cars", id_column="car_id", batch_size=5),
69
+ index_config=SingleStoreIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
70
70
  )
71
71
  downloader = SingleStoreDownloader(
72
72
  connection_config=connection_config,
@@ -80,7 +80,7 @@ async def test_singlestore_source(temp_dir: Path, source_database_setup: dict):
80
80
  configs=SourceValidationConfigs(
81
81
  test_id="singlestore",
82
82
  expected_num_files=SEED_DATA_ROWS,
83
- expected_number_indexed_file_data=4,
83
+ expected_number_indexed_file_data=2,
84
84
  validate_downloaded_files=True,
85
85
  ),
86
86
  )
@@ -27,7 +27,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sqlite import (
27
27
  SQLiteUploadStager,
28
28
  )
29
29
 
30
- SEED_DATA_ROWS = 20
30
+ SEED_DATA_ROWS = 10
31
31
 
32
32
 
33
33
  @pytest.fixture
@@ -57,7 +57,7 @@ async def test_sqlite_source(source_database_setup: Path, temp_dir: Path):
57
57
  connection_config = SQLiteConnectionConfig(database_path=source_database_setup)
58
58
  indexer = SQLiteIndexer(
59
59
  connection_config=connection_config,
60
- index_config=SQLiteIndexerConfig(table_name="cars", id_column="car_id", batch_size=5),
60
+ index_config=SQLiteIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
61
61
  )
62
62
  downloader = SQLiteDownloader(
63
63
  connection_config=connection_config,
@@ -69,7 +69,7 @@ async def test_sqlite_source(source_database_setup: Path, temp_dir: Path):
69
69
  configs=SourceValidationConfigs(
70
70
  test_id="sqlite",
71
71
  expected_num_files=SEED_DATA_ROWS,
72
- expected_number_indexed_file_data=4,
72
+ expected_number_indexed_file_data=2,
73
73
  validate_downloaded_files=True,
74
74
  ),
75
75
  )
@@ -14,12 +14,18 @@ from test.integration.connectors.utils.validation.destination import (
14
14
  StagerValidationConfigs,
15
15
  stager_validation,
16
16
  )
17
+ from test.integration.connectors.utils.validation.source import (
18
+ SourceValidationConfigs,
19
+ source_connector_validation,
20
+ )
17
21
  from test.integration.utils import requires_env
18
22
  from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
19
23
  from unstructured_ingest.v2.processes.connectors.astradb import (
20
24
  CONNECTOR_TYPE,
21
25
  AstraDBAccessConfig,
22
26
  AstraDBConnectionConfig,
27
+ AstraDBDownloader,
28
+ AstraDBDownloaderConfig,
23
29
  AstraDBIndexer,
24
30
  AstraDBIndexerConfig,
25
31
  AstraDBUploader,
@@ -110,6 +116,40 @@ def collection(upload_file: Path) -> Collection:
110
116
  astra_db.drop_collection(collection)
111
117
 
112
118
 
119
+ @pytest.mark.asyncio
120
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
121
+ @requires_env("ASTRA_DB_API_ENDPOINT", "ASTRA_DB_APPLICATION_TOKEN")
122
+ async def test_astra_search_source(
123
+ tmp_path: Path,
124
+ ):
125
+ env_data = get_env_data()
126
+ collection_name = "ingest_test_src"
127
+ connection_config = AstraDBConnectionConfig(
128
+ access_config=AstraDBAccessConfig(token=env_data.token, api_endpoint=env_data.api_endpoint)
129
+ )
130
+ indexer = AstraDBIndexer(
131
+ index_config=AstraDBIndexerConfig(
132
+ collection_name=collection_name,
133
+ ),
134
+ connection_config=connection_config,
135
+ )
136
+ downloader = AstraDBDownloader(
137
+ connection_config=connection_config,
138
+ download_config=AstraDBDownloaderConfig(download_dir=tmp_path),
139
+ )
140
+
141
+ await source_connector_validation(
142
+ indexer=indexer,
143
+ downloader=downloader,
144
+ configs=SourceValidationConfigs(
145
+ test_id=CONNECTOR_TYPE,
146
+ expected_num_files=5,
147
+ expected_number_indexed_file_data=1,
148
+ validate_downloaded_files=True,
149
+ ),
150
+ )
151
+
152
+
113
153
  @pytest.mark.asyncio
114
154
  @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
115
155
  @requires_env("ASTRA_DB_API_ENDPOINT", "ASTRA_DB_APPLICATION_TOKEN")
@@ -122,7 +122,7 @@ async def test_kafka_source_local(kafka_seed_topic: str):
122
122
  indexer=indexer,
123
123
  downloader=downloader,
124
124
  configs=SourceValidationConfigs(
125
- test_id="kafka", expected_num_files=5, validate_downloaded_files=True
125
+ test_id="kafka-local", expected_num_files=5, validate_downloaded_files=True
126
126
  ),
127
127
  )
128
128
 
@@ -204,7 +204,7 @@ async def test_kafka_source_cloud(kafka_seed_topic_cloud: int):
204
204
  indexer=indexer,
205
205
  downloader=downloader,
206
206
  configs=SourceValidationConfigs(
207
- test_id="kafka",
207
+ test_id="kafka-cloud",
208
208
  exclude_fields_extend=["connector_type"],
209
209
  expected_num_files=expected_messages,
210
210
  validate_downloaded_files=True,
@@ -197,7 +197,10 @@ async def test_mongodb_source(temp_dir: Path):
197
197
  indexer=indexer,
198
198
  downloader=downloader,
199
199
  configs=SourceValidationConfigs(
200
- test_id=CONNECTOR_TYPE, expected_num_files=4, validate_downloaded_files=True
200
+ test_id=CONNECTOR_TYPE,
201
+ expected_num_files=4,
202
+ validate_downloaded_files=True,
203
+ expected_number_indexed_file_data=1,
201
204
  ),
202
205
  )
203
206
 
@@ -1,14 +1,13 @@
1
1
  import json
2
2
  import os
3
3
  import shutil
4
- from dataclasses import replace
5
4
  from pathlib import Path
6
5
  from typing import Callable, Optional
7
6
 
8
7
  from deepdiff import DeepDiff
9
8
  from pydantic import Field
10
9
 
11
- from test.integration.connectors.utils.validation.utils import ValidationConfig, reset_dir
10
+ from test.integration.connectors.utils.validation.utils import ValidationConfig
12
11
  from unstructured_ingest.v2.interfaces import Downloader, FileData, Indexer
13
12
 
14
13
 
@@ -92,7 +91,7 @@ def check_contents(
92
91
  file_data_path = expected_output_dir / f"{file_data.identifier}.json"
93
92
  with file_data_path.open("r") as file:
94
93
  expected_file_data_contents = json.load(file)
95
- current_file_data_contents = file_data.to_dict()
94
+ current_file_data_contents = file_data.model_dump()
96
95
  expected_file_data_contents = configs.omit_ignored_fields(expected_file_data_contents)
97
96
  current_file_data_contents = configs.omit_ignored_fields(current_file_data_contents)
98
97
  diff = DeepDiff(expected_file_data_contents, current_file_data_contents)
@@ -160,9 +159,11 @@ def update_fixtures(
160
159
  save_filedata: bool = True,
161
160
  ):
162
161
  # Rewrite the current file data
162
+ if not output_dir.exists():
163
+ output_dir.mkdir(parents=True)
163
164
  if save_filedata:
164
165
  file_data_output_path = output_dir / "file_data"
165
- reset_dir(dir_path=file_data_output_path)
166
+ shutil.rmtree(path=file_data_output_path, ignore_errors=True)
166
167
  print(
167
168
  f"Writing {len(all_file_data)} file data to "
168
169
  f"saved fixture location {file_data_output_path}"
@@ -171,7 +172,7 @@ def update_fixtures(
171
172
  for file_data in all_file_data:
172
173
  file_data_path = file_data_output_path / f"{file_data.identifier}.json"
173
174
  with file_data_path.open(mode="w") as f:
174
- json.dump(file_data.to_dict(), f, indent=2)
175
+ json.dump(file_data.model_dump(), f, indent=2)
175
176
 
176
177
  # Record file structure of download directory
177
178
  download_files = get_files(dir_path=download_dir)
@@ -183,7 +184,7 @@ def update_fixtures(
183
184
  # If applicable, save raw downloads
184
185
  if save_downloads:
185
186
  raw_download_output_path = output_dir / "downloads"
186
- reset_dir(raw_download_output_path)
187
+ shutil.rmtree(path=raw_download_output_path, ignore_errors=True)
187
188
  print(
188
189
  f"Writing {len(download_files)} downloaded files to "
189
190
  f"saved fixture location {raw_download_output_path}"
@@ -213,7 +214,10 @@ def run_all_validations(
213
214
  if configs.validate_file_data:
214
215
  run_expected_results_validation(
215
216
  expected_output_dir=test_output_dir / "file_data",
216
- all_file_data=postdownload_file_data,
217
+ all_file_data=get_all_file_data(
218
+ all_predownload_file_data=predownload_file_data,
219
+ all_postdownload_file_data=postdownload_file_data,
220
+ ),
217
221
  configs=configs,
218
222
  )
219
223
  download_files = get_files(dir_path=download_dir)
@@ -229,6 +233,19 @@ def run_all_validations(
229
233
  )
230
234
 
231
235
 
236
+ def get_all_file_data(
237
+ all_postdownload_file_data: list[FileData], all_predownload_file_data: list[FileData]
238
+ ) -> list[FileData]:
239
+ all_file_data = all_postdownload_file_data
240
+ indexed_file_data = [
241
+ fd
242
+ for fd in all_predownload_file_data
243
+ if fd.identifier not in [f.identifier for f in all_file_data]
244
+ ]
245
+ all_file_data += indexed_file_data
246
+ return all_file_data
247
+
248
+
232
249
  async def source_connector_validation(
233
250
  indexer: Indexer,
234
251
  downloader: Downloader,
@@ -246,7 +263,7 @@ async def source_connector_validation(
246
263
  test_output_dir = configs.test_output_dir()
247
264
  for file_data in indexer.run():
248
265
  assert file_data
249
- predownload_file_data = replace(file_data)
266
+ predownload_file_data = file_data.model_copy(deep=True)
250
267
  all_predownload_file_data.append(predownload_file_data)
251
268
  if downloader.is_async():
252
269
  resp = await downloader.run_async(file_data=file_data)
@@ -254,10 +271,10 @@ async def source_connector_validation(
254
271
  resp = downloader.run(file_data=file_data)
255
272
  if isinstance(resp, list):
256
273
  for r in resp:
257
- postdownload_file_data = replace(r["file_data"])
274
+ postdownload_file_data = r["file_data"].model_copy(deep=True)
258
275
  all_postdownload_file_data.append(postdownload_file_data)
259
276
  else:
260
- postdownload_file_data = replace(resp["file_data"])
277
+ postdownload_file_data = resp["file_data"].model_copy(deep=True)
261
278
  all_postdownload_file_data.append(postdownload_file_data)
262
279
  if not overwrite_fixtures:
263
280
  print("Running validation")
@@ -273,7 +290,10 @@ async def source_connector_validation(
273
290
  update_fixtures(
274
291
  output_dir=test_output_dir,
275
292
  download_dir=download_dir,
276
- all_file_data=all_postdownload_file_data,
293
+ all_file_data=get_all_file_data(
294
+ all_predownload_file_data=all_predownload_file_data,
295
+ all_postdownload_file_data=all_postdownload_file_data,
296
+ ),
277
297
  save_downloads=configs.validate_downloaded_files,
278
298
  save_filedata=configs.validate_file_data,
279
299
  )
@@ -0,0 +1 @@
1
+ __version__ = "0.3.10" # pragma: no cover
@@ -1,6 +1,6 @@
1
1
  from .connector import AccessConfig, BaseConnector, ConnectionConfig
2
2
  from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses
3
- from .file_data import FileData, FileDataSourceMetadata, SourceIdentifiers
3
+ from .file_data import BatchFileData, BatchItem, FileData, FileDataSourceMetadata, SourceIdentifiers
4
4
  from .indexer import Indexer, IndexerConfig
5
5
  from .process import BaseProcess
6
6
  from .processor import ProcessorConfig
@@ -27,4 +27,6 @@ __all__ = [
27
27
  "ConnectionConfig",
28
28
  "BaseConnector",
29
29
  "FileDataSourceMetadata",
30
+ "BatchFileData",
31
+ "BatchItem",
30
32
  ]
@@ -0,0 +1,106 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Optional
4
+ from uuid import NAMESPACE_DNS, uuid5
5
+
6
+ from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
7
+
8
+ from unstructured_ingest.v2.logger import logger
9
+
10
+
11
+ class SourceIdentifiers(BaseModel):
12
+ filename: str
13
+ fullpath: str
14
+ rel_path: Optional[str] = None
15
+
16
+ @property
17
+ def filename_stem(self) -> str:
18
+ return Path(self.filename).stem
19
+
20
+ @property
21
+ def relative_path(self) -> str:
22
+ return self.rel_path or self.fullpath
23
+
24
+
25
+ class FileDataSourceMetadata(BaseModel):
26
+ url: Optional[str] = None
27
+ version: Optional[str] = None
28
+ record_locator: Optional[dict[str, Any]] = None
29
+ date_created: Optional[str] = None
30
+ date_modified: Optional[str] = None
31
+ date_processed: Optional[str] = None
32
+ permissions_data: Optional[list[dict[str, Any]]] = None
33
+ filesize_bytes: Optional[int] = None
34
+
35
+
36
+ class FileData(BaseModel):
37
+ identifier: str
38
+ connector_type: str
39
+ source_identifiers: Optional[SourceIdentifiers] = None
40
+ metadata: FileDataSourceMetadata = Field(default_factory=lambda: FileDataSourceMetadata())
41
+ additional_metadata: dict[str, Any] = Field(default_factory=dict)
42
+ reprocess: bool = False
43
+ local_download_path: Optional[str] = None
44
+ display_name: Optional[str] = None
45
+
46
+ @classmethod
47
+ def from_file(cls, path: str) -> "FileData":
48
+ path = Path(path).resolve()
49
+ if not path.exists() or not path.is_file():
50
+ raise ValueError(f"file path not valid: {path}")
51
+ with open(str(path.resolve()), "rb") as f:
52
+ file_data_dict = json.load(f)
53
+ file_data = cls.model_validate(file_data_dict)
54
+ return file_data
55
+
56
+ @classmethod
57
+ def cast(cls, file_data: "FileData", **kwargs) -> "FileData":
58
+ file_data_dict = file_data.model_dump()
59
+ return cls.model_validate(file_data_dict, **kwargs)
60
+
61
+ def to_file(self, path: str) -> None:
62
+ path = Path(path).resolve()
63
+ path.parent.mkdir(parents=True, exist_ok=True)
64
+ with open(str(path.resolve()), "w") as f:
65
+ json.dump(self.model_dump(), f, indent=2)
66
+
67
+
68
+ class BatchItem(BaseModel):
69
+ identifier: str
70
+ version: Optional[str] = None
71
+
72
+
73
+ class BatchFileData(FileData):
74
+ identifier: str = Field(init=False)
75
+ batch_items: list[BatchItem]
76
+
77
+ @field_validator("batch_items")
78
+ @classmethod
79
+ def check_batch_items(cls, v: list[BatchItem]) -> list[BatchItem]:
80
+ if not v:
81
+ raise ValueError("batch items cannot be empty")
82
+ all_identifiers = [item.identifier for item in v]
83
+ if len(all_identifiers) != len(set(all_identifiers)):
84
+ raise ValueError(f"duplicate identifiers: {all_identifiers}")
85
+ sorted_batch_items = sorted(v, key=lambda item: item.identifier)
86
+ return sorted_batch_items
87
+
88
+ @model_validator(mode="before")
89
+ @classmethod
90
+ def populate_identifier(cls, data: Any) -> Any:
91
+ if isinstance(data, dict) and "identifier" not in data:
92
+ batch_items = data["batch_items"]
93
+ identifier_data = json.dumps(
94
+ {item.identifier: item.version for item in batch_items}, sort_keys=True
95
+ )
96
+ data["identifier"] = str(uuid5(NAMESPACE_DNS, str(identifier_data)))
97
+ return data
98
+
99
+
100
+ def file_data_from_file(path: str) -> FileData:
101
+ try:
102
+ return BatchFileData.from_file(path=path)
103
+ except ValidationError:
104
+ logger.debug(f"{path} not valid for batch file data")
105
+
106
+ return FileData.from_file(path=path)
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import Callable, Optional, TypedDict
7
7
 
8
8
  from unstructured_ingest.v2.interfaces import FileData
9
+ from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
9
10
  from unstructured_ingest.v2.logger import logger
10
11
  from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
11
12
  from unstructured_ingest.v2.processes.chunker import Chunker
@@ -51,7 +52,7 @@ class ChunkStep(PipelineStep):
51
52
  self, fn: Callable, path: str, file_data_path: str, **kwargs
52
53
  ) -> ChunkStepResponse:
53
54
  path = Path(path)
54
- file_data = FileData.from_file(path=file_data_path)
55
+ file_data = file_data_from_file(path=file_data_path)
55
56
  output_filepath = self.get_output_filepath(filename=path)
56
57
  if not self.should_chunk(filepath=output_filepath, file_data=file_data):
57
58
  logger.debug(f"skipping chunking, output already exists: {output_filepath}")
@@ -8,6 +8,7 @@ from typing import Callable, Optional, TypedDict, TypeVar
8
8
 
9
9
  from unstructured_ingest.v2.interfaces import FileData, download_responses
10
10
  from unstructured_ingest.v2.interfaces.downloader import Downloader
11
+ from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
11
12
  from unstructured_ingest.v2.logger import logger
12
13
  from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
13
14
  from unstructured_ingest.v2.utils import serialize_base_model_json
@@ -87,12 +88,12 @@ class DownloadStep(PipelineStep):
87
88
  f"match size of local file: {file_size_bytes}, updating"
88
89
  )
89
90
  file_data.metadata.filesize_bytes = file_size_bytes
90
- logger.debug(f"updating file data with new content: {file_data.to_dict()}")
91
+ logger.debug(f"updating file data with new content: {file_data.model_dump()}")
91
92
  with file_data_path.open("w") as file:
92
- json.dump(file_data.to_dict(), file, indent=2)
93
+ json.dump(file_data.model_dump(), file, indent=2)
93
94
 
94
95
  async def _run_async(self, fn: Callable, file_data_path: str) -> list[DownloadStepResponse]:
95
- file_data = FileData.from_file(path=file_data_path)
96
+ file_data = file_data_from_file(path=file_data_path)
96
97
  download_path = self.process.get_download_path(file_data=file_data)
97
98
  if not self.should_download(file_data=file_data, file_data_path=file_data_path):
98
99
  logger.debug(f"skipping download, file already exists locally: {download_path}")
@@ -172,7 +173,7 @@ class DownloadStep(PipelineStep):
172
173
  filepath = (self.cache_dir / filename).resolve()
173
174
  filepath.parent.mkdir(parents=True, exist_ok=True)
174
175
  with open(str(filepath), "w") as f:
175
- json.dump(file_data.to_dict(), f, indent=2)
176
+ json.dump(file_data.model_dump(), f, indent=2)
176
177
  return str(filepath)
177
178
 
178
179
  def get_hash(self, extras: Optional[list[str]]) -> str:
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import Callable, Optional, TypedDict
7
7
 
8
8
  from unstructured_ingest.v2.interfaces import FileData
9
+ from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
9
10
  from unstructured_ingest.v2.logger import logger
10
11
  from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
11
12
  from unstructured_ingest.v2.processes.embedder import Embedder
@@ -49,7 +50,7 @@ class EmbedStep(PipelineStep):
49
50
 
50
51
  async def _run_async(self, fn: Callable, path: str, file_data_path: str) -> EmbedStepResponse:
51
52
  path = Path(path)
52
- file_data = FileData.from_file(path=file_data_path)
53
+ file_data = file_data_from_file(path=file_data_path)
53
54
  output_filepath = self.get_output_filepath(filename=path)
54
55
  if not self.should_embed(filepath=output_filepath, file_data=file_data):
55
56
  logger.debug(f"skipping embedding, output already exists: {output_filepath}")
@@ -2,7 +2,7 @@ import asyncio
2
2
  from dataclasses import dataclass
3
3
  from typing import Callable, Optional
4
4
 
5
- from unstructured_ingest.v2.interfaces.file_data import FileData
5
+ from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
6
6
  from unstructured_ingest.v2.logger import logger
7
7
  from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
8
8
  from unstructured_ingest.v2.processes.filter import Filterer
@@ -20,7 +20,7 @@ class FilterStep(PipelineStep):
20
20
  logger.info(f"created {self.identifier} with configs: {config}")
21
21
 
22
22
  async def _run_async(self, fn: Callable, file_data_path: str, **kwargs) -> Optional[dict]:
23
- file_data = FileData.from_file(path=file_data_path)
23
+ file_data = file_data_from_file(path=file_data_path)
24
24
  fn_kwargs = {"file_data": file_data}
25
25
  if not asyncio.iscoroutinefunction(fn):
26
26
  resp = fn(**fn_kwargs)
@@ -37,14 +37,14 @@ class IndexStep(PipelineStep):
37
37
  @instrument(span_name=STEP_ID)
38
38
  def run(self) -> Generator[str, None, None]:
39
39
  for file_data in self.process.run():
40
- logger.debug(f"generated file data: {file_data.to_dict()}")
40
+ logger.debug(f"generated file data: {file_data.model_dump()}")
41
41
  try:
42
42
  record_hash = self.get_hash(extras=[file_data.identifier])
43
43
  filename = f"{record_hash}.json"
44
44
  filepath = (self.cache_dir / filename).resolve()
45
45
  filepath.parent.mkdir(parents=True, exist_ok=True)
46
46
  with open(str(filepath), "w") as f:
47
- json.dump(file_data.to_dict(), f, indent=2)
47
+ json.dump(file_data.model_dump(), f, indent=2)
48
48
  yield str(filepath)
49
49
  except Exception as e:
50
50
  logger.error(f"failed to create index for file data: {file_data}", exc_info=True)
@@ -54,14 +54,14 @@ class IndexStep(PipelineStep):
54
54
 
55
55
  async def run_async(self) -> AsyncGenerator[str, None]:
56
56
  async for file_data in self.process.run_async():
57
- logger.debug(f"generated file data: {file_data.to_dict()}")
57
+ logger.debug(f"generated file data: {file_data.model_dump()}")
58
58
  try:
59
59
  record_hash = self.get_hash(extras=[file_data.identifier])
60
60
  filename = f"{record_hash}.json"
61
61
  filepath = (self.cache_dir / filename).resolve()
62
62
  filepath.parent.mkdir(parents=True, exist_ok=True)
63
63
  with open(str(filepath), "w") as f:
64
- json.dump(file_data.to_dict(), f, indent=2)
64
+ json.dump(file_data.model_dump(), f, indent=2)
65
65
  yield str(filepath)
66
66
  except Exception as e:
67
67
  logger.error(f"failed to create index for file data: {file_data}", exc_info=True)