unstructured-ingest 0.0.14__tar.gz → 0.0.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (361)
  1. {unstructured-ingest-0.0.14/unstructured_ingest.egg-info → unstructured-ingest-0.0.15}/PKG-INFO +1 -1
  2. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/test/test_interfaces.py +5 -5
  3. unstructured-ingest-0.0.15/unstructured_ingest/__version__.py +1 -0
  4. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/interfaces.py +1 -1
  5. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/utils.py +1 -1
  6. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/astradb.py +1 -1
  7. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/biomed.py +4 -4
  8. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/chroma.py +1 -1
  9. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/databricks_volumes.py +2 -2
  10. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/box.py +1 -1
  11. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/fsspec.py +5 -5
  12. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/git.py +1 -1
  13. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/google_drive.py +4 -4
  14. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/hubspot.py +1 -1
  15. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/kafka.py +8 -8
  16. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/local.py +1 -1
  17. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/helpers.py +4 -4
  18. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/onedrive.py +3 -3
  19. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/outlook.py +2 -2
  20. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/pinecone.py +1 -1
  21. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/sharepoint.py +8 -8
  22. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/vectara.py +6 -6
  23. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/interfaces.py +4 -4
  24. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/logger.py +1 -1
  25. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/copy.py +1 -1
  26. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/interfaces.py +2 -2
  27. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/partition.py +1 -1
  28. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/pipeline.py +1 -1
  29. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/reformat/chunking.py +2 -2
  30. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/reformat/embedding.py +1 -1
  31. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/source.py +2 -2
  32. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/compression.py +3 -3
  33. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/string_and_date_utils.py +2 -2
  34. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/base/cmd.py +3 -3
  35. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/base/dest.py +1 -1
  36. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/base/src.py +1 -1
  37. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/utils/click.py +1 -1
  38. unstructured-ingest-0.0.15/unstructured_ingest/v2/interfaces/processor.py +88 -0
  39. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/logger.py +1 -1
  40. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/otel.py +1 -1
  41. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/interfaces.py +9 -2
  42. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/pipeline.py +17 -6
  43. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/chunk.py +3 -3
  44. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/download.py +17 -2
  45. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/embed.py +3 -3
  46. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
  47. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/index.py +2 -2
  48. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/partition.py +3 -3
  49. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/stage.py +1 -1
  50. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
  51. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
  52. unstructured-ingest-0.0.15/unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
  53. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +1 -1
  54. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -1
  55. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +4 -4
  56. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/google_drive.py +1 -1
  57. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/local.py +6 -5
  58. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/milvus.py +1 -1
  59. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/onedrive.py +2 -2
  60. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/opensearch.py +1 -1
  61. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/pinecone.py +2 -2
  62. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/sharepoint.py +9 -5
  63. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/filter.py +1 -1
  64. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/partitioner.py +3 -3
  65. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/utils.py +7 -0
  66. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15/unstructured_ingest.egg-info}/PKG-INFO +1 -1
  67. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest.egg-info/SOURCES.txt +1 -1
  68. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest.egg-info/requires.txt +277 -279
  69. unstructured-ingest-0.0.14/unstructured_ingest/__version__.py +0 -1
  70. unstructured-ingest-0.0.14/unstructured_ingest/evaluate.py +0 -338
  71. unstructured-ingest-0.0.14/unstructured_ingest/v2/interfaces/processor.py +0 -53
  72. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/LICENSE.md +0 -0
  73. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/README.md +0 -0
  74. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/pyproject.toml +0 -0
  75. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/setup.cfg +0 -0
  76. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/setup.py +0 -0
  77. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/test/test_chunking_utils.py +0 -0
  78. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/test/test_error.py +0 -0
  79. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/test/test_logger.py +0 -0
  80. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/test/test_utils.py +0 -0
  81. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/test/test_utils_v2.py +0 -0
  82. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/__init__.py +0 -0
  83. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/__init__.py +0 -0
  84. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/base/__init__.py +0 -0
  85. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/base/cmd.py +0 -0
  86. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/base/dest.py +0 -0
  87. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/base/src.py +0 -0
  88. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cli.py +0 -0
  89. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmd_factory.py +0 -0
  90. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/__init__.py +0 -0
  91. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/airtable.py +0 -0
  92. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/astradb.py +0 -0
  93. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
  94. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/biomed.py +0 -0
  95. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/chroma.py +0 -0
  96. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
  97. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/confluence.py +0 -0
  98. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
  99. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
  100. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/discord.py +0 -0
  101. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
  102. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  103. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
  104. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
  105. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
  106. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
  107. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
  108. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
  109. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
  110. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/github.py +0 -0
  111. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
  112. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
  113. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
  114. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/jira.py +0 -0
  115. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/kafka.py +0 -0
  116. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/local.py +0 -0
  117. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
  118. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/notion.py +0 -0
  119. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
  120. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
  121. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/outlook.py +0 -0
  122. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
  123. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
  124. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/reddit.py +0 -0
  125. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
  126. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
  127. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/slack.py +0 -0
  128. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/sql.py +0 -0
  129. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/vectara.py +0 -0
  130. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
  131. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
  132. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/cli/common.py +0 -0
  133. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/__init__.py +0 -0
  134. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/airtable.py +0 -0
  135. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
  136. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/clarifai.py +0 -0
  137. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/confluence.py +0 -0
  138. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/delta_table.py +0 -0
  139. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/discord.py +0 -0
  140. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/elasticsearch.py +0 -0
  141. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  142. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/azure.py +0 -0
  143. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
  144. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
  145. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/s3.py +0 -0
  146. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
  147. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/github.py +0 -0
  148. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/gitlab.py +0 -0
  149. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/jira.py +0 -0
  150. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/mongodb.py +0 -0
  151. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/__init__.py +0 -0
  152. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/client.py +0 -0
  153. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/connector.py +0 -0
  154. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/interfaces.py +0 -0
  155. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  156. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/block.py +0 -0
  157. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
  158. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
  159. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
  160. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
  161. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
  162. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
  163. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
  164. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
  165. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
  166. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
  167. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
  168. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
  169. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
  170. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
  171. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
  172. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
  173. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
  174. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
  175. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
  176. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
  177. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
  178. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
  179. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
  180. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
  181. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
  182. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
  183. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
  184. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
  185. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
  186. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database.py +0 -0
  187. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
  188. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
  189. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
  190. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
  191. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
  192. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
  193. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
  194. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
  195. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
  196. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
  197. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
  198. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
  199. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
  200. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
  201. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
  202. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
  203. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
  204. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
  205. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
  206. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
  207. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
  208. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
  209. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
  210. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/date.py +0 -0
  211. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/file.py +0 -0
  212. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/page.py +0 -0
  213. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/parent.py +0 -0
  214. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
  215. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/notion/types/user.py +0 -0
  216. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/opensearch.py +0 -0
  217. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/qdrant.py +0 -0
  218. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/reddit.py +0 -0
  219. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/registry.py +0 -0
  220. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/salesforce.py +0 -0
  221. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/slack.py +0 -0
  222. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/sql.py +0 -0
  223. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/weaviate.py +0 -0
  224. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/connector/wikipedia.py +0 -0
  225. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/__init__.py +0 -0
  226. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/bedrock.py +0 -0
  227. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/huggingface.py +0 -0
  228. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/interfaces.py +0 -0
  229. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  230. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/octoai.py +0 -0
  231. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/openai.py +0 -0
  232. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/vertexai.py +0 -0
  233. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/embed/voyageai.py +0 -0
  234. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
  235. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
  236. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
  237. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
  238. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/error.py +0 -0
  239. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
  240. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/ingest_backoff/_common.py +0 -0
  241. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
  242. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/main.py +0 -0
  243. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/__init__.py +0 -0
  244. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/doc_factory.py +0 -0
  245. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/permissions.py +0 -0
  246. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  247. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/utils.py +0 -0
  248. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/pipeline/write.py +0 -0
  249. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/processor.py +0 -0
  250. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/__init__.py +0 -0
  251. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/airtable.py +0 -0
  252. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/astradb.py +0 -0
  253. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/base_runner.py +0 -0
  254. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/biomed.py +0 -0
  255. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/confluence.py +0 -0
  256. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/delta_table.py +0 -0
  257. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/discord.py +0 -0
  258. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/elasticsearch.py +0 -0
  259. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  260. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/azure.py +0 -0
  261. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/box.py +0 -0
  262. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
  263. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
  264. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
  265. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/s3.py +0 -0
  266. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
  267. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/github.py +0 -0
  268. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/gitlab.py +0 -0
  269. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/google_drive.py +0 -0
  270. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/hubspot.py +0 -0
  271. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/jira.py +0 -0
  272. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/kafka.py +0 -0
  273. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/local.py +0 -0
  274. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/mongodb.py +0 -0
  275. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/notion.py +0 -0
  276. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/onedrive.py +0 -0
  277. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/opensearch.py +0 -0
  278. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/outlook.py +0 -0
  279. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/reddit.py +0 -0
  280. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/salesforce.py +0 -0
  281. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/sharepoint.py +0 -0
  282. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/slack.py +0 -0
  283. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/utils.py +0 -0
  284. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/wikipedia.py +0 -0
  285. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/__init__.py +0 -0
  286. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/astradb.py +0 -0
  287. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
  288. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/base_writer.py +0 -0
  289. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/chroma.py +0 -0
  290. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/clarifai.py +0 -0
  291. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
  292. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/delta_table.py +0 -0
  293. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
  294. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  295. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
  296. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
  297. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
  298. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
  299. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
  300. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/kafka.py +0 -0
  301. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/mongodb.py +0 -0
  302. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/opensearch.py +0 -0
  303. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/pinecone.py +0 -0
  304. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/qdrant.py +0 -0
  305. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/sql.py +0 -0
  306. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/vectara.py +0 -0
  307. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/runner/writers/weaviate.py +0 -0
  308. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/__init__.py +0 -0
  309. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/chunking.py +0 -0
  310. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/data_prep.py +0 -0
  311. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/dep_check.py +0 -0
  312. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/google_filetype.py +0 -0
  313. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/utils/table.py +0 -0
  314. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/__init__.py +0 -0
  315. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/__init__.py +0 -0
  316. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
  317. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/base/importer.py +0 -0
  318. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/cli.py +0 -0
  319. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/cmds.py +0 -0
  320. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  321. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/cli/utils/model_conversion.py +0 -0
  322. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
  323. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/connector.py +0 -0
  324. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
  325. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/file_data.py +0 -0
  326. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
  327. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/process.py +0 -0
  328. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
  329. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
  330. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/main.py +0 -0
  331. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  332. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/otel.py +0 -0
  333. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  334. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/pipeline/steps/upload.py +0 -0
  335. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/__init__.py +0 -0
  336. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/chunker.py +0 -0
  337. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
  338. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/astradb.py +0 -0
  339. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +0 -0
  340. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/chroma.py +0 -0
  341. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/couchbase.py +0 -0
  342. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -0
  343. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
  344. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +0 -0
  345. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +0 -0
  346. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +0 -0
  347. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +0 -0
  348. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +0 -0
  349. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
  350. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
  351. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/mongodb.py +0 -0
  352. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
  353. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/singlestore.py +0 -0
  354. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/sql.py +0 -0
  355. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
  356. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/connectors/weaviate.py +0 -0
  357. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/embedder.py +0 -0
  358. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest/v2/processes/uncompress.py +0 -0
  359. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
  360. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest.egg-info/entry_points.txt +0 -0
  361. {unstructured-ingest-0.0.14 → unstructured-ingest-0.0.15}/unstructured_ingest.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.14
3
+ Version: 0.0.15
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -38,7 +38,7 @@ TEST_VERSION = "1.1.1"
38
38
  TEST_RECORD_LOCATOR = {"id": "data-source-id"}
39
39
  TEST_DATE_CREATED = "2021-01-01T00:00:00"
40
40
  TEST_DATE_MODIFIED = "2021-01-02T00:00:00"
41
- TEST_DATE_PROCESSSED = "2022-12-13T15:44:08"
41
+ TEST_DATE_PROCESSED = "2022-12-13T15:44:08"
42
42
 
43
43
 
44
44
  @dataclass
@@ -101,7 +101,7 @@ def partition_test_results():
101
101
  record_locator=TEST_RECORD_LOCATOR,
102
102
  date_created=TEST_DATE_CREATED,
103
103
  date_modified=TEST_DATE_MODIFIED,
104
- date_processed=TEST_DATE_PROCESSSED,
104
+ date_processed=TEST_DATE_PROCESSED,
105
105
  ),
106
106
  )
107
107
  return result
@@ -121,7 +121,7 @@ def test_partition_file():
121
121
  read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR),
122
122
  processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR),
123
123
  )
124
- test_ingest_doc._date_processed = TEST_DATE_PROCESSSED
124
+ test_ingest_doc._date_processed = TEST_DATE_PROCESSED
125
125
  elements = test_ingest_doc.partition_file(partition_config=PartitionConfig())
126
126
  element_dicts = elements_to_dicts(elements)
127
127
  assert len(element_dicts)
@@ -153,7 +153,7 @@ def test_partition_file():
153
153
  assert data_source_metadata["record_locator"] == TEST_RECORD_LOCATOR
154
154
  assert data_source_metadata["date_created"] == TEST_DATE_CREATED
155
155
  assert data_source_metadata["date_modified"] == TEST_DATE_MODIFIED
156
- assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSSED
156
+ assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSED
157
157
 
158
158
 
159
159
  def test_process_file_fields_include_default(mocker, partition_test_results):
@@ -183,7 +183,7 @@ def test_process_file_fields_include_default(mocker, partition_test_results):
183
183
  assert data_source_metadata["record_locator"] == TEST_RECORD_LOCATOR
184
184
  assert data_source_metadata["date_created"] == TEST_DATE_CREATED
185
185
  assert data_source_metadata["date_modified"] == TEST_DATE_MODIFIED
186
- assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSSED
186
+ assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSED
187
187
 
188
188
 
189
189
  def test_process_file_metadata_includes_filename_and_filetype(
@@ -0,0 +1 @@
1
+ __version__ = "0.0.15" # pragma: no cover
@@ -73,7 +73,7 @@ class FileOrJson(click.ParamType):
73
73
  return value
74
74
  self.fail(
75
75
  gettext(
76
- "{value} is not a valid json string nor an existing filepath.",
76
+ "{value} is neither a valid json string nor an existing filepath.",
77
77
  ).format(value=value),
78
78
  param,
79
79
  ctx,
@@ -30,7 +30,7 @@ def extract_config(flat_data: dict, config: t.Type[BaseConfig]) -> BaseConfig:
30
30
  To be able to extract a nested dataclass from a flat dictionary (as in one coming
31
31
  from a click-based options input), the config class is dynamically looked through for
32
32
  nested dataclass fields and new nested dictionaries are created to conform to the
33
- shape the overall class expects whn parsing from a dict. During the process, this will create
33
+ shape the overall class expects when parsing from a dict. During the process, this will create
34
34
  copies of the original dictionary to avoid pruning fields but this isn't a
35
35
  problem since the `from_dict()` method ignores unneeded values.
36
36
 
@@ -222,7 +222,7 @@ class AstraDBDestinationConnector(BaseDestinationConnector):
222
222
  raise DestinationConnectionError(f"failed to validate connection: {e}")
223
223
 
224
224
  def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
225
- logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra DB.")
225
+ logger.info(f"inserting / updating {len(elements_dict)} documents to Astra DB.")
226
226
 
227
227
  astra_db_batch_size = self.write_config.batch_size
228
228
 
@@ -123,7 +123,7 @@ class BiomedIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
123
123
  and self.filename.is_file()
124
124
  and not self.read_config.download_only
125
125
  ):
126
- logger.debug(f"Cleaning up {self}")
126
+ logger.debug(f"cleaning up {self}")
127
127
  Path.unlink(self.filename)
128
128
 
129
129
  @SourceConnectionError.wrap
@@ -132,12 +132,12 @@ class BiomedIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
132
132
  download_path = self.file_meta.download_filepath # type: ignore
133
133
  dir_ = Path(os.path.dirname(download_path)) # type: ignore
134
134
  if not dir_.is_dir():
135
- logger.debug(f"Creating directory: {dir_}")
135
+ logger.debug(f"creating directory: {dir_}")
136
136
 
137
137
  if dir_:
138
138
  dir_.mkdir(parents=True, exist_ok=True)
139
139
  self._retrieve()
140
- logger.debug(f"File downloaded: {self.file_meta.download_filepath}")
140
+ logger.debug(f"file downloaded: {self.file_meta.download_filepath}")
141
141
 
142
142
  @SourceConnectionNetworkError.wrap
143
143
  def _retrieve(self):
@@ -229,7 +229,7 @@ class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
229
229
 
230
230
  def traverse(path, download_dir, output_dir):
231
231
  full_path = Path(PMC_DIR) / path
232
- logger.debug(f"Traversing directory: {full_path}")
232
+ logger.debug(f"traversing directory: {full_path}")
233
233
 
234
234
  ftp = FTP(DOMAIN)
235
235
  ftp.login()
@@ -139,7 +139,7 @@ class ChromaDestinationConnector(BaseDestinationConnector):
139
139
  return chroma_dict
140
140
 
141
141
  def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
142
- logger.info(f"Inserting / updating {len(elements_dict)} documents to destination ")
142
+ logger.info(f"inserting / updating {len(elements_dict)} documents to destination ")
143
143
 
144
144
  chroma_batch_size = self.write_config.batch_size
145
145
 
@@ -112,10 +112,10 @@ class DatabricksVolumesDestinationConnector(BaseDestinationConnector):
112
112
  **kwargs,
113
113
  ) -> None:
114
114
  output_folder = self.write_config.path
115
- output_folder = os.path.join(output_folder) # Make sure folder ends with file seperator
115
+ output_folder = os.path.join(output_folder) # Make sure folder ends with file separator
116
116
  filename = (
117
117
  filename.strip(os.sep) if filename else filename
118
- ) # Make sure filename doesn't begin with file seperator
118
+ ) # Make sure filename doesn't begin with file separator
119
119
  output_path = str(PurePath(output_folder, filename)) if filename else output_folder
120
120
  logger.debug(f"uploading content to {output_path}")
121
121
  self.client.files.upload(
@@ -44,7 +44,7 @@ class SimpleBoxConfig(SimpleFsspecConfig):
44
44
 
45
45
  @requires_dependencies(["boxfs"], extras="box")
46
46
  def get_access_config(self) -> dict:
47
- # Return access_kwargs with oauth. The oauth object can not be stored directly in the config
47
+ # Return access_kwargs with oauth. The oauth object cannot be stored directly in the config
48
48
  # because it is not serializable.
49
49
  from boxsdk import JWTAuth
50
50
 
@@ -221,12 +221,12 @@ class FsspecSourceConnector(
221
221
  for pattern in patterns:
222
222
  if fnmatch.filter([path], pattern):
223
223
  return True
224
- logger.debug(f"The file {path!r} is discarded as it does not match any given glob.")
224
+ logger.debug(f"the file {path!r} is discarded as it does not match any given glob.")
225
225
  return False
226
226
 
227
227
  def get_ingest_docs(self):
228
228
  raw_files = self._list_files()
229
- # If glob filters provided, use to fiter on filepaths
229
+ # If glob filters provided, use to filter on filepaths
230
230
  files = [f for f in raw_files if self.does_path_match_glob(f)]
231
231
  # remove compressed files
232
232
  compressed_file_ext = TAR_FILE_EXT + ZIP_FILE_EXT
@@ -328,13 +328,13 @@ class FsspecDestinationConnector(BaseDestinationConnector):
328
328
  **self.connector_config.get_access_config(),
329
329
  )
330
330
 
331
- logger.info(f"Writing content using filesystem: {type(fs).__name__}")
331
+ logger.info(f"writing content using filesystem: {type(fs).__name__}")
332
332
 
333
333
  output_folder = self.connector_config.path_without_protocol
334
- output_folder = os.path.join(output_folder) # Make sure folder ends with file seperator
334
+ output_folder = os.path.join(output_folder) # Make sure folder ends with file separator
335
335
  filename = (
336
336
  filename.strip(os.sep) if filename else filename
337
- ) # Make sure filename doesn't begin with file seperator
337
+ ) # Make sure filename doesn't begin with file separator
338
338
  output_path = str(PurePath(output_folder, filename)) if filename else output_folder
339
339
  full_output_path = f"{self.connector_config.protocol}://{output_path}"
340
340
  logger.debug(f"uploading content to {full_output_path}")
@@ -120,5 +120,5 @@ class GitSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
120
120
  for pattern in patterns:
121
121
  if fnmatch.filter([path], pattern):
122
122
  return True
123
- logger.debug(f"The file {path!r} is discarded as it does not match any given glob.")
123
+ logger.debug(f"the file {path!r} is discarded as it does not match any given glob.")
124
124
  return False
@@ -222,7 +222,7 @@ class GoogleDriveIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, B
222
222
  dir_ = Path(self.meta["download_dir"])
223
223
  if dir_:
224
224
  if not dir_.is_dir():
225
- logger.debug(f"Creating directory: {self.meta.get('download_dir')}")
225
+ logger.debug(f"creating directory: {self.meta.get('download_dir')}")
226
226
 
227
227
  if dir_:
228
228
  dir_.mkdir(parents=True, exist_ok=True)
@@ -230,7 +230,7 @@ class GoogleDriveIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, B
230
230
  with open(self.filename, "wb") as handler:
231
231
  handler.write(file.getbuffer())
232
232
  saved = True
233
- logger.debug(f"File downloaded: {self.filename}.")
233
+ logger.debug(f"file downloaded: {self.filename}.")
234
234
  if not saved:
235
235
  logger.error(f"Error while downloading and saving file: {self.filename}.")
236
236
 
@@ -241,7 +241,7 @@ class GoogleDriveIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, B
241
241
  self._output_filename.parent.mkdir(parents=True, exist_ok=True)
242
242
  with open(self._output_filename, "w") as output_f:
243
243
  output_f.write(json.dumps(self.isd_elems_no_filename, ensure_ascii=False, indent=2))
244
- logger.info(f"Wrote {self._output_filename}")
244
+ logger.info(f"wrote {self._output_filename}")
245
245
 
246
246
 
247
247
  @dataclass
@@ -295,7 +295,7 @@ class GoogleDriveSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnecto
295
295
  guess = guess_extension(export_mime)
296
296
  ext = guess if guess else ext
297
297
 
298
- # TODO (Habeeb): Consider filtering at the query level.
298
+ # TODO(Habeeb): Consider filtering at the query level.
299
299
  if (
300
300
  self.connector_config.extension
301
301
  and self.connector_config.extension != ext
@@ -271,7 +271,7 @@ class HubSpotSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
271
271
 
272
272
  ingest_docs: t.List[HubSpotIngestDoc] = []
273
273
  for obj_name, obj_method in obj_method_resolver.items():
274
- logger.info(f"Retrieving - {obj_name}")
274
+ logger.info(f"retrieving - {obj_name}")
275
275
  results: t.List[HubSpotIngestDoc] = obj_method() # type: ignore
276
276
  ingest_docs += results # type: ignore
277
277
 
@@ -114,7 +114,7 @@ class KafkaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
114
114
 
115
115
  def initialize(self):
116
116
  topic = self.connector_config.topic
117
- logger.info(f"Subscribing to topic: {topic}")
117
+ logger.info(f"subscribing to topic: {topic}")
118
118
  self.kafka_consumer.subscribe([topic])
119
119
 
120
120
  @property
@@ -149,7 +149,7 @@ class KafkaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
149
149
  conf["sasl.password"] = secret
150
150
 
151
151
  consumer = Consumer(conf)
152
- logger.debug(f"Kafka Consumer connected to bootstrap: {bootstrap}")
152
+ logger.debug(f"kafka consumer connected to bootstrap: {bootstrap}")
153
153
  return consumer
154
154
 
155
155
  @SourceConnectionError.wrap
@@ -161,7 +161,7 @@ class KafkaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
161
161
 
162
162
  collected = []
163
163
  num_messages_to_consume = self.connector_config.num_messages_to_consume
164
- logger.info(f"Config set for blocking on {num_messages_to_consume} messages")
164
+ logger.info(f"config set for blocking on {num_messages_to_consume} messages")
165
165
  # Consume specified number of messages
166
166
  while running:
167
167
  msg = consumer.poll(timeout=self.connector_config.timeout)
@@ -178,7 +178,7 @@ class KafkaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
178
178
  else:
179
179
  collected.append(json.loads(msg.value().decode("utf8")))
180
180
  if len(collected) >= num_messages_to_consume:
181
- logger.debug(f"Found {len(collected)} messages, stopping")
181
+ logger.debug(f"found {len(collected)} messages, stopping")
182
182
  consumer.commit(asynchronous=False)
183
183
  break
184
184
 
@@ -243,7 +243,7 @@ class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConn
243
243
  conf["sasl.password"] = secret
244
244
 
245
245
  producer = Producer(conf)
246
- logger.debug(f"Connected to bootstrap: {bootstrap}")
246
+ logger.debug(f"connected to bootstrap: {bootstrap}")
247
247
  return producer
248
248
 
249
249
  def check_connection(self):
@@ -255,7 +255,7 @@ class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConn
255
255
 
256
256
  @DestinationConnectionError.wrap
257
257
  def upload_msg(self, batch) -> int:
258
- logger.debug(f"Uploading batch: {batch}")
258
+ logger.debug(f"uploading batch: {batch}")
259
259
  topic = self.connector_config.topic
260
260
  producer = self.kafka_producer
261
261
  uploaded = 0
@@ -267,7 +267,7 @@ class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConn
267
267
 
268
268
  @DestinationConnectionError.wrap
269
269
  def write_dict(self, *args, dict_list: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
270
- logger.info(f"Writing {len(dict_list)} documents to Kafka")
270
+ logger.info(f"writing {len(dict_list)} documents to Kafka")
271
271
  num_uploaded = 0
272
272
 
273
273
  for chunk in batch_generator(dict_list, self.write_config.batch_size):
@@ -275,7 +275,7 @@ class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConn
275
275
 
276
276
  producer = self.kafka_producer
277
277
  producer.flush()
278
- logger.info(f"Uploaded {num_uploaded} documents to Kafka")
278
+ logger.info(f"uploaded {num_uploaded} documents to Kafka")
279
279
 
280
280
  def write(self, docs: t.List[BaseIngestDoc]) -> None:
281
281
  content_list: t.List[t.Dict[str, t.Any]] = []
@@ -123,7 +123,7 @@ class LocalSourceConnector(BaseSourceConnector):
123
123
  for pattern in patterns:
124
124
  if fnmatch.filter([path], pattern):
125
125
  return True
126
- logger.debug(f"The file {path!r} is discarded as it does not match any given glob.")
126
+ logger.debug(f"the file {path!r} is discarded as it does not match any given glob.")
127
127
  return False
128
128
 
129
129
  def get_ingest_docs(self):
@@ -103,7 +103,7 @@ def extract_page_html(
103
103
  ):
104
104
  children.extend(children_block)
105
105
  if children:
106
- logger.debug(f"Adding {len(children)} children from parent: {parent}")
106
+ logger.debug(f"adding {len(children)} children from parent: {parent}")
107
107
  for child in children:
108
108
  if child.id not in processed_block_ids:
109
109
  parents.append((level + 1, child))
@@ -159,7 +159,7 @@ def extract_database_html(
159
159
  for page_chunk in client.databases.iterate_query(database_id=database_id): # type: ignore
160
160
  all_pages.extend(page_chunk)
161
161
 
162
- logger.debug(f"Creating {len(all_pages)} rows")
162
+ logger.debug(f"creating {len(all_pages)} rows")
163
163
  for page in all_pages:
164
164
  if is_database_url(client=client, url=page.url):
165
165
  child_databases.append(page.id)
@@ -237,7 +237,7 @@ def get_recursive_content(
237
237
  parent: QueueEntry = parents.pop()
238
238
  processed.append(str(parent.id))
239
239
  if parent.type == QueueEntryType.PAGE:
240
- logger.debug(f"Getting child data from page: {parent.id}")
240
+ logger.debug(f"getting child data from page: {parent.id}")
241
241
  page_children = []
242
242
  try:
243
243
  for children_block in client.blocks.children.iterate_list( # type: ignore
@@ -316,7 +316,7 @@ def get_recursive_content(
316
316
  )
317
317
 
318
318
  elif parent.type == QueueEntryType.DATABASE:
319
- logger.debug(f"Getting child data from database: {parent.id}")
319
+ logger.debug(f"getting child data from database: {parent.id}")
320
320
  database_pages = []
321
321
  try:
322
322
  for page_entries in client.databases.iterate_query( # type: ignore
@@ -157,17 +157,17 @@ class OneDriveIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
157
157
  self.output_dir.mkdir(parents=True, exist_ok=True)
158
158
 
159
159
  if not self.download_dir.is_dir():
160
- logger.debug(f"Creating directory: {self.download_dir}")
160
+ logger.debug(f"creating directory: {self.download_dir}")
161
161
  self.download_dir.mkdir(parents=True, exist_ok=True)
162
162
 
163
163
  if fsize > MAX_MB_SIZE:
164
- logger.info(f"Downloading file with size: {fsize} bytes in chunks")
164
+ logger.info(f"downloading file with size: {fsize} bytes in chunks")
165
165
  with self.filename.open(mode="wb") as f:
166
166
  file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
167
167
  else:
168
168
  with self.filename.open(mode="wb") as f:
169
169
  file.download(f).execute_query()
170
- logger.info(f"File downloaded: {self.filename}")
170
+ logger.info(f"file downloaded: {self.filename}")
171
171
  return
172
172
 
173
173
 
@@ -164,7 +164,7 @@ class OutlookIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
164
164
  self.connector_config._get_client()
165
165
  self.update_source_metadata()
166
166
  if not self.download_dir.is_dir():
167
- logger.debug(f"Creating directory: {self.download_dir}")
167
+ logger.debug(f"creating directory: {self.download_dir}")
168
168
  self.download_dir.mkdir(parents=True, exist_ok=True)
169
169
 
170
170
  with open(
@@ -182,7 +182,7 @@ class OutlookIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
182
182
  )
183
183
  logger.error(e)
184
184
  return
185
- logger.info(f"File downloaded: {self.hash_mail_name(self.message_id)}")
185
+ logger.info(f"file downloaded: {self.hash_mail_name(self.message_id)}")
186
186
  return
187
187
 
188
188
 
@@ -80,7 +80,7 @@ class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationC
80
80
  )
81
81
 
82
82
  index = pc.Index(self.connector_config.index_name)
83
- logger.debug(f"Connected to index: {pc.describe_index(self.connector_config.index_name)}")
83
+ logger.debug(f"connected to index: {pc.describe_index(self.connector_config.index_name)}")
84
84
  return index
85
85
 
86
86
  @DestinationConnectionError.wrap
@@ -253,11 +253,11 @@ class SharepointIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
253
253
 
254
254
  self.output_dir.mkdir(parents=True, exist_ok=True)
255
255
  if not self.download_dir.is_dir():
256
- logger.debug(f"Creating directory: {self.download_dir}")
256
+ logger.debug(f"creating directory: {self.download_dir}")
257
257
  self.download_dir.mkdir(parents=True, exist_ok=True)
258
258
  with self.filename.open(mode="w") as f:
259
259
  f.write(pld)
260
- logger.info(f"File downloaded: {self.filename}")
260
+ logger.info(f"file downloaded: {self.filename}")
261
261
 
262
262
  def _download_file(self):
263
263
  file = self._fetch_file()
@@ -266,17 +266,17 @@ class SharepointIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
266
266
  self.output_dir.mkdir(parents=True, exist_ok=True)
267
267
 
268
268
  if not self.download_dir.is_dir():
269
- logger.debug(f"Creating directory: {self.download_dir}")
269
+ logger.debug(f"creating directory: {self.download_dir}")
270
270
  self.download_dir.mkdir(parents=True, exist_ok=True)
271
271
 
272
272
  if fsize > MAX_MB_SIZE:
273
- logger.info(f"Downloading file with size: {fsize} bytes in chunks")
273
+ logger.info(f"downloading file with size: {fsize} bytes in chunks")
274
274
  with self.filename.open(mode="wb") as f:
275
275
  file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
276
276
  else:
277
277
  with self.filename.open(mode="wb") as f:
278
278
  file.download(f).execute_query()
279
- logger.info(f"File downloaded: {self.filename}")
279
+ logger.info(f"file downloaded: {self.filename}")
280
280
 
281
281
  @BaseSingleIngestDoc.skip_if_file_exists
282
282
  @SourceConnectionError.wrap
@@ -374,7 +374,7 @@ class SharepointSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector
374
374
  if self.connector_config.process_pages:
375
375
  page_output = self._list_pages(site_client)
376
376
  if not page_output:
377
- logger.info(f"Couldn't process pages for site {site_client.base_url}")
377
+ logger.info(f"couldn't process pages for site {site_client.base_url}")
378
378
  output = output + page_output
379
379
  return output
380
380
 
@@ -404,7 +404,7 @@ class SharepointSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector
404
404
  tenant_sites = {s.url for s in tenant_sites if (s.url is not None)}
405
405
  ingest_docs: t.List[SharepointIngestDoc] = []
406
406
  for site_url in tenant_sites:
407
- logger.info(f"Processing docs for site: {site_url}")
407
+ logger.info(f"processing docs for site: {site_url}")
408
408
  site_client = self.connector_config.get_site_client(site_url)
409
409
  ingest_docs = ingest_docs + self._ingest_site_docs(site_client)
410
410
  return ingest_docs
@@ -440,7 +440,7 @@ class SharepointPermissionsConnector:
440
440
  if response.status_code == 200:
441
441
  return response.json()
442
442
  else:
443
- logger.info(f"Request failed with status code {response.status_code}:")
443
+ logger.info(f"request failed with status code {response.status_code}:")
444
444
  logger.info(response.text)
445
445
 
446
446
  @requires_dependencies(["requests"], extras="sharepoint")
@@ -181,7 +181,7 @@ class VectaraDestinationConnector(BaseDestinationConnector):
181
181
  try:
182
182
  result = self._request(endpoint="index", data=body, http_method="POST")
183
183
  except Exception as e:
184
- logger.info(f"Exception {e} while indexing document {document['documentId']}")
184
+ logger.info(f"exception {e} while indexing document {document['documentId']}")
185
185
  return
186
186
 
187
187
  if (
@@ -196,18 +196,18 @@ class VectaraDestinationConnector(BaseDestinationConnector):
196
196
  )
197
197
  )
198
198
  ):
199
- logger.info(f"Document {document['documentId']} already exists, re-indexing")
199
+ logger.info(f"document {document['documentId']} already exists, re-indexing")
200
200
  self._delete_doc(document["documentId"])
201
201
  result = self._request(endpoint="index", data=body, http_method="POST")
202
202
  return
203
203
 
204
204
  if "status" in result and result["status"] and "OK" in result["status"]["code"]:
205
- logger.info(f"Indexing document {document['documentId']} succeeded")
205
+ logger.info(f"indexing document {document['documentId']} succeeded")
206
206
  else:
207
- logger.info(f"Indexing document {document['documentId']} failed, response = {result}")
207
+ logger.info(f"indexing document {document['documentId']} failed, response = {result}")
208
208
 
209
209
  def write_dict(self, *args, docs_list: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
210
- logger.info(f"Inserting / updating {len(docs_list)} documents to Vectara ")
210
+ logger.info(f"inserting / updating {len(docs_list)} documents to Vectara ")
211
211
  for vdoc in docs_list:
212
212
  self._index_document(vdoc)
213
213
 
@@ -216,7 +216,7 @@ class VectaraDestinationConnector(BaseDestinationConnector):
216
216
 
217
217
  def get_metadata(element) -> t.Dict[str, t.Any]:
218
218
  """
219
- Select which meta-data fields to include and optionaly map them to a new new.
219
+ Select which meta-data fields to include and optionally map them to a new name.
220
220
  remove the "metadata-" prefix from the keys
221
221
  """
222
222
  metadata_map = {
@@ -529,7 +529,7 @@ class BaseSingleIngestDoc(BaseIngestDoc, IngestDocJsonMixin, ABC):
529
529
  and self.filename.is_file()
530
530
  and self.filename.stat().st_size
531
531
  ):
532
- logger.debug(f"File exists: {self.filename}, skipping {func.__name__}")
532
+ logger.debug(f"file exists: {self.filename}, skipping {func.__name__}")
533
533
  return None
534
534
  return func(self, *args, **kwargs)
535
535
 
@@ -586,7 +586,7 @@ class BaseSingleIngestDoc(BaseIngestDoc, IngestDocJsonMixin, ABC):
586
586
 
587
587
  endpoint = partition_config.partition_endpoint
588
588
 
589
- logger.debug(f"Using remote partition ({endpoint})")
589
+ logger.debug(f"using remote partition ({endpoint})")
590
590
 
591
591
  elements = partition_via_api(
592
592
  filename=str(self.filename),
@@ -606,7 +606,7 @@ class BaseSingleIngestDoc(BaseIngestDoc, IngestDocJsonMixin, ABC):
606
606
  self._date_processed = datetime.utcnow().isoformat()
607
607
  if self.read_config.download_only:
608
608
  return None
609
- logger.info(f"Processing {self.filename}")
609
+ logger.info(f"processing {self.filename}")
610
610
 
611
611
  elements = self.partition_file(partition_config=partition_config, **partition_kwargs)
612
612
  element_dicts = [e.to_dict() for e in elements]
@@ -824,7 +824,7 @@ class IngestDocCleanupMixin:
824
824
  and self.filename.is_file()
825
825
  and not self.read_config.download_only
826
826
  ):
827
- logger.debug(f"Cleaning up {self}")
827
+ logger.debug(f"cleaning up {self}")
828
828
  os.unlink(self.filename)
829
829
 
830
830
 
@@ -95,7 +95,7 @@ class SensitiveFormatter(logging.Formatter):
95
95
 
96
96
 
97
97
  def remove_root_handlers(logger: logging.Logger) -> None:
98
- # NOTE(robinson) - in some environments such as Google Colab, there is a root handler
98
+ # NOTE(robinson): in some environments such as Google Colab, there is a root handler
99
99
  # that does not mask secrets, meaning sensitive info such as api keys appear in logs.
100
100
  # Removing these when they exist prevents this behavior
101
101
  if logger.root.hasHandlers():
@@ -15,5 +15,5 @@ class Copier(CopyNode):
15
15
  ingest_doc = create_ingest_doc_from_dict(ingest_doc_dict)
16
16
  desired_output = ingest_doc._output_filename
17
17
  Path(desired_output).parent.mkdir(parents=True, exist_ok=True)
18
- logger.info(f"Copying {json_path} -> {desired_output}")
18
+ logger.info(f"copying {json_path} -> {desired_output}")
19
19
  shutil.copy(json_path, desired_output)
@@ -57,7 +57,7 @@ class PipelineNode(DataClassJsonMixin, ABC):
57
57
  iterable = iterable if iterable else []
58
58
  if iterable:
59
59
  logger.info(
60
- f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore
60
+ f"calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore
61
61
  )
62
62
 
63
63
  self.initialize()
@@ -92,7 +92,7 @@ class PipelineNode(DataClassJsonMixin, ABC):
92
92
 
93
93
  def initialize(self):
94
94
  if path := self.get_path():
95
- logger.info(f"Creating {path}")
95
+ logger.info(f"creating {path}")
96
96
  path.mkdir(parents=True, exist_ok=True)
97
97
  ingest_log_streaming_init(logging.DEBUG if self.pipeline_context.verbose else logging.INFO)
98
98
 
@@ -30,7 +30,7 @@ class Partitioner(PartitionNode):
30
30
  and json_path.is_file()
31
31
  and json_path.stat().st_size
32
32
  ):
33
- logger.info(f"File exists: {json_path}, skipping partition")
33
+ logger.info(f"file exists: {json_path}, skipping partition")
34
34
  return str(json_path)
35
35
  partition_kwargs: t.Dict[str, t.Any] = {
36
36
  "strategy": self.partition_config.strategy,
@@ -96,7 +96,7 @@ class Pipeline(DataClassJsonMixin):
96
96
  for reformat_node in self.reformat_nodes:
97
97
  reformatted_jsons = reformat_node(iterable=partitioned_jsons)
98
98
  if not reformatted_jsons:
99
- logger.info(f"No files to process after {reformat_node.__class__.__name__}")
99
+ logger.info(f"no files to process after {reformat_node.__class__.__name__}")
100
100
  return
101
101
  partitioned_jsons = reformatted_jsons
102
102
 
@@ -58,7 +58,7 @@ class Chunker(ReformatNode):
58
58
  and json_path.is_file()
59
59
  and json_path.stat().st_size
60
60
  ):
61
- logger.debug(f"File exists: {json_path}, skipping chunking")
61
+ logger.debug(f"file exists: {json_path}, skipping chunking")
62
62
  return str(json_path)
63
63
 
64
64
  chunked_elements = self.chunk(elements_json)
@@ -112,7 +112,7 @@ class Chunker(ReformatNode):
112
112
 
113
113
  return partition_via_api(
114
114
  filename=elements_json_file,
115
- # -- (jennings) If api_key or api_url are None, partition_via_api will raise an
115
+ # -- NOTE(jennings): If api_key or api_url are None, partition_via_api will raise an
116
116
  # -- error, which will be caught and logged by Chunker.run()
117
117
  api_key=self.partition_config.api_key, # type: ignore
118
118
  api_url=self.partition_config.partition_endpoint, # type: ignore
@@ -44,7 +44,7 @@ class Embedder(ReformatNode):
44
44
  and json_path.is_file()
45
45
  and json_path.stat().st_size
46
46
  ):
47
- logger.debug(f"File exists: {json_path}, skipping embedding")
47
+ logger.debug(f"file exists: {json_path}, skipping embedding")
48
48
  return str(json_path)
49
49
  with open(elements_json) as f:
50
50
  elements = json.load(f)