unstructured-ingest 1.0.13__tar.gz → 1.0.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic; further details are linked from the original release page.

Files changed (239)
  1. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/PKG-INFO +1 -1
  2. unstructured_ingest-1.0.15/unstructured_ingest/__version__.py +1 -0
  3. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/bedrock.py +51 -16
  4. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/huggingface.py +1 -1
  5. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/interfaces.py +3 -0
  6. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/mixedbreadai.py +2 -2
  7. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/octoai.py +19 -2
  8. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/openai.py +34 -0
  9. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/togetherai.py +17 -0
  10. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/vertexai.py +1 -1
  11. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/google_drive.py +16 -39
  12. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/embedder.py +4 -0
  13. unstructured_ingest-1.0.13/unstructured_ingest/__version__.py +0 -1
  14. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/.gitignore +0 -0
  15. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/LICENSE.md +0 -0
  16. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/README.md +0 -0
  17. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/pyproject.toml +0 -0
  18. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/__init__.py +0 -0
  19. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/README.md +0 -0
  20. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/__init__.py +0 -0
  21. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/base/__init__.py +0 -0
  22. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/base/cmd.py +0 -0
  23. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/base/dest.py +0 -0
  24. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/base/importer.py +0 -0
  25. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/base/src.py +0 -0
  26. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/cli.py +0 -0
  27. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/cmds.py +0 -0
  28. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/utils/__init__.py +0 -0
  29. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/utils/click.py +0 -0
  30. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  31. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/data_types/__init__.py +0 -0
  32. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/data_types/entities.py +0 -0
  33. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/data_types/file_data.py +0 -0
  34. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/__init__.py +0 -0
  35. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/azure_openai.py +0 -0
  36. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/embed/voyageai.py +0 -0
  37. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/error.py +0 -0
  38. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/errors_v2.py +0 -0
  39. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/__init__.py +0 -0
  40. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/connector.py +0 -0
  41. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/downloader.py +0 -0
  42. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/indexer.py +0 -0
  43. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/process.py +0 -0
  44. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/processor.py +0 -0
  45. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/upload_stager.py +0 -0
  46. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/interfaces/uploader.py +0 -0
  47. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/logger.py +0 -0
  48. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/main.py +0 -0
  49. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/otel.py +0 -0
  50. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/__init__.py +0 -0
  51. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/interfaces.py +0 -0
  52. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/otel.py +0 -0
  53. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/pipeline.py +0 -0
  54. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  55. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  56. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/download.py +0 -0
  57. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  58. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  59. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/index.py +0 -0
  60. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  61. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  62. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  63. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  64. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/__init__.py +0 -0
  65. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/chunker.py +0 -0
  66. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connector_registry.py +0 -0
  67. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  68. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/airtable.py +0 -0
  69. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  70. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  71. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  72. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/astradb.py +0 -0
  73. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
  74. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  75. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/confluence.py +0 -0
  76. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  77. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  78. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  79. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
  80. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
  81. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
  82. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  83. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
  84. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
  85. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/discord.py +0 -0
  86. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  87. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
  88. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
  89. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
  90. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  91. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  92. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  93. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  94. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  95. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  96. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  97. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  98. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  99. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
  100. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  101. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  102. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/github.py +0 -0
  103. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
  104. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  105. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
  106. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/jira.py +0 -0
  107. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  108. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  109. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  110. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  111. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
  112. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  113. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  114. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  115. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  116. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  117. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  118. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  119. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/local.py +0 -0
  120. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  121. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  122. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  123. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  124. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  125. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  126. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  127. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  128. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  129. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  130. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  131. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  132. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  133. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  134. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  135. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  136. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  137. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  138. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  139. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  140. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  141. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  142. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  143. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  144. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  145. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  146. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  147. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  148. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  149. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  150. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  151. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  152. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  153. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  154. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  155. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  156. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  157. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  158. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  159. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  160. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  161. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  162. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  163. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  164. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  165. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  166. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  167. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  168. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  169. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  170. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  171. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  172. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  173. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  174. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  175. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  176. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  177. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  178. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  179. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  180. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  181. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
  182. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  183. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  184. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  185. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  186. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  187. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  188. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  189. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  190. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  191. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  192. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  193. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  194. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/outlook.py +0 -0
  195. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
  196. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  197. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  198. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  199. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  200. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  201. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
  202. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
  203. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  204. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/slack.py +0 -0
  205. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  206. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
  207. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
  208. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
  209. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
  210. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
  211. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
  212. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
  213. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/utils.py +0 -0
  214. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/vectara.py +0 -0
  215. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  216. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  217. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  218. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  219. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  220. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  221. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  222. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
  223. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/filter.py +0 -0
  224. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/partitioner.py +0 -0
  225. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/uncompress.py +0 -0
  226. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/utils/__init__.py +0 -0
  227. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
  228. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/unstructured_api.py +0 -0
  229. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/__init__.py +0 -0
  230. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/chunking.py +0 -0
  231. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/compression.py +0 -0
  232. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/constants.py +0 -0
  233. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/data_prep.py +0 -0
  234. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/dep_check.py +0 -0
  235. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/html.py +0 -0
  236. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/ndjson.py +0 -0
  237. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/pydantic_models.py +0 -0
  238. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  239. {unstructured_ingest-1.0.13 → unstructured_ingest-1.0.15}/unstructured_ingest/utils/table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.13
3
+ Version: 1.0.15
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__ = "1.0.15" # pragma: no cover
@@ -26,16 +26,20 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
26
26
  if TYPE_CHECKING:
27
27
  from botocore.client import BaseClient
28
28
 
29
- class BedrockClient(BaseClient):
29
+ class BedrockRuntimeClient(BaseClient):
30
30
  def invoke_model(self, body: str, modelId: str, accept: str, contentType: str) -> dict:
31
31
  pass
32
32
 
33
- class AsyncBedrockClient(BaseClient):
33
+ class AsyncBedrockRuntimeClient(BaseClient):
34
34
  async def invoke_model(
35
35
  self, body: str, modelId: str, accept: str, contentType: str
36
36
  ) -> dict:
37
37
  pass
38
38
 
39
+ class BedrockClient(BaseClient):
40
+ def list_foundation_models(self, byOutputModality: str) -> dict:
41
+ pass
42
+
39
43
 
40
44
  def conform_query(query: str, provider: str) -> dict:
41
45
  # replace newlines, which can negatively affect performance.
@@ -87,19 +91,49 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
87
91
  logger.error(f"unhandled exception from bedrock: {e}", exc_info=True)
88
92
  return e
89
93
 
94
+ def run_precheck(self) -> None:
95
+ client = self.get_bedrock_client()
96
+ try:
97
+ model_info = client.list_foundation_models(byOutputModality="EMBEDDING")
98
+ summaries = model_info.get("modelSummaries", [])
99
+ model_ids = [m["modelId"] for m in summaries]
100
+ arns = [":".join(m["modelArn"]) for m in summaries]
101
+
102
+ if self.embedder_model_name not in model_ids and self.embedder_model_name not in arns:
103
+ raise UserError(
104
+ "model '{}' not found either : {} or {}".format(
105
+ self.embedder_model_name, ", ".join(model_ids), ", ".join(arns)
106
+ )
107
+ )
108
+ except Exception as e:
109
+ raise self.wrap_error(e=e)
110
+
111
+ def get_client_kwargs(self) -> dict:
112
+ return {
113
+ "aws_access_key_id": self.aws_access_key_id.get_secret_value(),
114
+ "aws_secret_access_key": self.aws_secret_access_key.get_secret_value(),
115
+ "region_name": self.region_name,
116
+ }
117
+
118
+ @requires_dependencies(
119
+ ["boto3"],
120
+ extras="bedrock",
121
+ )
122
+ def get_bedrock_client(self) -> "BedrockClient":
123
+ import boto3
124
+
125
+ bedrock_client = boto3.client(service_name="bedrock", **self.get_client_kwargs())
126
+
127
+ return bedrock_client
128
+
90
129
  @requires_dependencies(
91
130
  ["boto3", "numpy", "botocore"],
92
131
  extras="bedrock",
93
132
  )
94
- def get_client(self) -> "BedrockClient":
133
+ def get_client(self) -> "BedrockRuntimeClient":
95
134
  import boto3
96
135
 
97
- bedrock_client = boto3.client(
98
- service_name="bedrock-runtime",
99
- aws_access_key_id=self.aws_access_key_id.get_secret_value(),
100
- aws_secret_access_key=self.aws_secret_access_key.get_secret_value(),
101
- region_name=self.region_name,
102
- )
136
+ bedrock_client = boto3.client(service_name="bedrock-runtime", **self.get_client_kwargs())
103
137
 
104
138
  return bedrock_client
105
139
 
@@ -108,16 +142,11 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
108
142
  extras="bedrock",
109
143
  )
110
144
  @asynccontextmanager
111
- async def get_async_client(self) -> AsyncIterable["AsyncBedrockClient"]:
145
+ async def get_async_client(self) -> AsyncIterable["AsyncBedrockRuntimeClient"]:
112
146
  import aioboto3
113
147
 
114
148
  session = aioboto3.Session()
115
- async with session.client(
116
- "bedrock-runtime",
117
- aws_access_key_id=self.aws_access_key_id.get_secret_value(),
118
- aws_secret_access_key=self.aws_secret_access_key.get_secret_value(),
119
- region_name=self.region_name,
120
- ) as aws_bedrock:
149
+ async with session.client("bedrock-runtime", **self.get_client_kwargs()) as aws_bedrock:
121
150
  yield aws_bedrock
122
151
 
123
152
 
@@ -125,6 +154,9 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
125
154
  class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
126
155
  config: BedrockEmbeddingConfig
127
156
 
157
+ def precheck(self):
158
+ self.config.run_precheck()
159
+
128
160
  def wrap_error(self, e: Exception) -> Exception:
129
161
  return self.config.wrap_error(e=e)
130
162
 
@@ -168,6 +200,9 @@ class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
168
200
  class AsyncBedrockEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
169
201
  config: BedrockEmbeddingConfig
170
202
 
203
+ def precheck(self):
204
+ self.config.run_precheck()
205
+
171
206
  def wrap_error(self, e: Exception) -> Exception:
172
207
  return self.config.wrap_error(e=e)
173
208
 
@@ -23,7 +23,7 @@ class HuggingFaceEmbeddingConfig(EmbeddingConfig):
23
23
 
24
24
  @requires_dependencies(
25
25
  ["sentence_transformers"],
26
- extras="embed-huggingface",
26
+ extras="huggingface",
27
27
  )
28
28
  def get_client(self) -> "SentenceTransformer":
29
29
  from sentence_transformers import SentenceTransformer
@@ -20,6 +20,9 @@ class EmbeddingConfig(BaseModel):
20
20
  class BaseEncoder(ABC):
21
21
  config: EmbeddingConfig
22
22
 
23
+ def precheck(self):
24
+ pass
25
+
23
26
  def initialize(self):
24
27
  """Initializes the embedding encoder class. Should also validate the instance
25
28
  is properly configured: e.g., embed a single a element"""
@@ -42,7 +42,7 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
42
42
 
43
43
  @requires_dependencies(
44
44
  ["mixedbread_ai"],
45
- extras="embed-mixedbreadai",
45
+ extras="mixedbreadai",
46
46
  )
47
47
  def get_client(self) -> "MixedbreadAI":
48
48
  """
@@ -59,7 +59,7 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
59
59
 
60
60
  @requires_dependencies(
61
61
  ["mixedbread_ai"],
62
- extras="embed-mixedbreadai",
62
+ extras="mixedbreadai",
63
63
  )
64
64
  def get_async_client(self) -> "AsyncMixedbreadAI":
65
65
  from mixedbread_ai.client import AsyncMixedbreadAI
@@ -54,9 +54,20 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
54
54
  logger.error(f"unhandled exception from openai: {e}", exc_info=True)
55
55
  return e
56
56
 
57
+ def run_precheck(self) -> None:
58
+ client = self.get_client()
59
+ try:
60
+ models = [m.id for m in list(client.models.list())]
61
+ if self.embedder_model_name not in models:
62
+ raise UserError(
63
+ "model '{}' not found: {}".format(self.embedder_model_name, ", ".join(models))
64
+ )
65
+ except Exception as e:
66
+ raise self.wrap_error(e=e)
67
+
57
68
  @requires_dependencies(
58
69
  ["openai", "tiktoken"],
59
- extras="embed-octoai",
70
+ extras="octoai",
60
71
  )
61
72
  def get_client(self) -> "OpenAI":
62
73
  """Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
@@ -66,7 +77,7 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
66
77
 
67
78
  @requires_dependencies(
68
79
  ["openai", "tiktoken"],
69
- extras="embed-octoai",
80
+ extras="octoai",
70
81
  )
71
82
  def get_async_client(self) -> "AsyncOpenAI":
72
83
  """Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
@@ -79,6 +90,9 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
79
90
  class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
80
91
  config: OctoAiEmbeddingConfig
81
92
 
93
+ def precheck(self):
94
+ self.config.run_precheck()
95
+
82
96
  def wrap_error(self, e: Exception) -> Exception:
83
97
  return self.config.wrap_error(e=e)
84
98
 
@@ -99,6 +113,9 @@ class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
99
113
  class AsyncOctoAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
100
114
  config: OctoAiEmbeddingConfig
101
115
 
116
+ def precheck(self):
117
+ self.config.run_precheck()
118
+
102
119
  def wrap_error(self, e: Exception) -> Exception:
103
120
  return self.config.wrap_error(e=e)
104
121
 
@@ -28,6 +28,7 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
28
28
  embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
29
29
  base_url: Optional[str] = None
30
30
 
31
+ @requires_dependencies(["openai"], extras="openai")
31
32
  def wrap_error(self, e: Exception) -> Exception:
32
33
  if is_internal_error(e=e):
33
34
  return e
@@ -54,6 +55,33 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
54
55
  logger.error(f"unhandled exception from openai: {e}", exc_info=True)
55
56
  return e
56
57
 
58
+ @requires_dependencies(["openai"], extras="openai")
59
+ def get_models(self) -> Optional[list[str]]:
60
+ # In case the list model endpoint isn't exposed, don't break
61
+ from openai import APIStatusError
62
+
63
+ client = self.get_client()
64
+ try:
65
+ models = [m.id for m in list(client.models.list())]
66
+ return models
67
+ except APIStatusError as e:
68
+ if e.status_code == 404:
69
+ return None
70
+ except Exception as e:
71
+ raise self.wrap_error(e=e)
72
+
73
+ def run_precheck(self) -> None:
74
+ try:
75
+ models = self.get_models()
76
+ if models is None:
77
+ return
78
+ if self.embedder_model_name not in models:
79
+ raise UserError(
80
+ "model '{}' not found: {}".format(self.embedder_model_name, ", ".join(models))
81
+ )
82
+ except Exception as e:
83
+ raise self.wrap_error(e=e)
84
+
57
85
  @requires_dependencies(["openai"], extras="openai")
58
86
  def get_client(self) -> "OpenAI":
59
87
  from openai import OpenAI
@@ -71,6 +99,9 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
71
99
  class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
72
100
  config: OpenAIEmbeddingConfig
73
101
 
102
+ def precheck(self):
103
+ self.config.run_precheck()
104
+
74
105
  def wrap_error(self, e: Exception) -> Exception:
75
106
  return self.config.wrap_error(e=e)
76
107
 
@@ -86,6 +117,9 @@ class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
86
117
  class AsyncOpenAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
87
118
  config: OpenAIEmbeddingConfig
88
119
 
120
+ def precheck(self):
121
+ self.config.run_precheck()
122
+
89
123
  def wrap_error(self, e: Exception) -> Exception:
90
124
  return self.config.wrap_error(e=e)
91
125
 
@@ -41,6 +41,17 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
41
41
  return CustomRateLimitError(message)
42
42
  return UserError(message)
43
43
 
44
+ def run_precheck(self) -> None:
45
+ client = self.get_client()
46
+ try:
47
+ models = [m.id for m in list(client.models.list())]
48
+ if self.embedder_model_name not in models:
49
+ raise UserError(
50
+ "model '{}' not found: {}".format(self.embedder_model_name, ", ".join(models))
51
+ )
52
+ except Exception as e:
53
+ raise self.wrap_error(e=e)
54
+
44
55
  @requires_dependencies(["together"], extras="togetherai")
45
56
  def get_client(self) -> "Together":
46
57
  from together import Together
@@ -58,6 +69,9 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
58
69
  class TogetherAIEmbeddingEncoder(BaseEmbeddingEncoder):
59
70
  config: TogetherAIEmbeddingConfig
60
71
 
72
+ def precheck(self):
73
+ self.config.run_precheck()
74
+
61
75
  def wrap_error(self, e: Exception) -> Exception:
62
76
  return self.config.wrap_error(e=e)
63
77
 
@@ -73,6 +87,9 @@ class TogetherAIEmbeddingEncoder(BaseEmbeddingEncoder):
73
87
  class AsyncTogetherAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
74
88
  config: TogetherAIEmbeddingConfig
75
89
 
90
+ def precheck(self):
91
+ self.config.run_precheck()
92
+
76
93
  def wrap_error(self, e: Exception) -> Exception:
77
94
  return self.config.wrap_error(e=e)
78
95
 
@@ -55,7 +55,7 @@ class VertexAIEmbeddingConfig(EmbeddingConfig):
55
55
 
56
56
  @requires_dependencies(
57
57
  ["vertexai"],
58
- extras="embed-vertexai",
58
+ extras="vertexai",
59
59
  )
60
60
  def get_client(self) -> "TextEmbeddingModel":
61
61
  """Creates a VertexAI python client to embed elements."""
@@ -38,12 +38,9 @@ CONNECTOR_TYPE = "google_drive"
38
38
 
39
39
  # Maps Google-native Drive MIME types → export MIME types
40
40
  GOOGLE_EXPORT_MIME_MAP = {
41
- "application/vnd.google-apps.document": \
42
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
43
- "application/vnd.google-apps.spreadsheet": \
44
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
45
- "application/vnd.google-apps.presentation": \
46
- "application/vnd.openxmlformats-officedocument.presentationml.presentation",
41
+ "application/vnd.google-apps.document": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # noqa: E501
42
+ "application/vnd.google-apps.spreadsheet": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # noqa: E501
43
+ "application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation", # noqa: E501
47
44
  }
48
45
 
49
46
  # Maps export MIME types → file extensions
@@ -57,9 +54,9 @@ EXPORT_EXTENSION_MAP = {
57
54
 
58
55
 
59
56
  class GoogleDriveAccessConfig(AccessConfig):
60
- service_account_key: Optional[
61
- Annotated[dict, BeforeValidator(conform_string_to_dict)]
62
- ] = Field(default=None, description="Credentials values to use for authentication")
57
+ service_account_key: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
58
+ default=None, description="Credentials values to use for authentication"
59
+ )
63
60
  service_account_key_path: Optional[Path] = Field(
64
61
  default=None,
65
62
  description="File path to credentials values to use for authentication",
@@ -178,14 +175,10 @@ class GoogleDriveIndexer(Indexer):
178
175
  Please enable it in the Google Cloud Console."
179
176
  )
180
177
  else:
181
- raise SourceConnectionError(
182
- "Google drive API unreachable for an unknown reason!"
183
- )
178
+ raise SourceConnectionError("Google drive API unreachable for an unknown reason!")
184
179
 
185
180
  @staticmethod
186
- def count_files_recursively(
187
- files_client, folder_id: str, extensions: list[str] = None
188
- ) -> int:
181
+ def count_files_recursively(files_client, folder_id: str, extensions: list[str] = None) -> int:
189
182
  """
190
183
  Count non-folder files recursively under the given folder.
191
184
  If `extensions` is provided, only count files
@@ -266,9 +259,7 @@ class GoogleDriveIndexer(Indexer):
266
259
  # that the service account has proper permissions."
267
260
  # )
268
261
  else:
269
- logger.info(
270
- f"Found {file_count} files recursively in the folder."
271
- )
262
+ logger.info(f"Found {file_count} files recursively in the folder.")
272
263
  else:
273
264
  # Non-recursive: check for at least one immediate non-folder child.
274
265
  response = client.list(
@@ -319,9 +310,7 @@ class GoogleDriveIndexer(Indexer):
319
310
  date_modified_str = root_info.pop("modifiedTime", None)
320
311
  parent_path = root_info.pop("parent_path", None)
321
312
  parent_root_path = root_info.pop("parent_root_path", None)
322
- date_modified_dt = (
323
- parser.parse(date_modified_str) if date_modified_str else None
324
- )
313
+ date_modified_dt = parser.parse(date_modified_str) if date_modified_str else None
325
314
  if (
326
315
  parent_path
327
316
  and isinstance(parent_path, str)
@@ -421,9 +410,7 @@ class GoogleDriveIndexer(Indexer):
421
410
  ) -> list[FileData]:
422
411
  root_info = self.get_root_info(files_client=files_client, object_id=object_id)
423
412
  if not self.is_dir(root_info):
424
- root_info["permissions"] = self.extract_permissions(
425
- root_info.get("permissions")
426
- )
413
+ root_info["permissions"] = self.extract_permissions(root_info.get("permissions"))
427
414
  data = [self.map_file_data(root_info)]
428
415
  else:
429
416
  file_contents = self.get_paginated_results(
@@ -514,9 +501,7 @@ class GoogleDriveDownloader(Downloader):
514
501
  )
515
502
  connector_type: str = CONNECTOR_TYPE
516
503
 
517
- def _get_download_url_and_ext(
518
- self, file_id: str, mime_type: str
519
- ) -> tuple[str, str]:
504
+ def _get_download_url_and_ext(self, file_id: str, mime_type: str) -> tuple[str, str]:
520
505
  """
521
506
  Resolves the appropriate download URL and expected file extension for a Google Drive file.
522
507
 
@@ -530,9 +515,7 @@ class GoogleDriveDownloader(Downloader):
530
515
  SourceConnectionError: If no valid export or download link is available.
531
516
  """
532
517
  with self.connection_config.get_client() as client:
533
- metadata = client.get(
534
- fileId=file_id, fields="exportLinks,webContentLink"
535
- ).execute()
518
+ metadata = client.get(fileId=file_id, fields="exportLinks,webContentLink").execute()
536
519
 
537
520
  export_links = metadata.get("exportLinks", {})
538
521
  web_link = metadata.get("webContentLink")
@@ -540,16 +523,12 @@ class GoogleDriveDownloader(Downloader):
540
523
  if export_mime := GOOGLE_EXPORT_MIME_MAP.get(mime_type):
541
524
  url = export_links.get(export_mime)
542
525
  if not url:
543
- raise SourceConnectionError(
544
- f"No export link found for {file_id} as {export_mime}"
545
- )
526
+ raise SourceConnectionError(f"No export link found for {file_id} as {export_mime}")
546
527
  ext = EXPORT_EXTENSION_MAP.get(export_mime, "")
547
528
  return url, ext
548
529
 
549
530
  if not web_link:
550
- raise SourceConnectionError(
551
- f"No webContentLink available for file {file_id}"
552
- )
531
+ raise SourceConnectionError(f"No webContentLink available for file {file_id}")
553
532
  return web_link, ""
554
533
 
555
534
  @requires_dependencies(["httpx", "google.auth"], extras="google-drive")
@@ -622,9 +601,7 @@ class GoogleDriveDownloader(Downloader):
622
601
  )
623
602
  file_data.local_download_path = str(download_path.resolve())
624
603
 
625
- return self.generate_download_response(
626
- file_data=file_data, download_path=download_path
627
- )
604
+ return self.generate_download_response(file_data=file_data, download_path=download_path)
628
605
 
629
606
 
630
607
  google_drive_source_entry = SourceRegistryEntry(
@@ -189,6 +189,10 @@ class Embedder(BaseProcess, ABC):
189
189
  def init(self, **kwargs: Any) -> None:
190
190
  self.config.get_embedder().initialize()
191
191
 
192
+ def precheck(self) -> None:
193
+ embedder = self.config.get_embedder()
194
+ embedder.precheck()
195
+
192
196
  def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
193
197
  # TODO update base embedder classes to support async
194
198
  embedder = self.config.get_embedder()
@@ -1 +0,0 @@
1
- __version__ = "1.0.13" # pragma: no cover