unstructured-ingest 1.0.11__tar.gz → 1.0.13__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (239) hide show
  1. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/PKG-INFO +1 -1
  2. unstructured_ingest-1.0.13/unstructured_ingest/__version__.py +1 -0
  3. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/google_drive.py +155 -63
  4. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +65 -7
  5. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +1 -0
  6. unstructured_ingest-1.0.11/unstructured_ingest/__version__.py +0 -1
  7. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/.gitignore +0 -0
  8. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/LICENSE.md +0 -0
  9. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/README.md +0 -0
  10. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/pyproject.toml +0 -0
  11. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/__init__.py +0 -0
  12. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/README.md +0 -0
  13. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/__init__.py +0 -0
  14. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/base/__init__.py +0 -0
  15. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/base/cmd.py +0 -0
  16. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/base/dest.py +0 -0
  17. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/base/importer.py +0 -0
  18. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/base/src.py +0 -0
  19. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/cli.py +0 -0
  20. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/cmds.py +0 -0
  21. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/utils/__init__.py +0 -0
  22. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/utils/click.py +0 -0
  23. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  24. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/data_types/__init__.py +0 -0
  25. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/data_types/entities.py +0 -0
  26. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/data_types/file_data.py +0 -0
  27. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/__init__.py +0 -0
  28. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/azure_openai.py +0 -0
  29. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/bedrock.py +0 -0
  30. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/huggingface.py +0 -0
  31. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/interfaces.py +0 -0
  32. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  33. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/octoai.py +0 -0
  34. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/openai.py +0 -0
  35. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/togetherai.py +0 -0
  36. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/vertexai.py +0 -0
  37. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/embed/voyageai.py +0 -0
  38. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/error.py +0 -0
  39. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/errors_v2.py +0 -0
  40. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/__init__.py +0 -0
  41. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/connector.py +0 -0
  42. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/downloader.py +0 -0
  43. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/indexer.py +0 -0
  44. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/process.py +0 -0
  45. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/processor.py +0 -0
  46. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/upload_stager.py +0 -0
  47. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/interfaces/uploader.py +0 -0
  48. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/logger.py +0 -0
  49. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/main.py +0 -0
  50. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/otel.py +0 -0
  51. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/__init__.py +0 -0
  52. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/interfaces.py +0 -0
  53. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/otel.py +0 -0
  54. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/pipeline.py +0 -0
  55. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  56. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  57. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/download.py +0 -0
  58. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  59. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  60. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/index.py +0 -0
  61. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  62. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  63. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  64. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  65. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/__init__.py +0 -0
  66. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/chunker.py +0 -0
  67. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connector_registry.py +0 -0
  68. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  69. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/airtable.py +0 -0
  70. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  71. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  72. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  73. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/astradb.py +0 -0
  74. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
  75. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  76. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/confluence.py +0 -0
  77. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  78. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  79. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  80. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
  81. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
  82. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
  83. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  84. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
  85. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
  86. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/discord.py +0 -0
  87. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  88. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
  89. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
  90. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
  91. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  92. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  93. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  94. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  95. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  96. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  97. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  98. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  99. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  100. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
  101. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  102. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  103. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/github.py +0 -0
  104. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
  105. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  106. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/jira.py +0 -0
  107. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  108. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  109. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  110. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  111. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
  112. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  113. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  114. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  115. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  116. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  117. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  118. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  119. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/local.py +0 -0
  120. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  121. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  122. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  123. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  124. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  125. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  126. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  127. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  128. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  129. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  130. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  131. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  132. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  133. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  134. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  135. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  136. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  137. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  138. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  139. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  140. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  141. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  142. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  143. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  144. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  145. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  146. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  147. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  148. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  149. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  150. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  151. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  152. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  153. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  154. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  155. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  156. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  157. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  158. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  159. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  160. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  161. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  162. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  163. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  164. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  165. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  166. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  167. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  168. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  169. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  170. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  171. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  172. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  173. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  174. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  175. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  176. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  177. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  178. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  179. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  180. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  181. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  182. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  183. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  184. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  185. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  186. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  187. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  188. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  189. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  190. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  191. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  192. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  193. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/outlook.py +0 -0
  194. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
  195. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  196. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  197. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  198. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  199. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  200. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
  201. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
  202. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  203. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/slack.py +0 -0
  204. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  205. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
  206. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
  207. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
  208. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
  209. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
  210. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
  211. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
  212. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/utils.py +0 -0
  213. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/vectara.py +0 -0
  214. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  215. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  216. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  217. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  218. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  219. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  220. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  221. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
  222. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/embedder.py +0 -0
  223. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/filter.py +0 -0
  224. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/partitioner.py +0 -0
  225. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/uncompress.py +0 -0
  226. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/utils/__init__.py +0 -0
  227. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
  228. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/unstructured_api.py +0 -0
  229. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/__init__.py +0 -0
  230. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/chunking.py +0 -0
  231. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/compression.py +0 -0
  232. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/constants.py +0 -0
  233. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/data_prep.py +0 -0
  234. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/dep_check.py +0 -0
  235. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/html.py +0 -0
  236. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/ndjson.py +0 -0
  237. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/pydantic_models.py +0 -0
  238. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  239. {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.13}/unstructured_ingest/utils/table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.11
3
+ Version: 1.0.13
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__ = "1.0.13" # pragma: no cover
@@ -1,4 +1,3 @@
1
- import io
2
1
  import json
3
2
  from contextlib import contextmanager
4
3
  from dataclasses import dataclass, field
@@ -16,7 +15,6 @@ from unstructured_ingest.data_types.file_data import (
16
15
  )
17
16
  from unstructured_ingest.error import (
18
17
  SourceConnectionError,
19
- SourceConnectionNetworkError,
20
18
  )
21
19
  from unstructured_ingest.interfaces import (
22
20
  AccessConfig,
@@ -34,25 +32,34 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
34
32
 
35
33
  if TYPE_CHECKING:
36
34
  from googleapiclient.discovery import Resource as GoogleAPIResource
37
- from googleapiclient.http import MediaIoBaseDownload
38
35
 
39
36
  CONNECTOR_TYPE = "google_drive"
40
37
 
41
- GOOGLE_DRIVE_EXPORT_TYPES = {
42
- "application/vnd.google-apps.document": "application/"
43
- "vnd.openxmlformats-officedocument.wordprocessingml.document",
44
- "application/vnd.google-apps.spreadsheet": "application/"
45
- "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
46
- "application/vnd.google-apps.presentation": "application/"
47
- "vnd.openxmlformats-officedocument.presentationml.presentation",
48
- "application/vnd.google-apps.photo": "image/jpeg",
38
+
39
+ # Maps Google-native Drive MIME types → export MIME types
40
+ GOOGLE_EXPORT_MIME_MAP = {
41
+ "application/vnd.google-apps.document": \
42
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
43
+ "application/vnd.google-apps.spreadsheet": \
44
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
45
+ "application/vnd.google-apps.presentation": \
46
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation",
47
+ }
48
+
49
+ # Maps export MIME types → file extensions
50
+ EXPORT_EXTENSION_MAP = {
51
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
52
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
53
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
54
+ "application/pdf": ".pdf",
55
+ "text/html": ".html",
49
56
  }
50
57
 
51
58
 
52
59
  class GoogleDriveAccessConfig(AccessConfig):
53
- service_account_key: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
54
- default=None, description="Credentials values to use for authentication"
55
- )
60
+ service_account_key: Optional[
61
+ Annotated[dict, BeforeValidator(conform_string_to_dict)]
62
+ ] = Field(default=None, description="Credentials values to use for authentication")
56
63
  service_account_key_path: Optional[Path] = Field(
57
64
  default=None,
58
65
  description="File path to credentials values to use for authentication",
@@ -171,10 +178,14 @@ class GoogleDriveIndexer(Indexer):
171
178
  Please enable it in the Google Cloud Console."
172
179
  )
173
180
  else:
174
- raise SourceConnectionError("Google drive API unreachable for an unknown reason!")
181
+ raise SourceConnectionError(
182
+ "Google drive API unreachable for an unknown reason!"
183
+ )
175
184
 
176
185
  @staticmethod
177
- def count_files_recursively(files_client, folder_id: str, extensions: list[str] = None) -> int:
186
+ def count_files_recursively(
187
+ files_client, folder_id: str, extensions: list[str] = None
188
+ ) -> int:
178
189
  """
179
190
  Count non-folder files recursively under the given folder.
180
191
  If `extensions` is provided, only count files
@@ -255,7 +266,9 @@ class GoogleDriveIndexer(Indexer):
255
266
  # that the service account has proper permissions."
256
267
  # )
257
268
  else:
258
- logger.info(f"Found {file_count} files recursively in the folder.")
269
+ logger.info(
270
+ f"Found {file_count} files recursively in the folder."
271
+ )
259
272
  else:
260
273
  # Non-recursive: check for at least one immediate non-folder child.
261
274
  response = client.list(
@@ -306,7 +319,9 @@ class GoogleDriveIndexer(Indexer):
306
319
  date_modified_str = root_info.pop("modifiedTime", None)
307
320
  parent_path = root_info.pop("parent_path", None)
308
321
  parent_root_path = root_info.pop("parent_root_path", None)
309
- date_modified_dt = parser.parse(date_modified_str) if date_modified_str else None
322
+ date_modified_dt = (
323
+ parser.parse(date_modified_str) if date_modified_str else None
324
+ )
310
325
  if (
311
326
  parent_path
312
327
  and isinstance(parent_path, str)
@@ -406,7 +421,9 @@ class GoogleDriveIndexer(Indexer):
406
421
  ) -> list[FileData]:
407
422
  root_info = self.get_root_info(files_client=files_client, object_id=object_id)
408
423
  if not self.is_dir(root_info):
409
- root_info["permissions"] = self.extract_permissions(root_info.get("permissions"))
424
+ root_info["permissions"] = self.extract_permissions(
425
+ root_info.get("permissions")
426
+ )
410
427
  data = [self.map_file_data(root_info)]
411
428
  else:
412
429
  file_contents = self.get_paginated_results(
@@ -427,7 +444,7 @@ class GoogleDriveIndexer(Indexer):
427
444
  def extract_permissions(self, permissions: Optional[list[dict]]) -> list[dict]:
428
445
  if not permissions:
429
446
  logger.debug("no permissions found")
430
- return {}
447
+ return [{}]
431
448
 
432
449
  # https://developers.google.com/workspace/drive/api/guides/ref-roles
433
450
  role_mapping = {
@@ -478,61 +495,136 @@ class GoogleDriveDownloaderConfig(DownloaderConfig):
478
495
 
479
496
  @dataclass
480
497
  class GoogleDriveDownloader(Downloader):
498
+ """
499
+ Downloads files from Google Drive using authenticated direct HTTP requests
500
+ via `exportLinks` (for Google-native files) and `webContentLink` (for binary files).
501
+
502
+ These links emulate the behavior of Google Drive's "File > Download as..." options
503
+ in the UI and bypass the size limitations of `files.export()`.
504
+
505
+ Behavior:
506
+ - Google-native formats are downloaded using `exportLinks` in appropriate MIME formats.
507
+ - Binary files (non-Google-native) are downloaded using `webContentLink`.
508
+ - All downloads are performed via `requests.get()` using a valid bearer token.
509
+ """
510
+
481
511
  connection_config: GoogleDriveConnectionConfig
482
512
  download_config: GoogleDriveDownloaderConfig = field(
483
513
  default_factory=lambda: GoogleDriveDownloaderConfig()
484
514
  )
485
515
  connector_type: str = CONNECTOR_TYPE
486
516
 
487
- @SourceConnectionNetworkError.wrap
488
- def _get_content(self, downloader: "MediaIoBaseDownload") -> bool:
489
- downloaded = False
490
- while downloaded is False:
491
- _, downloaded = downloader.next_chunk()
492
- return downloaded
517
+ def _get_download_url_and_ext(
518
+ self, file_id: str, mime_type: str
519
+ ) -> tuple[str, str]:
520
+ """
521
+ Resolves the appropriate download URL and expected file extension for a Google Drive file.
493
522
 
494
- def _write_file(self, file_data: FileData, file_contents: io.BytesIO) -> DownloadResponse:
495
- download_path = self.get_download_path(file_data=file_data)
496
- download_path.parent.mkdir(parents=True, exist_ok=True)
497
- logger.debug(f"writing {file_data.source_identifiers.fullpath} to {download_path}")
498
- with open(download_path, "wb") as handler:
499
- handler.write(file_contents.getbuffer())
500
- return self.generate_download_response(file_data=file_data, download_path=download_path)
523
+ - Google-native files use export MIME types from exportLinks (e.g., .docx, .xlsx).
524
+ - Binary files use webContentLink (e.g., uploaded PDFs or ZIPs).
501
525
 
502
- @requires_dependencies(["googleapiclient"], extras="google-drive")
503
- def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
504
- from googleapiclient.http import MediaIoBaseDownload
526
+ Returns:
527
+ Tuple[str, str]: (download URL, file extension or "")
505
528
 
506
- logger.debug(f"fetching file: {file_data.source_identifiers.fullpath}")
507
- record_id = file_data.identifier
508
- mime_type = file_data.additional_metadata["mimeType"]
509
- if not mime_type:
510
- raise TypeError(
511
- f"File not supported. Name: {file_data.source_identifiers.filename} "
512
- f"ID: {record_id} "
513
- f"MimeType: {mime_type}"
514
- )
529
+ Raises:
530
+ SourceConnectionError: If no valid export or download link is available.
531
+ """
515
532
  with self.connection_config.get_client() as client:
516
- if (
517
- mime_type.startswith("application/vnd.google-apps")
518
- and mime_type in GOOGLE_DRIVE_EXPORT_TYPES
519
- ):
520
- export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get(
521
- mime_type, # type: ignore
533
+ metadata = client.get(
534
+ fileId=file_id, fields="exportLinks,webContentLink"
535
+ ).execute()
536
+
537
+ export_links = metadata.get("exportLinks", {})
538
+ web_link = metadata.get("webContentLink")
539
+
540
+ if export_mime := GOOGLE_EXPORT_MIME_MAP.get(mime_type):
541
+ url = export_links.get(export_mime)
542
+ if not url:
543
+ raise SourceConnectionError(
544
+ f"No export link found for {file_id} as {export_mime}"
522
545
  )
523
- request = client.export_media(
524
- fileId=record_id,
525
- mimeType=export_mime,
546
+ ext = EXPORT_EXTENSION_MAP.get(export_mime, "")
547
+ return url, ext
548
+
549
+ if not web_link:
550
+ raise SourceConnectionError(
551
+ f"No webContentLink available for file {file_id}"
552
+ )
553
+ return web_link, ""
554
+
555
+ @requires_dependencies(["httpx", "google.auth"], extras="google-drive")
556
+ def _download_url(self, file_data: FileData, url: str, ext: str = "") -> Path:
557
+ """
558
+ Streams file content directly to disk using authenticated HTTP request.
559
+
560
+ Writes the file to the correct path in the download directory while downloading.
561
+ Avoids buffering large files in memory.
562
+
563
+ Returns:
564
+ Path to the downloaded file.
565
+
566
+ Raises:
567
+ SourceConnectionError: If the HTTP request fails.
568
+ """
569
+ import httpx
570
+ from google.auth.transport.requests import Request
571
+ from google.oauth2 import service_account
572
+
573
+ access_config = self.connection_config.access_config.get_secret_value()
574
+ key_data = access_config.get_service_account_key()
575
+ creds = service_account.Credentials.from_service_account_info(
576
+ key_data,
577
+ scopes=["https://www.googleapis.com/auth/drive.readonly"],
578
+ )
579
+ creds.refresh(Request())
580
+
581
+ headers = {
582
+ "Authorization": f"Bearer {creds.token}",
583
+ }
584
+
585
+ download_path = self.get_download_path(file_data)
586
+ if ext:
587
+ download_path = download_path.with_suffix(ext)
588
+
589
+ download_path.parent.mkdir(parents=True, exist_ok=True)
590
+ logger.debug(f"Streaming file to {download_path}")
591
+
592
+ with (
593
+ httpx.Client(timeout=None, follow_redirects=True) as client,
594
+ client.stream("GET", url, headers=headers) as response,
595
+ ):
596
+ if response.status_code != 200:
597
+ raise SourceConnectionError(
598
+ f"Failed to stream download from {url}: {response.status_code}"
526
599
  )
527
- else:
528
- request = client.get_media(fileId=record_id)
529
-
530
- file_contents = io.BytesIO()
531
- downloader = MediaIoBaseDownload(file_contents, request)
532
- downloaded = self._get_content(downloader=downloader)
533
- if not downloaded or not file_contents:
534
- raise SourceConnectionError("nothing found to download")
535
- return self._write_file(file_data=file_data, file_contents=file_contents)
600
+ with open(download_path, "wb") as f:
601
+ for chunk in response.iter_bytes():
602
+ f.write(chunk)
603
+
604
+ return download_path
605
+
606
+ def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
607
+ mime_type = file_data.additional_metadata.get("mimeType", "")
608
+ record_id = file_data.identifier
609
+
610
+ logger.debug(
611
+ f"Downloading file {file_data.source_identifiers.fullpath} of type {mime_type}"
612
+ )
613
+
614
+ download_url, ext = self._get_download_url_and_ext(record_id, mime_type)
615
+ download_path = self._download_url(file_data, download_url, ext)
616
+
617
+ file_data.additional_metadata.update(
618
+ {
619
+ "download_method": "export_link" if ext else "web_content_link",
620
+ "download_url_used": download_url,
621
+ }
622
+ )
623
+ file_data.local_download_path = str(download_path.resolve())
624
+
625
+ return self.generate_download_response(
626
+ file_data=file_data, download_path=download_path
627
+ )
536
628
 
537
629
 
538
630
  google_drive_source_entry = SourceRegistryEntry(
@@ -56,6 +56,12 @@ class IbmWatsonxConnectionConfig(ConnectionConfig):
56
56
  object_storage_endpoint: str = Field(description="Cloud Object Storage public endpoint")
57
57
  object_storage_region: str = Field(description="Cloud Object Storage region")
58
58
  catalog: str = Field(description="Catalog name")
59
+ max_retries_connection: int = Field(
60
+ default=10,
61
+ description="Maximum number of retries in case of a connection error (RESTError)",
62
+ ge=2,
63
+ le=100,
64
+ )
59
65
 
60
66
  _bearer_token: Optional[dict[str, Any]] = None
61
67
 
@@ -69,8 +75,8 @@ class IbmWatsonxConnectionConfig(ConnectionConfig):
69
75
 
70
76
  @property
71
77
  def bearer_token(self) -> str:
72
- # Add 60 seconds to deal with edge cases where the token expires before the request is made
73
- timestamp = int(time.time()) + 60
78
+ # Add 5 minutes to deal with edge cases where the token expires before the request is made
79
+ timestamp = int(time.time()) + (60 * 5)
74
80
  if self._bearer_token is None or self._bearer_token.get("expiration", 0) <= timestamp:
75
81
  self._bearer_token = self.generate_bearer_token()
76
82
  return self._bearer_token["access_token"]
@@ -145,10 +151,29 @@ class IbmWatsonxConnectionConfig(ConnectionConfig):
145
151
  @contextmanager
146
152
  def get_catalog(self) -> Generator["RestCatalog", None, None]:
147
153
  from pyiceberg.catalog import load_catalog
154
+ from pyiceberg.exceptions import RESTError
155
+ from tenacity import (
156
+ before_log,
157
+ retry,
158
+ retry_if_exception_type,
159
+ stop_after_attempt,
160
+ wait_exponential,
161
+ )
162
+
163
+ # Retry connection in case of a connection error
164
+ @retry(
165
+ stop=stop_after_attempt(self.max_retries_connection),
166
+ wait=wait_exponential(exp_base=2, multiplier=1, min=2, max=10),
167
+ retry=retry_if_exception_type(RESTError),
168
+ before=before_log(logger, logging.DEBUG),
169
+ reraise=True,
170
+ )
171
+ def _get_catalog(catalog_config: dict[str, Any]) -> "RestCatalog":
172
+ return load_catalog(**catalog_config)
148
173
 
149
174
  try:
150
175
  catalog_config = self.get_catalog_config()
151
- catalog = load_catalog(**catalog_config)
176
+ catalog = _get_catalog(catalog_config)
152
177
  except Exception as e:
153
178
  logger.error(f"Failed to connect to catalog '{self.catalog}': {e}", exc_info=True)
154
179
  raise ProviderError(f"Failed to connect to catalog '{self.catalog}': {e}")
@@ -172,7 +197,10 @@ class IbmWatsonxUploaderConfig(UploaderConfig):
172
197
  namespace: str = Field(description="Namespace name")
173
198
  table: str = Field(description="Table name")
174
199
  max_retries: int = Field(
175
- default=5, description="Maximum number of retries to upload data", ge=2, le=500
200
+ default=50,
201
+ description="Maximum number of retries to upload data (CommitFailedException)",
202
+ ge=2,
203
+ le=500,
176
204
  )
177
205
  record_id_key: str = Field(
178
206
  default=RECORD_ID_LABEL,
@@ -240,7 +268,7 @@ class IbmWatsonxUploader(SQLUploader):
240
268
  def upload_data_table(
241
269
  self, table: "Table", data_table: "ArrowTable", file_data: FileData
242
270
  ) -> None:
243
- from pyiceberg.exceptions import CommitFailedException
271
+ from pyiceberg.exceptions import CommitFailedException, RESTError
244
272
  from tenacity import (
245
273
  before_log,
246
274
  retry,
@@ -265,21 +293,51 @@ class IbmWatsonxUploader(SQLUploader):
265
293
  table.refresh()
266
294
  logger.debug(e)
267
295
  raise IcebergCommitFailedException(e)
296
+ except RESTError:
297
+ raise
268
298
  except Exception as e:
269
299
  raise ProviderError(f"Failed to upload data to table: {e}")
270
300
 
271
301
  try:
272
302
  return _upload_data_table(table, data_table, file_data)
303
+ except RESTError:
304
+ raise
273
305
  except ProviderError:
274
306
  raise
275
307
  except Exception as e:
276
308
  raise ProviderError(f"Failed to upload data to table: {e}")
277
309
 
310
+ @requires_dependencies(["pyiceberg", "tenacity"], extras="ibm-watsonx-s3")
278
311
  def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
312
+ from pyiceberg.exceptions import RESTError
313
+ from tenacity import (
314
+ before_log,
315
+ retry,
316
+ retry_if_exception_type,
317
+ stop_after_attempt,
318
+ wait_exponential,
319
+ )
320
+
279
321
  data_table = self._df_to_arrow_table(df)
280
322
 
281
- with self.get_table() as table:
282
- self.upload_data_table(table, data_table, file_data)
323
+ # Retry connection in case of a connection error or token expiration
324
+ @retry(
325
+ stop=stop_after_attempt(self.connection_config.max_retries_connection),
326
+ wait=wait_exponential(exp_base=2, multiplier=1, min=2, max=10),
327
+ retry=retry_if_exception_type(RESTError),
328
+ before=before_log(logger, logging.DEBUG),
329
+ reraise=True,
330
+ )
331
+ def _upload_dataframe(data_table: Any, file_data: FileData) -> None:
332
+ with self.get_table() as table:
333
+ self.upload_data_table(table, data_table, file_data)
334
+
335
+ try:
336
+ return _upload_dataframe(data_table, file_data)
337
+ except ProviderError:
338
+ raise
339
+ except Exception as e:
340
+ raise ProviderError(f"Failed to upload data to table: {e}")
283
341
 
284
342
  @requires_dependencies(["pandas"], extras="ibm-watsonx-s3")
285
343
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
@@ -38,6 +38,7 @@ class Select(DBPropertyBase):
38
38
  id: str
39
39
  name: str
40
40
  select: SelectProp
41
+ description: Optional[str] = None
41
42
  type: str = "select"
42
43
 
43
44
  @classmethod
@@ -1 +0,0 @@
1
- __version__ = "1.0.11" # pragma: no cover