unstructured-ingest 1.0.4__tar.gz → 1.0.6.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (238)
  1. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/PKG-INFO +1 -1
  2. unstructured_ingest-1.0.6.dev0/unstructured_ingest/__version__.py +1 -0
  3. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/data_types/file_data.py +2 -2
  4. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/confluence.py +7 -1
  5. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/google_drive.py +22 -19
  6. unstructured_ingest-1.0.4/unstructured_ingest/__version__.py +0 -1
  7. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/.gitignore +0 -0
  8. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/LICENSE.md +0 -0
  9. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/README.md +0 -0
  10. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/pyproject.toml +0 -0
  11. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/__init__.py +0 -0
  12. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/README.md +0 -0
  13. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/__init__.py +0 -0
  14. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/base/__init__.py +0 -0
  15. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/base/cmd.py +0 -0
  16. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/base/dest.py +0 -0
  17. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/base/importer.py +0 -0
  18. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/base/src.py +0 -0
  19. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/cli.py +0 -0
  20. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/cmds.py +0 -0
  21. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/utils/__init__.py +0 -0
  22. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/utils/click.py +0 -0
  23. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  24. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/data_types/__init__.py +0 -0
  25. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/__init__.py +0 -0
  26. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/azure_openai.py +0 -0
  27. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/bedrock.py +0 -0
  28. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/huggingface.py +0 -0
  29. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/interfaces.py +0 -0
  30. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  31. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/octoai.py +0 -0
  32. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/openai.py +0 -0
  33. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/togetherai.py +0 -0
  34. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/vertexai.py +0 -0
  35. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/embed/voyageai.py +0 -0
  36. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/error.py +0 -0
  37. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/errors_v2.py +0 -0
  38. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/__init__.py +0 -0
  39. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/connector.py +0 -0
  40. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/downloader.py +0 -0
  41. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/indexer.py +0 -0
  42. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/process.py +0 -0
  43. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/processor.py +0 -0
  44. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/upload_stager.py +0 -0
  45. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/interfaces/uploader.py +0 -0
  46. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/logger.py +0 -0
  47. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/main.py +0 -0
  48. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/otel.py +0 -0
  49. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/__init__.py +0 -0
  50. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/interfaces.py +0 -0
  51. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/otel.py +0 -0
  52. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/pipeline.py +0 -0
  53. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  54. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  55. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/download.py +0 -0
  56. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  57. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  58. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/index.py +0 -0
  59. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  60. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  61. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  62. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  63. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/__init__.py +0 -0
  64. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/chunker.py +0 -0
  65. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connector_registry.py +0 -0
  66. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  67. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/airtable.py +0 -0
  68. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  69. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  70. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  71. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/astradb.py +0 -0
  72. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
  73. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  74. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  75. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  76. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  77. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
  78. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
  79. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
  80. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  81. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
  82. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
  83. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/discord.py +0 -0
  84. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  85. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
  86. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
  87. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
  88. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  89. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  90. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  91. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  92. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  93. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  94. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  95. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  96. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  97. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
  98. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  99. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  100. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/github.py +0 -0
  101. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
  102. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  103. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
  104. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/jira.py +0 -0
  105. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  106. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  107. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  108. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  109. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
  110. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  111. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  112. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  113. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  114. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  115. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  116. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  117. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/local.py +0 -0
  118. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  119. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  120. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
  121. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  122. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  123. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  124. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  125. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  126. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  127. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  128. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  129. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  130. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  131. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  132. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  133. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  134. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  135. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  136. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  137. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  138. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  139. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  140. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  141. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  142. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  143. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  144. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  145. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  146. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  147. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  148. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  149. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  150. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  151. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  152. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  153. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  154. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  155. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  156. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  157. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  158. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  159. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  160. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  161. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  162. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  163. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  164. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  165. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  166. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  167. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  168. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  169. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  170. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  171. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  172. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  173. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  174. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  175. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  176. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  177. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  178. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  179. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
  180. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  181. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  182. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  183. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  184. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  185. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  186. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  187. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  188. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  189. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  190. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  191. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  192. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/outlook.py +0 -0
  193. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
  194. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  195. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  196. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  197. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  198. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  199. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
  200. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
  201. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  202. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/slack.py +0 -0
  203. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  204. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
  205. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
  206. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
  207. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
  208. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
  209. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
  210. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
  211. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/utils.py +0 -0
  212. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/vectara.py +0 -0
  213. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  214. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  215. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  216. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  217. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  218. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  219. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  220. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
  221. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/embedder.py +0 -0
  222. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/filter.py +0 -0
  223. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/partitioner.py +0 -0
  224. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/uncompress.py +0 -0
  225. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/utils/__init__.py +0 -0
  226. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
  227. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/unstructured_api.py +0 -0
  228. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/__init__.py +0 -0
  229. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/chunking.py +0 -0
  230. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/compression.py +0 -0
  231. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/constants.py +0 -0
  232. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/data_prep.py +0 -0
  233. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/dep_check.py +0 -0
  234. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/html.py +0 -0
  235. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/ndjson.py +0 -0
  236. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/pydantic_models.py +0 -0
  237. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  238. {unstructured_ingest-1.0.4 → unstructured_ingest-1.0.6.dev0}/unstructured_ingest/utils/table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.4
3
+ Version: 1.0.6.dev0
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__ = "1.0.6-dev0" # pragma: no cover
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  from pathlib import Path
3
- from typing import Any, Optional, Union
3
+ from typing import Any, Optional
4
4
  from uuid import NAMESPACE_DNS, uuid5
5
5
 
6
6
  from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
@@ -29,7 +29,7 @@ class FileDataSourceMetadata(BaseModel):
29
29
  date_created: Optional[str] = None
30
30
  date_modified: Optional[str] = None
31
31
  date_processed: Optional[str] = None
32
- permissions_data: Union[list[dict[str, Any]], dict[str, Any], None] = None
32
+ permissions_data: Optional[list[dict[str, Any]]] = None
33
33
  filesize_bytes: Optional[int] = None
34
34
 
35
35
 
@@ -352,6 +352,7 @@ class ConfluenceDownloader(Downloader):
352
352
  def _get_permissions_for_space(self, space_id: int) -> Optional[List[dict]]:
353
353
  if space_id in self._permissions_cache:
354
354
  self._permissions_cache.move_to_end(space_id) # mark recent use
355
+ logger.debug(f"Retrieved cached permissions for space {space_id}")
355
356
  return self._permissions_cache[space_id]
356
357
  else:
357
358
  with self.connection_config.get_client() as client:
@@ -371,22 +372,27 @@ class ConfluenceDownloader(Downloader):
371
372
  self._permissions_cache.popitem(last=False) # LRU/FIFO eviction
372
373
  self._permissions_cache[space_id] = space_permissions
373
374
 
375
+ logger.debug(f"Retrieved permissions for space {space_id}")
374
376
  return space_permissions
375
377
  except Exception as e:
376
378
  logger.debug(f"Could not retrieve permissions for space {space_id}: {e}")
377
379
  return None
378
380
 
379
- def _parse_permissions_for_doc(self, doc_id: str, space_permissions: list) -> Optional[dict]:
381
+ def _parse_permissions_for_doc(
382
+ self, doc_id: str, space_permissions: list
383
+ ) -> Optional[list[dict]]:
380
384
  with self.connection_config.get_client() as client:
381
385
  try:
382
386
  doc_permissions = client.get_all_restrictions_for_content(content_id=doc_id)
383
387
  parsed_permissions_dict = self.parse_permissions(doc_permissions, space_permissions)
388
+ parsed_permissions_dict = [{k: v} for k, v in parsed_permissions_dict.items()]
384
389
 
385
390
  except Exception as e:
386
391
  # skip writing any permission metadata
387
392
  logger.debug(f"Could not retrieve permissions for doc {doc_id}: {e}")
388
393
  return None
389
394
 
395
+ logger.debug(f"normalized permissions generated: {parsed_permissions_dict}")
390
396
  return parsed_permissions_dict
391
397
 
392
398
  def run(self, file_data: FileData, **kwargs) -> download_responses:
@@ -54,7 +54,8 @@ class GoogleDriveAccessConfig(AccessConfig):
54
54
  default=None, description="Credentials values to use for authentication"
55
55
  )
56
56
  service_account_key_path: Optional[Path] = Field(
57
- default=None, description="File path to credentials values to use for authentication"
57
+ default=None,
58
+ description="File path to credentials values to use for authentication",
58
59
  )
59
60
 
60
61
  def model_post_init(self, __context: Any) -> None:
@@ -111,10 +112,9 @@ class GoogleDriveIndexerConfig(IndexerConfig):
111
112
  extensions: Optional[list[str]] = None
112
113
  recursive: bool = False
113
114
 
114
- def __post_init__(self):
115
- # Strip leading period of extension
115
+ def model_post_init(self, __context: Any) -> None:
116
116
  if self.extensions is not None:
117
- self.extensions = [e[1:] if e.startswith(".") else e for e in self.extensions]
117
+ self.extensions = [e.lstrip(".") for e in self.extensions]
118
118
 
119
119
 
120
120
  @dataclass
@@ -275,7 +275,8 @@ class GoogleDriveIndexer(Indexer):
275
275
 
276
276
  except Exception as e:
277
277
  logger.error(
278
- "Failed to validate Google Drive connection during precheck", exc_info=True
278
+ "Failed to validate Google Drive connection during precheck",
279
+ exc_info=True,
279
280
  )
280
281
  raise SourceConnectionError(f"Precheck failed: {e}")
281
282
 
@@ -284,17 +285,17 @@ class GoogleDriveIndexer(Indexer):
284
285
  return record.get("mimeType") == "application/vnd.google-apps.folder"
285
286
 
286
287
  @staticmethod
287
- def map_file_data(f: dict) -> FileData:
288
- file_id = f["id"]
289
- filename = f.pop("name")
290
- url = f.pop("webContentLink", None)
291
- version = f.pop("version", None)
292
- permissions = f.pop("permissions", None)
293
- date_created_str = f.pop("createdTime", None)
288
+ def map_file_data(root_info: dict) -> FileData:
289
+ file_id = root_info["id"]
290
+ filename = root_info.pop("name")
291
+ url = root_info.pop("webContentLink", None)
292
+ version = root_info.pop("version", None)
293
+ permissions = root_info.pop("permissions", None)
294
+ date_created_str = root_info.pop("createdTime", None)
294
295
  date_created_dt = parser.parse(date_created_str) if date_created_str else None
295
- date_modified_str = f.pop("modifiedTime", None)
296
- parent_path = f.pop("parent_path", None)
297
- parent_root_path = f.pop("parent_root_path", None)
296
+ date_modified_str = root_info.pop("modifiedTime", None)
297
+ parent_path = root_info.pop("parent_path", None)
298
+ parent_root_path = root_info.pop("parent_root_path", None)
298
299
  date_modified_dt = parser.parse(date_modified_str) if date_modified_str else None
299
300
  if (
300
301
  parent_path
@@ -323,7 +324,7 @@ class GoogleDriveIndexer(Indexer):
323
324
  "file_id": file_id,
324
325
  },
325
326
  ),
326
- additional_metadata=f,
327
+ additional_metadata=root_info,
327
328
  )
328
329
 
329
330
  def get_paginated_results(
@@ -404,13 +405,14 @@ class GoogleDriveIndexer(Indexer):
404
405
  data = []
405
406
  for f in file_contents:
406
407
  f["permissions"] = self.extract_permissions(f.get("permissions"))
407
- data.append(self.map_file_data(f=f))
408
+ data.append(self.map_file_data(root_info=f))
408
409
  for d in data:
409
410
  d.metadata.record_locator["drive_id"]: object_id
410
411
  return data
411
412
 
412
- def extract_permissions(self, permissions: list[dict]) -> dict:
413
+ def extract_permissions(self, permissions: Optional[list[dict]]) -> list[dict]:
413
414
  if not permissions:
415
+ logger.debug("no permissions found")
414
416
  return {}
415
417
 
416
418
  # https://developers.google.com/workspace/drive/api/guides/ref-roles
@@ -442,7 +444,8 @@ class GoogleDriveIndexer(Indexer):
442
444
  for key in role_dict:
443
445
  role_dict[key] = sorted(role_dict[key])
444
446
 
445
- return normalized_permissions
447
+ logger.debug(f"normalized permissions generated: {normalized_permissions}")
448
+ return [{k: v} for k, v in normalized_permissions.items()]
446
449
 
447
450
  def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
448
451
  with self.connection_config.get_client() as client:
@@ -1 +0,0 @@
1
- __version__ = "1.0.4" # pragma: no cover