unstructured-ingest 1.0.7__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (239)
  1. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/PKG-INFO +1 -1
  2. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/pyproject.toml +1 -0
  3. unstructured_ingest-1.0.11/unstructured_ingest/__version__.py +1 -0
  4. unstructured_ingest-1.0.11/unstructured_ingest/data_types/entities.py +17 -0
  5. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/google_drive.py +16 -2
  6. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +1 -1
  7. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/neo4j.py +53 -22
  8. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/pinecone.py +26 -0
  9. unstructured_ingest-1.0.7/unstructured_ingest/__version__.py +0 -1
  10. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/.gitignore +0 -0
  11. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/LICENSE.md +0 -0
  12. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/README.md +0 -0
  13. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/__init__.py +0 -0
  14. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/README.md +0 -0
  15. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/__init__.py +0 -0
  16. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/__init__.py +0 -0
  17. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/cmd.py +0 -0
  18. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/dest.py +0 -0
  19. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/importer.py +0 -0
  20. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/src.py +0 -0
  21. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/cli.py +0 -0
  22. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/cmds.py +0 -0
  23. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/__init__.py +0 -0
  24. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/click.py +0 -0
  25. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
  26. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/data_types/__init__.py +0 -0
  27. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/data_types/file_data.py +0 -0
  28. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/__init__.py +0 -0
  29. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/azure_openai.py +0 -0
  30. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/bedrock.py +0 -0
  31. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/huggingface.py +0 -0
  32. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/interfaces.py +0 -0
  33. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/mixedbreadai.py +0 -0
  34. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/octoai.py +0 -0
  35. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/openai.py +0 -0
  36. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/togetherai.py +0 -0
  37. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/vertexai.py +0 -0
  38. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/voyageai.py +0 -0
  39. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/error.py +0 -0
  40. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/errors_v2.py +0 -0
  41. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/__init__.py +0 -0
  42. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/connector.py +0 -0
  43. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/downloader.py +0 -0
  44. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/indexer.py +0 -0
  45. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/process.py +0 -0
  46. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/processor.py +0 -0
  47. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/upload_stager.py +0 -0
  48. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/uploader.py +0 -0
  49. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/logger.py +0 -0
  50. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/main.py +0 -0
  51. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/otel.py +0 -0
  52. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/__init__.py +0 -0
  53. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/interfaces.py +0 -0
  54. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/otel.py +0 -0
  55. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/pipeline.py +0 -0
  56. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
  57. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
  58. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/download.py +0 -0
  59. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/embed.py +0 -0
  60. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/filter.py +0 -0
  61. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/index.py +0 -0
  62. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/partition.py +0 -0
  63. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/stage.py +0 -0
  64. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
  65. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/upload.py +0 -0
  66. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/__init__.py +0 -0
  67. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/chunker.py +0 -0
  68. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connector_registry.py +0 -0
  69. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/__init__.py +0 -0
  70. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/airtable.py +0 -0
  71. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  72. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
  73. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
  74. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/astradb.py +0 -0
  75. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
  76. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/chroma.py +0 -0
  77. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/confluence.py +0 -0
  78. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
  79. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
  80. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
  81. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
  82. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
  83. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
  84. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
  85. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
  86. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
  87. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/discord.py +0 -0
  88. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
  89. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
  90. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
  91. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
  92. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
  93. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
  94. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
  95. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
  96. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
  97. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
  98. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
  99. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
  100. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
  101. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
  102. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
  103. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
  104. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/github.py +0 -0
  105. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
  106. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
  107. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/jira.py +0 -0
  108. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
  109. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
  110. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
  111. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
  112. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
  113. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
  114. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
  115. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
  116. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
  117. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
  118. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
  119. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
  120. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/local.py +0 -0
  121. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/milvus.py +0 -0
  122. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
  123. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  124. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
  125. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
  126. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
  127. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
  128. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
  129. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
  130. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
  131. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
  132. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  133. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
  134. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
  135. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
  136. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
  137. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
  138. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
  139. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
  140. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
  141. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
  142. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
  143. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
  144. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
  145. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
  146. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
  147. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
  148. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
  149. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
  150. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
  151. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
  152. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
  153. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
  154. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
  155. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
  156. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
  157. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
  158. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
  159. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
  160. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
  161. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
  162. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
  163. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
  164. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
  165. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
  166. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
  167. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
  168. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
  169. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
  170. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
  171. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
  172. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
  173. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
  174. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
  175. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
  176. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
  177. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
  178. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
  179. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
  180. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
  181. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
  182. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
  183. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
  184. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
  185. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
  186. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
  187. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
  188. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
  189. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
  190. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
  191. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
  192. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
  193. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
  194. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/outlook.py +0 -0
  195. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
  196. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
  197. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
  198. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
  199. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
  200. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
  201. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
  202. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
  203. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/slack.py +0 -0
  204. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
  205. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
  206. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
  207. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
  208. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
  209. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
  210. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
  211. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
  212. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/utils.py +0 -0
  213. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/vectara.py +0 -0
  214. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
  215. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
  216. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
  217. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
  218. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
  219. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  220. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
  221. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
  222. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/embedder.py +0 -0
  223. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/filter.py +0 -0
  224. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/partitioner.py +0 -0
  225. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/uncompress.py +0 -0
  226. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/utils/__init__.py +0 -0
  227. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
  228. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/unstructured_api.py +0 -0
  229. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/__init__.py +0 -0
  230. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/chunking.py +0 -0
  231. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/compression.py +0 -0
  232. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/constants.py +0 -0
  233. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/data_prep.py +0 -0
  234. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/dep_check.py +0 -0
  235. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/html.py +0 -0
  236. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/ndjson.py +0 -0
  237. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/pydantic_models.py +0 -0
  238. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
  239. {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.7
3
+ Version: 1.0.11
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -143,6 +143,7 @@ test = [
143
143
  "vertexai",
144
144
  "pyiceberg",
145
145
  "pyarrow",
146
+ "networkx"
146
147
  ]
147
148
  # Add constraints needed for CI
148
149
  ci = [
@@ -0,0 +1 @@
1
+ __version__ = "1.0.11" # pragma: no cover
@@ -0,0 +1,17 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ class Entity(BaseModel):
5
+ type: str
6
+ entity: str
7
+
8
+
9
+ class EntityRelationship(BaseModel):
10
+ to: str
11
+ from_: str = Field(..., alias="from")
12
+ relationship: str
13
+
14
+
15
+ class EntitiesData(BaseModel):
16
+ items: list[Entity] = Field(default_factory=list)
17
+ relationships: list[EntityRelationship] = Field(default_factory=list)
@@ -153,7 +153,13 @@ class GoogleDriveIndexer(Indexer):
153
153
  """
154
154
  try:
155
155
  # A very minimal call: list 1 file from the drive.
156
- client.list(spaces="drive", pageSize=1, fields="files(id)").execute()
156
+ client.list(
157
+ supportsAllDrives=True,
158
+ includeItemsFromAllDrives=True,
159
+ spaces="drive",
160
+ pageSize=1,
161
+ fields="files(id)",
162
+ ).execute()
157
163
  except HttpError as e:
158
164
  error_content = e.content.decode() if hasattr(e, "content") else ""
159
165
  lower_error = error_content.lower()
@@ -183,6 +189,8 @@ class GoogleDriveIndexer(Indexer):
183
189
  page_token = None
184
190
  while True:
185
191
  response = files_client.list(
192
+ supportsAllDrives=True,
193
+ includeItemsFromAllDrives=True,
186
194
  spaces="drive",
187
195
  q=query,
188
196
  fields="nextPageToken, files(id, mimeType, fileExtension)",
@@ -251,6 +259,8 @@ class GoogleDriveIndexer(Indexer):
251
259
  else:
252
260
  # Non-recursive: check for at least one immediate non-folder child.
253
261
  response = client.list(
262
+ supportsAllDrives=True,
263
+ includeItemsFromAllDrives=True,
254
264
  spaces="drive",
255
265
  fields="files(id)",
256
266
  pageSize=1,
@@ -348,6 +358,8 @@ class GoogleDriveIndexer(Indexer):
348
358
  files_response = []
349
359
  while not done:
350
360
  response: dict = files_client.list(
361
+ supportsAllDrives=True,
362
+ includeItemsFromAllDrives=True,
351
363
  spaces="drive",
352
364
  fields=fields_input,
353
365
  corpora="user",
@@ -381,7 +393,9 @@ class GoogleDriveIndexer(Indexer):
381
393
  return files_response
382
394
 
383
395
  def get_root_info(self, files_client, object_id: str) -> dict:
384
- return files_client.get(fileId=object_id, fields=",".join(self.fields)).execute()
396
+ return files_client.get(
397
+ supportsAllDrives=True, fileId=object_id, fields=",".join(self.fields)
398
+ ).execute()
385
399
 
386
400
  def get_files(
387
401
  self,
@@ -172,7 +172,7 @@ class IbmWatsonxUploaderConfig(UploaderConfig):
172
172
  namespace: str = Field(description="Namespace name")
173
173
  table: str = Field(description="Table name")
174
174
  max_retries: int = Field(
175
- default=5, description="Maximum number of retries to upload data", ge=2, le=10
175
+ default=5, description="Maximum number of retries to upload data", ge=2, le=500
176
176
  )
177
177
  record_id_key: str = Field(
178
178
  default=RECORD_ID_LABEL,
@@ -9,8 +9,9 @@ from enum import Enum
9
9
  from pathlib import Path
10
10
  from typing import TYPE_CHECKING, Any, AsyncGenerator, Literal, Optional
11
11
 
12
- from pydantic import BaseModel, ConfigDict, Field, Secret, field_validator
12
+ from pydantic import BaseModel, ConfigDict, Field, Secret, ValidationError, field_validator
13
13
 
14
+ from unstructured_ingest.data_types.entities import EntitiesData, Entity, EntityRelationship
14
15
  from unstructured_ingest.data_types.file_data import FileData
15
16
  from unstructured_ingest.error import DestinationConnectionError
16
17
  from unstructured_ingest.interfaces import (
@@ -97,7 +98,6 @@ class Neo4jUploadStager(UploadStager):
97
98
  **kwargs: Any,
98
99
  ) -> Path:
99
100
  elements = get_json_data(elements_filepath)
100
-
101
101
  nx_graph = self._create_lexical_graph(
102
102
  elements, self._create_document_node(file_data=file_data)
103
103
  )
@@ -109,28 +109,54 @@ class Neo4jUploadStager(UploadStager):
109
109
 
110
110
  return output_filepath
111
111
 
112
- def _add_entities(self, element: dict, graph: "Graph", element_node: _Node) -> None:
113
- entities = element.get("metadata", {}).get("entities", [])
114
- if not entities:
115
- return None
116
- if not isinstance(entities, list):
117
- return None
118
-
112
+ def _add_entities(self, entities: list[Entity], graph: "Graph", element_node: _Node) -> None:
119
113
  for entity in entities:
120
- if not isinstance(entity, dict):
121
- continue
122
- if "entity" not in entity or "type" not in entity:
123
- continue
124
114
  entity_node = _Node(
125
- labels=[Label.ENTITY], properties={"id": entity["entity"]}, id_=entity["entity"]
115
+ labels=[Label.ENTITY], properties={"id": entity.entity}, id_=entity.entity
126
116
  )
127
117
  graph.add_edge(
128
118
  entity_node,
129
- _Node(labels=[Label.ENTITY], properties={"id": entity["type"]}, id_=entity["type"]),
119
+ _Node(labels=[Label.ENTITY], properties={"id": entity.type}, id_=entity.type),
130
120
  relationship=Relationship.ENTITY_TYPE,
131
121
  )
132
122
  graph.add_edge(element_node, entity_node, relationship=Relationship.HAS_ENTITY)
133
123
 
124
+ def _add_entity_relationships(
125
+ self, relationships: list[EntityRelationship], graph: "Graph"
126
+ ) -> None:
127
+ for relationship in relationships:
128
+ from_node = _Node(
129
+ labels=[Label.ENTITY],
130
+ properties={"id": relationship.from_},
131
+ id_=relationship.from_,
132
+ )
133
+ to_node = _Node(
134
+ labels=[Label.ENTITY], properties={"id": relationship.to}, id_=relationship.to
135
+ )
136
+ graph.add_edge(from_node, to_node, relationship=relationship.relationship)
137
+
138
+ def _add_entity_data(self, element: dict, graph: "Graph", element_node: _Node) -> None:
139
+ entities = element.get("metadata", {}).get("entities", {})
140
+ if not entities:
141
+ return None
142
+ try:
143
+ if isinstance(entities, list):
144
+ self._add_entities(
145
+ [Entity.model_validate(e) for e in entities if isinstance(e, dict)],
146
+ graph,
147
+ element_node,
148
+ )
149
+ elif isinstance(entities, dict):
150
+ entity_data = EntitiesData.model_validate(entities)
151
+ self._add_entities(entity_data.items, graph, element_node)
152
+ self._add_entity_relationships(entity_data.relationships, graph)
153
+ except ValidationError:
154
+ logger.warning(
155
+ "Failed to add entities to the graph. "
156
+ "Please check the format of the entities in the input data."
157
+ )
158
+ return None
159
+
134
160
  def _create_lexical_graph(self, elements: list[dict], document_node: _Node) -> "Graph":
135
161
  import networkx as nx
136
162
 
@@ -149,7 +175,7 @@ class Neo4jUploadStager(UploadStager):
149
175
  previous_node = element_node
150
176
  graph.add_edge(element_node, document_node, relationship=Relationship.PART_OF_DOCUMENT)
151
177
 
152
- self._add_entities(element, graph, element_node)
178
+ self._add_entity_data(element, graph, element_node)
153
179
 
154
180
  if self._is_chunk(element):
155
181
  for origin_element in format_and_truncate_orig_elements(element, include_text=True):
@@ -165,7 +191,7 @@ class Neo4jUploadStager(UploadStager):
165
191
  document_node,
166
192
  relationship=Relationship.PART_OF_DOCUMENT,
167
193
  )
168
- self._add_entities(origin_element, graph, origin_element_node)
194
+ self._add_entity_data(origin_element, graph, origin_element_node)
169
195
 
170
196
  return graph
171
197
 
@@ -208,7 +234,9 @@ class _GraphData(BaseModel):
208
234
  _Edge(
209
235
  source=u,
210
236
  destination=v,
211
- relationship=Relationship(data_dict["relationship"]),
237
+ relationship=Relationship(data_dict["relationship"])
238
+ if data_dict["relationship"] in Relationship
239
+ else data_dict["relationship"],
212
240
  )
213
241
  for u, v, data_dict in nx_graph.edges(data=True)
214
242
  ]
@@ -242,7 +270,7 @@ class _Edge(BaseModel):
242
270
 
243
271
  source: _Node
244
272
  destination: _Node
245
- relationship: Relationship
273
+ relationship: Relationship | str
246
274
 
247
275
 
248
276
  class Label(Enum):
@@ -380,7 +408,7 @@ class Neo4jUploader(Uploader):
380
408
  )
381
409
  logger.info(f"Finished merging {len(graph_data.nodes)} graph nodes.")
382
410
 
383
- edges_by_relationship: defaultdict[tuple[Relationship, Label, Label], list[_Edge]] = (
411
+ edges_by_relationship: defaultdict[tuple[Relationship | str, Label, Label], list[_Edge]] = (
384
412
  defaultdict(list)
385
413
  )
386
414
  for edge in graph_data.edges:
@@ -463,16 +491,19 @@ class Neo4jUploader(Uploader):
463
491
  @staticmethod
464
492
  def _create_edges_query(
465
493
  edges: list[_Edge],
466
- relationship: Relationship,
494
+ relationship: Relationship | str,
467
495
  source_label: Label,
468
496
  destination_label: Label,
469
497
  ) -> tuple[str, dict]:
470
498
  logger.info(f"Preparing MERGE query for {len(edges)} {relationship} relationships.")
499
+ relationship = (
500
+ relationship.value if isinstance(relationship, Relationship) else relationship
501
+ )
471
502
  query_string = f"""
472
503
  UNWIND $edges AS edge
473
504
  MATCH (u: `{source_label.value}` {{id: edge.source}})
474
505
  MATCH (v: `{destination_label.value}` {{id: edge.destination}})
475
- MERGE (u)-[:`{relationship.value}`]->(v)
506
+ MERGE (u)-[:`{relationship}`]->(v)
476
507
  """
477
508
  parameters = {
478
509
  "edges": [
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import re
3
3
  from dataclasses import dataclass, field
4
+ from pathlib import Path
4
5
  from typing import TYPE_CHECKING, Any, Literal, Optional
5
6
 
6
7
  from pydantic import Field, Secret
@@ -18,11 +19,14 @@ from unstructured_ingest.interfaces import (
18
19
  )
19
20
  from unstructured_ingest.logger import logger
20
21
  from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
22
+ from unstructured_ingest.utils import ndjson
21
23
  from unstructured_ingest.utils.constants import RECORD_ID_LABEL
22
24
  from unstructured_ingest.utils.data_prep import (
23
25
  flatten_dict,
24
26
  generator_batching_wbytes,
25
27
  get_enhanced_element_id,
28
+ get_json_data,
29
+ write_data,
26
30
  )
27
31
  from unstructured_ingest.utils.dep_check import requires_dependencies
28
32
 
@@ -162,6 +166,28 @@ class PineconeUploadStager(UploadStager):
162
166
  "metadata": metadata,
163
167
  }
164
168
 
169
+ def stream_update(self, input_file: Path, output_file: Path, file_data: FileData) -> None:
170
+ with input_file.open() as in_f:
171
+ reader = ndjson.reader(in_f)
172
+ with output_file.open("w") as out_f:
173
+ writer = ndjson.writer(out_f)
174
+ for element in reader:
175
+ if "embeddings" not in element:
176
+ continue
177
+ conformed_element = self.conform_dict(element_dict=element, file_data=file_data)
178
+ writer.write(row=conformed_element)
179
+ writer.f.flush()
180
+
181
+ def process_whole(self, input_file: Path, output_file: Path, file_data: FileData) -> None:
182
+ elements_contents = get_json_data(path=input_file)
183
+
184
+ conformed_elements = [
185
+ self.conform_dict(element_dict=element, file_data=file_data)
186
+ for element in elements_contents
187
+ if "embeddings" in element
188
+ ]
189
+ write_data(path=output_file, data=conformed_elements)
190
+
165
191
 
166
192
  @dataclass
167
193
  class PineconeUploader(VectorDBUploader):
@@ -1 +0,0 @@
1
- __version__ = "1.0.7" # pragma: no cover