unstructured-ingest 0.5.21__tar.gz → 1.2.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (623)
  1. unstructured_ingest-1.2.35/.gitignore +213 -0
  2. unstructured_ingest-1.2.35/PKG-INFO +235 -0
  3. unstructured_ingest-1.2.35/pyproject.toml +215 -0
  4. unstructured_ingest-1.2.35/unstructured_ingest/__version__.py +1 -0
  5. unstructured_ingest-1.2.35/unstructured_ingest/cli/README.md +28 -0
  6. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/base/cmd.py +13 -13
  7. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/base/dest.py +5 -6
  8. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/base/src.py +7 -17
  9. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/cli.py +1 -1
  10. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/cmds.py +2 -2
  11. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/utils/model_conversion.py +6 -6
  12. unstructured_ingest-1.2.35/unstructured_ingest/data_types/entities.py +17 -0
  13. {unstructured_ingest-0.5.21/unstructured_ingest/v2/interfaces → unstructured_ingest-1.2.35/unstructured_ingest/data_types}/file_data.py +1 -1
  14. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/azure_openai.py +11 -4
  15. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/bedrock.py +150 -37
  16. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/huggingface.py +11 -4
  17. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/interfaces.py +11 -8
  18. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/mixedbreadai.py +30 -45
  19. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/octoai.py +29 -8
  20. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/openai.py +53 -10
  21. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/togetherai.py +40 -8
  22. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/vertexai.py +4 -4
  23. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/embed/voyageai.py +12 -9
  24. unstructured_ingest-1.2.35/unstructured_ingest/error.py +156 -0
  25. unstructured_ingest-1.2.35/unstructured_ingest/errors_v2.py +156 -0
  26. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/__init__.py +0 -6
  27. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/connector.py +7 -1
  28. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/downloader.py +5 -3
  29. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/indexer.py +3 -3
  30. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/upload_stager.py +4 -4
  31. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/uploader.py +5 -6
  32. unstructured_ingest-1.2.35/unstructured_ingest/logger.py +39 -0
  33. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/otel.py +19 -2
  34. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/interfaces.py +5 -5
  35. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/otel.py +2 -2
  36. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/pipeline.py +20 -20
  37. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/chunk.py +5 -6
  38. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/download.py +5 -6
  39. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/embed.py +5 -6
  40. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/filter.py +4 -4
  41. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/index.py +5 -5
  42. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/partition.py +5 -6
  43. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/stage.py +5 -5
  44. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/uncompress.py +4 -4
  45. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/pipeline/steps/upload.py +5 -5
  46. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/chunker.py +12 -5
  47. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connector_registry.py +1 -1
  48. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/__init__.py +14 -11
  49. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/airtable.py +10 -7
  50. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +9 -0
  51. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/astradb.py +160 -52
  52. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/azure_ai_search.py +10 -11
  53. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/chroma.py +12 -9
  54. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/confluence.py +527 -0
  55. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/couchbase.py +12 -10
  56. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/databricks/__init__.py +1 -1
  57. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/databricks/volumes.py +41 -19
  58. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/databricks/volumes_aws.py +4 -4
  59. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/databricks/volumes_azure.py +5 -5
  60. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/databricks/volumes_gcp.py +4 -4
  61. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/databricks/volumes_native.py +3 -3
  62. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/databricks/volumes_table.py +187 -0
  63. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/delta_table.py +310 -0
  64. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/discord.py +13 -10
  65. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/duckdb/__init__.py +1 -1
  66. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/duckdb/base.py +4 -4
  67. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/duckdb/duckdb.py +11 -11
  68. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/duckdb/motherduck.py +10 -10
  69. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/elasticsearch/__init__.py +1 -1
  70. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/elasticsearch/elasticsearch.py +49 -41
  71. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
  72. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/__init__.py +1 -1
  73. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/azure.py +8 -8
  74. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/box.py +8 -8
  75. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/dropbox.py +9 -8
  76. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/fsspec.py +140 -29
  77. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/gcs.py +9 -9
  78. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/s3.py +79 -17
  79. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/sftp.py +77 -7
  80. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/github.py +226 -0
  81. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/gitlab.py +18 -16
  82. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/google_drive.py +848 -0
  83. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/ibm_watsonx/__init__.py +1 -1
  84. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +94 -28
  85. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/jira.py +522 -0
  86. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/kafka/__init__.py +1 -1
  87. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/kafka/cloud.py +3 -3
  88. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/kafka/kafka.py +14 -12
  89. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/kafka/local.py +2 -2
  90. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/kdbai.py +12 -7
  91. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/__init__.py +1 -1
  92. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/aws.py +3 -3
  93. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/azure.py +3 -3
  94. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/cloud.py +3 -3
  95. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/gcp.py +3 -3
  96. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/lancedb.py +9 -5
  97. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/lancedb/local.py +3 -3
  98. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/local.py +22 -19
  99. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/milvus.py +101 -15
  100. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/mongodb.py +41 -13
  101. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/neo4j.py +67 -32
  102. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/client.py +20 -20
  103. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/connector.py +18 -14
  104. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/helpers.py +4 -4
  105. {unstructured_ingest-0.5.21/unstructured_ingest → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion}/ingest_backoff/_wrapper.py +5 -1
  106. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  107. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/block.py +4 -4
  108. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/bookmark.py +2 -2
  109. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/breadcrumb.py +1 -1
  110. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/bulleted_list_item.py +2 -2
  111. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/callout.py +42 -5
  112. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/child_database.py +1 -1
  113. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/child_page.py +1 -1
  114. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/code.py +2 -2
  115. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/column_list.py +1 -1
  116. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/divider.py +1 -1
  117. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/embed.py +2 -2
  118. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/equation.py +1 -1
  119. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/file.py +4 -4
  120. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/heading.py +2 -2
  121. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/image.py +2 -2
  122. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/link_preview.py +1 -1
  123. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  124. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/numbered_list.py +2 -2
  125. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/paragraph.py +2 -2
  126. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/pdf.py +3 -3
  127. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/quote.py +2 -2
  128. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
  129. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/table.py +2 -5
  130. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/table_of_contents.py +1 -1
  131. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/template.py +2 -2
  132. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/todo.py +2 -2
  133. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/toggle.py +2 -2
  134. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/blocks/unsupported.py +1 -1
  135. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/video.py +2 -2
  136. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database.py +6 -6
  137. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/__init__.py +23 -4
  138. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/checkbox.py +2 -1
  139. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/created_by.py +3 -2
  140. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/created_time.py +2 -1
  141. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/date.py +3 -2
  142. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/email.py +2 -1
  143. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/files.py +3 -2
  144. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/formula.py +2 -1
  145. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/last_edited_by.py +3 -3
  146. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/last_edited_time.py +2 -1
  147. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/multiselect.py +2 -1
  148. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/number.py +2 -1
  149. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/people.py +3 -2
  150. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/phone_number.py +2 -1
  151. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/relation.py +2 -1
  152. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/rich_text.py +3 -2
  153. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/rollup.py +2 -1
  154. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/select.py +2 -1
  155. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/status.py +2 -1
  156. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/title.py +3 -2
  157. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/unique_id.py +2 -1
  158. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/database_properties/url.py +2 -1
  159. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/database_properties/verification.py +4 -3
  160. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/date.py +1 -1
  161. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/file.py +1 -1
  162. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/page.py +13 -6
  163. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/parent.py +1 -1
  164. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/rich_text.py +3 -3
  165. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/notion/types/user.py +11 -7
  166. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/onedrive.py +76 -28
  167. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/outlook.py +14 -11
  168. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/pinecone.py +46 -16
  169. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/qdrant/__init__.py +1 -1
  170. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/qdrant/cloud.py +2 -2
  171. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/qdrant/local.py +2 -2
  172. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/qdrant/qdrant.py +9 -6
  173. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/qdrant/server.py +2 -2
  174. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/redisdb.py +80 -49
  175. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/salesforce.py +22 -18
  176. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/sharepoint.py +282 -0
  177. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/slack.py +16 -15
  178. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/__init__.py +5 -1
  179. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/databricks_delta_tables.py +17 -9
  180. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/postgres.py +11 -5
  181. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/singlestore.py +15 -10
  182. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/snowflake.py +59 -25
  183. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/sql.py +46 -20
  184. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/sqlite.py +10 -4
  185. unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/sql/teradata.py +253 -0
  186. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/sql/vastdb.py +15 -12
  187. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/vectara.py +8 -10
  188. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/weaviate/__init__.py +1 -1
  189. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/weaviate/cloud.py +4 -3
  190. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/weaviate/embedded.py +3 -3
  191. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/weaviate/local.py +3 -3
  192. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/weaviate/weaviate.py +40 -20
  193. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/zendesk/client.py +9 -3
  194. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/zendesk/zendesk.py +24 -17
  195. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/embedder.py +7 -3
  196. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/filter.py +4 -4
  197. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/partitioner.py +10 -6
  198. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/uncompress.py +3 -3
  199. unstructured_ingest-1.2.35/unstructured_ingest/processes/utils/__init__.py +8 -0
  200. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/utils/blob_storage.py +4 -3
  201. unstructured_ingest-1.2.35/unstructured_ingest/processes/utils/logging/connector.py +365 -0
  202. unstructured_ingest-1.2.35/unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
  203. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/unstructured_api.py +15 -10
  204. unstructured_ingest-1.2.35/unstructured_ingest/utils/__init__.py +5 -0
  205. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/compression.py +3 -49
  206. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/data_prep.py +17 -33
  207. unstructured_ingest-1.2.35/unstructured_ingest/utils/filesystem.py +27 -0
  208. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/html.py +18 -3
  209. unstructured_ingest-0.5.21/unstructured_ingest/v2/utils.py → unstructured_ingest-1.2.35/unstructured_ingest/utils/pydantic_models.py +0 -9
  210. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/string_and_date_utils.py +4 -4
  211. unstructured_ingest-1.2.35/unstructured_ingest/utils/tls.py +15 -0
  212. unstructured_ingest-0.5.21/MANIFEST.in +0 -2
  213. unstructured_ingest-0.5.21/PKG-INFO +0 -382
  214. unstructured_ingest-0.5.21/pyproject.toml +0 -79
  215. unstructured_ingest-0.5.21/setup.cfg +0 -19
  216. unstructured_ingest-0.5.21/setup.py +0 -217
  217. unstructured_ingest-0.5.21/test/integration/chunkers/test_chunkers.py +0 -31
  218. unstructured_ingest-0.5.21/test/integration/connectors/conftest.py +0 -38
  219. unstructured_ingest-0.5.21/test/integration/connectors/databricks/test_volumes_native.py +0 -273
  220. unstructured_ingest-0.5.21/test/integration/connectors/discord/test_discord.py +0 -90
  221. unstructured_ingest-0.5.21/test/integration/connectors/duckdb/conftest.py +0 -14
  222. unstructured_ingest-0.5.21/test/integration/connectors/duckdb/test_duckdb.py +0 -90
  223. unstructured_ingest-0.5.21/test/integration/connectors/duckdb/test_motherduck.py +0 -95
  224. unstructured_ingest-0.5.21/test/integration/connectors/elasticsearch/conftest.py +0 -34
  225. unstructured_ingest-0.5.21/test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
  226. unstructured_ingest-0.5.21/test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
  227. unstructured_ingest-0.5.21/test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
  228. unstructured_ingest-0.5.21/test/integration/connectors/sql/test_postgres.py +0 -201
  229. unstructured_ingest-0.5.21/test/integration/connectors/sql/test_singlestore.py +0 -182
  230. unstructured_ingest-0.5.21/test/integration/connectors/sql/test_snowflake.py +0 -244
  231. unstructured_ingest-0.5.21/test/integration/connectors/sql/test_sqlite.py +0 -168
  232. unstructured_ingest-0.5.21/test/integration/connectors/sql/test_vastdb.py +0 -34
  233. unstructured_ingest-0.5.21/test/integration/connectors/test_astradb.py +0 -287
  234. unstructured_ingest-0.5.21/test/integration/connectors/test_azure_ai_search.py +0 -254
  235. unstructured_ingest-0.5.21/test/integration/connectors/test_chroma.py +0 -136
  236. unstructured_ingest-0.5.21/test/integration/connectors/test_confluence.py +0 -111
  237. unstructured_ingest-0.5.21/test/integration/connectors/test_delta_table.py +0 -183
  238. unstructured_ingest-0.5.21/test/integration/connectors/test_dropbox.py +0 -151
  239. unstructured_ingest-0.5.21/test/integration/connectors/test_google_drive.py +0 -257
  240. unstructured_ingest-0.5.21/test/integration/connectors/test_jira.py +0 -67
  241. unstructured_ingest-0.5.21/test/integration/connectors/test_lancedb.py +0 -247
  242. unstructured_ingest-0.5.21/test/integration/connectors/test_milvus.py +0 -208
  243. unstructured_ingest-0.5.21/test/integration/connectors/test_mongodb.py +0 -335
  244. unstructured_ingest-0.5.21/test/integration/connectors/test_neo4j.py +0 -244
  245. unstructured_ingest-0.5.21/test/integration/connectors/test_notion.py +0 -152
  246. unstructured_ingest-0.5.21/test/integration/connectors/test_onedrive.py +0 -163
  247. unstructured_ingest-0.5.21/test/integration/connectors/test_pinecone.py +0 -387
  248. unstructured_ingest-0.5.21/test/integration/connectors/test_qdrant.py +0 -216
  249. unstructured_ingest-0.5.21/test/integration/connectors/test_redis.py +0 -143
  250. unstructured_ingest-0.5.21/test/integration/connectors/test_s3.py +0 -184
  251. unstructured_ingest-0.5.21/test/integration/connectors/test_sharepoint.py +0 -222
  252. unstructured_ingest-0.5.21/test/integration/connectors/test_vectara.py +0 -270
  253. unstructured_ingest-0.5.21/test/integration/connectors/test_zendesk.py +0 -120
  254. unstructured_ingest-0.5.21/test/integration/connectors/utils/constants.py +0 -13
  255. unstructured_ingest-0.5.21/test/integration/connectors/utils/docker.py +0 -151
  256. unstructured_ingest-0.5.21/test/integration/connectors/utils/docker_compose.py +0 -59
  257. unstructured_ingest-0.5.21/test/integration/connectors/utils/validation/__init__.py +0 -0
  258. unstructured_ingest-0.5.21/test/integration/connectors/utils/validation/destination.py +0 -76
  259. unstructured_ingest-0.5.21/test/integration/connectors/utils/validation/equality.py +0 -76
  260. unstructured_ingest-0.5.21/test/integration/connectors/utils/validation/source.py +0 -330
  261. unstructured_ingest-0.5.21/test/integration/connectors/utils/validation/utils.py +0 -36
  262. unstructured_ingest-0.5.21/test/integration/connectors/weaviate/__init__.py +0 -0
  263. unstructured_ingest-0.5.21/test/integration/connectors/weaviate/conftest.py +0 -15
  264. unstructured_ingest-0.5.21/test/integration/connectors/weaviate/test_cloud.py +0 -39
  265. unstructured_ingest-0.5.21/test/integration/connectors/weaviate/test_local.py +0 -152
  266. unstructured_ingest-0.5.21/test/integration/embedders/__init__.py +0 -0
  267. unstructured_ingest-0.5.21/test/integration/embedders/conftest.py +0 -13
  268. unstructured_ingest-0.5.21/test/integration/embedders/test_azure_openai.py +0 -57
  269. unstructured_ingest-0.5.21/test/integration/embedders/test_bedrock.py +0 -103
  270. unstructured_ingest-0.5.21/test/integration/embedders/test_huggingface.py +0 -24
  271. unstructured_ingest-0.5.21/test/integration/embedders/test_mixedbread.py +0 -71
  272. unstructured_ingest-0.5.21/test/integration/embedders/test_octoai.py +0 -75
  273. unstructured_ingest-0.5.21/test/integration/embedders/test_openai.py +0 -74
  274. unstructured_ingest-0.5.21/test/integration/embedders/test_togetherai.py +0 -71
  275. unstructured_ingest-0.5.21/test/integration/embedders/test_vertexai.py +0 -63
  276. unstructured_ingest-0.5.21/test/integration/embedders/test_voyageai.py +0 -79
  277. unstructured_ingest-0.5.21/test/integration/embedders/utils.py +0 -66
  278. unstructured_ingest-0.5.21/test/integration/partitioners/__init__.py +0 -0
  279. unstructured_ingest-0.5.21/test/integration/partitioners/test_partitioner.py +0 -76
  280. unstructured_ingest-0.5.21/test/integration/utils.py +0 -15
  281. unstructured_ingest-0.5.21/test/unit/__init__.py +0 -0
  282. unstructured_ingest-0.5.21/test/unit/embed/__init__.py +0 -0
  283. unstructured_ingest-0.5.21/test/unit/embed/test_mixedbreadai.py +0 -42
  284. unstructured_ingest-0.5.21/test/unit/embed/test_octoai.py +0 -27
  285. unstructured_ingest-0.5.21/test/unit/embed/test_openai.py +0 -28
  286. unstructured_ingest-0.5.21/test/unit/embed/test_vertexai.py +0 -25
  287. unstructured_ingest-0.5.21/test/unit/embed/test_voyageai.py +0 -24
  288. unstructured_ingest-0.5.21/test/unit/test_error.py +0 -27
  289. unstructured_ingest-0.5.21/test/unit/test_html.py +0 -112
  290. unstructured_ingest-0.5.21/test/unit/test_logger.py +0 -78
  291. unstructured_ingest-0.5.21/test/unit/test_utils.py +0 -211
  292. unstructured_ingest-0.5.21/test/unit/v2/__init__.py +0 -0
  293. unstructured_ingest-0.5.21/test/unit/v2/chunkers/__init__.py +0 -0
  294. unstructured_ingest-0.5.21/test/unit/v2/chunkers/test_chunkers.py +0 -49
  295. unstructured_ingest-0.5.21/test/unit/v2/connectors/__init__.py +0 -0
  296. unstructured_ingest-0.5.21/test/unit/v2/connectors/databricks/__init__.py +0 -0
  297. unstructured_ingest-0.5.21/test/unit/v2/connectors/databricks/test_volumes_table.py +0 -44
  298. unstructured_ingest-0.5.21/test/unit/v2/connectors/ibm_watsonx/__init__.py +0 -0
  299. unstructured_ingest-0.5.21/test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
  300. unstructured_ingest-0.5.21/test/unit/v2/connectors/motherduck/__init__.py +0 -0
  301. unstructured_ingest-0.5.21/test/unit/v2/connectors/motherduck/test_base.py +0 -74
  302. unstructured_ingest-0.5.21/test/unit/v2/connectors/sql/__init__.py +0 -0
  303. unstructured_ingest-0.5.21/test/unit/v2/connectors/sql/test_sql.py +0 -152
  304. unstructured_ingest-0.5.21/test/unit/v2/connectors/test_confluence.py +0 -71
  305. unstructured_ingest-0.5.21/test/unit/v2/connectors/test_jira.py +0 -401
  306. unstructured_ingest-0.5.21/test/unit/v2/embedders/__init__.py +0 -0
  307. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_bedrock.py +0 -36
  308. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_huggingface.py +0 -48
  309. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_mixedbread.py +0 -37
  310. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_octoai.py +0 -35
  311. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_openai.py +0 -35
  312. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_togetherai.py +0 -37
  313. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_vertexai.py +0 -37
  314. unstructured_ingest-0.5.21/test/unit/v2/embedders/test_voyageai.py +0 -38
  315. unstructured_ingest-0.5.21/test/unit/v2/partitioners/__init__.py +0 -0
  316. unstructured_ingest-0.5.21/test/unit/v2/partitioners/test_partitioner.py +0 -63
  317. unstructured_ingest-0.5.21/test/unit/v2/test_interfaces.py +0 -26
  318. unstructured_ingest-0.5.21/test/unit/v2/test_utils.py +0 -82
  319. unstructured_ingest-0.5.21/test/unit/v2/utils/__init__.py +0 -0
  320. unstructured_ingest-0.5.21/test/unit/v2/utils/data_generator.py +0 -32
  321. unstructured_ingest-0.5.21/unstructured_ingest/__version__.py +0 -1
  322. unstructured_ingest-0.5.21/unstructured_ingest/cli/__init__.py +0 -14
  323. unstructured_ingest-0.5.21/unstructured_ingest/cli/base/__init__.py +0 -0
  324. unstructured_ingest-0.5.21/unstructured_ingest/cli/base/cmd.py +0 -19
  325. unstructured_ingest-0.5.21/unstructured_ingest/cli/base/dest.py +0 -87
  326. unstructured_ingest-0.5.21/unstructured_ingest/cli/base/src.py +0 -57
  327. unstructured_ingest-0.5.21/unstructured_ingest/cli/cli.py +0 -37
  328. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmd_factory.py +0 -12
  329. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/__init__.py +0 -145
  330. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/airtable.py +0 -69
  331. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/astradb.py +0 -99
  332. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  333. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/biomed.py +0 -52
  334. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/chroma.py +0 -104
  335. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/clarifai.py +0 -71
  336. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/confluence.py +0 -69
  337. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  338. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/delta_table.py +0 -94
  339. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/discord.py +0 -47
  340. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  341. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  342. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  343. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  344. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  345. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  346. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  347. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  348. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  349. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/github.py +0 -54
  350. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/gitlab.py +0 -54
  351. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/google_drive.py +0 -49
  352. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/hubspot.py +0 -70
  353. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/jira.py +0 -71
  354. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/kafka.py +0 -102
  355. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/local.py +0 -43
  356. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/mongodb.py +0 -72
  357. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/notion.py +0 -48
  358. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/onedrive.py +0 -66
  359. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/opensearch.py +0 -117
  360. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/outlook.py +0 -67
  361. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/pinecone.py +0 -71
  362. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/qdrant.py +0 -124
  363. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/reddit.py +0 -67
  364. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/salesforce.py +0 -58
  365. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  366. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/slack.py +0 -56
  367. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/sql.py +0 -66
  368. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/vectara.py +0 -66
  369. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/weaviate.py +0 -98
  370. unstructured_ingest-0.5.21/unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  371. unstructured_ingest-0.5.21/unstructured_ingest/cli/common.py +0 -7
  372. unstructured_ingest-0.5.21/unstructured_ingest/cli/interfaces.py +0 -663
  373. unstructured_ingest-0.5.21/unstructured_ingest/cli/utils.py +0 -205
  374. unstructured_ingest-0.5.21/unstructured_ingest/connector/__init__.py +0 -0
  375. unstructured_ingest-0.5.21/unstructured_ingest/connector/airtable.py +0 -309
  376. unstructured_ingest-0.5.21/unstructured_ingest/connector/astradb.py +0 -267
  377. unstructured_ingest-0.5.21/unstructured_ingest/connector/azure_ai_search.py +0 -144
  378. unstructured_ingest-0.5.21/unstructured_ingest/connector/biomed.py +0 -320
  379. unstructured_ingest-0.5.21/unstructured_ingest/connector/chroma.py +0 -158
  380. unstructured_ingest-0.5.21/unstructured_ingest/connector/clarifai.py +0 -122
  381. unstructured_ingest-0.5.21/unstructured_ingest/connector/confluence.py +0 -285
  382. unstructured_ingest-0.5.21/unstructured_ingest/connector/databricks_volumes.py +0 -137
  383. unstructured_ingest-0.5.21/unstructured_ingest/connector/delta_table.py +0 -203
  384. unstructured_ingest-0.5.21/unstructured_ingest/connector/discord.py +0 -180
  385. unstructured_ingest-0.5.21/unstructured_ingest/connector/elasticsearch.py +0 -396
  386. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/__init__.py +0 -0
  387. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/azure.py +0 -78
  388. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/box.py +0 -109
  389. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  390. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  391. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/gcs.py +0 -82
  392. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/s3.py +0 -62
  393. unstructured_ingest-0.5.21/unstructured_ingest/connector/fsspec/sftp.py +0 -81
  394. unstructured_ingest-0.5.21/unstructured_ingest/connector/git.py +0 -124
  395. unstructured_ingest-0.5.21/unstructured_ingest/connector/github.py +0 -174
  396. unstructured_ingest-0.5.21/unstructured_ingest/connector/gitlab.py +0 -142
  397. unstructured_ingest-0.5.21/unstructured_ingest/connector/google_drive.py +0 -348
  398. unstructured_ingest-0.5.21/unstructured_ingest/connector/hubspot.py +0 -278
  399. unstructured_ingest-0.5.21/unstructured_ingest/connector/jira.py +0 -469
  400. unstructured_ingest-0.5.21/unstructured_ingest/connector/kafka.py +0 -293
  401. unstructured_ingest-0.5.21/unstructured_ingest/connector/local.py +0 -139
  402. unstructured_ingest-0.5.21/unstructured_ingest/connector/mongodb.py +0 -284
  403. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/__init__.py +0 -0
  404. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/client.py +0 -248
  405. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/connector.py +0 -469
  406. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/helpers.py +0 -584
  407. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/__init__.py +0 -0
  408. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  409. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -21
  410. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -31
  411. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  412. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  413. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -23
  414. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  415. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  416. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -36
  417. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/image.py +0 -21
  418. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  419. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  420. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -31
  421. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  422. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  423. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  424. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  425. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  426. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  427. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  428. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  429. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -35
  430. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -34
  431. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  432. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -36
  433. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  434. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  435. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  436. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  437. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  438. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -78
  439. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/date.py +0 -26
  440. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/file.py +0 -51
  441. unstructured_ingest-0.5.21/unstructured_ingest/connector/notion/types/user.py +0 -76
  442. unstructured_ingest-0.5.21/unstructured_ingest/connector/onedrive.py +0 -232
  443. unstructured_ingest-0.5.21/unstructured_ingest/connector/opensearch.py +0 -218
  444. unstructured_ingest-0.5.21/unstructured_ingest/connector/outlook.py +0 -285
  445. unstructured_ingest-0.5.21/unstructured_ingest/connector/pinecone.py +0 -150
  446. unstructured_ingest-0.5.21/unstructured_ingest/connector/qdrant.py +0 -144
  447. unstructured_ingest-0.5.21/unstructured_ingest/connector/reddit.py +0 -166
  448. unstructured_ingest-0.5.21/unstructured_ingest/connector/registry.py +0 -109
  449. unstructured_ingest-0.5.21/unstructured_ingest/connector/salesforce.py +0 -301
  450. unstructured_ingest-0.5.21/unstructured_ingest/connector/sharepoint.py +0 -573
  451. unstructured_ingest-0.5.21/unstructured_ingest/connector/slack.py +0 -224
  452. unstructured_ingest-0.5.21/unstructured_ingest/connector/sql.py +0 -199
  453. unstructured_ingest-0.5.21/unstructured_ingest/connector/vectara.py +0 -253
  454. unstructured_ingest-0.5.21/unstructured_ingest/connector/weaviate.py +0 -190
  455. unstructured_ingest-0.5.21/unstructured_ingest/connector/wikipedia.py +0 -208
  456. unstructured_ingest-0.5.21/unstructured_ingest/embed/__init__.py +0 -0
  457. unstructured_ingest-0.5.21/unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  458. unstructured_ingest-0.5.21/unstructured_ingest/enhanced_dataclass/core.py +0 -99
  459. unstructured_ingest-0.5.21/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  460. unstructured_ingest-0.5.21/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  461. unstructured_ingest-0.5.21/unstructured_ingest/error.py +0 -49
  462. unstructured_ingest-0.5.21/unstructured_ingest/interfaces.py +0 -852
  463. unstructured_ingest-0.5.21/unstructured_ingest/logger.py +0 -130
  464. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/__init__.py +0 -22
  465. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/copy.py +0 -19
  466. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/doc_factory.py +0 -12
  467. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/interfaces.py +0 -270
  468. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/partition.py +0 -60
  469. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/permissions.py +0 -12
  470. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/pipeline.py +0 -117
  471. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  472. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  473. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  474. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/source.py +0 -77
  475. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/utils.py +0 -6
  476. unstructured_ingest-0.5.21/unstructured_ingest/pipeline/write.py +0 -18
  477. unstructured_ingest-0.5.21/unstructured_ingest/processor.py +0 -93
  478. unstructured_ingest-0.5.21/unstructured_ingest/runner/__init__.py +0 -104
  479. unstructured_ingest-0.5.21/unstructured_ingest/runner/airtable.py +0 -35
  480. unstructured_ingest-0.5.21/unstructured_ingest/runner/astradb.py +0 -34
  481. unstructured_ingest-0.5.21/unstructured_ingest/runner/base_runner.py +0 -89
  482. unstructured_ingest-0.5.21/unstructured_ingest/runner/biomed.py +0 -45
  483. unstructured_ingest-0.5.21/unstructured_ingest/runner/confluence.py +0 -35
  484. unstructured_ingest-0.5.21/unstructured_ingest/runner/delta_table.py +0 -34
  485. unstructured_ingest-0.5.21/unstructured_ingest/runner/discord.py +0 -35
  486. unstructured_ingest-0.5.21/unstructured_ingest/runner/elasticsearch.py +0 -40
  487. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/__init__.py +0 -0
  488. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/azure.py +0 -30
  489. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/box.py +0 -28
  490. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  491. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  492. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/gcs.py +0 -28
  493. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/s3.py +0 -28
  494. unstructured_ingest-0.5.21/unstructured_ingest/runner/fsspec/sftp.py +0 -28
  495. unstructured_ingest-0.5.21/unstructured_ingest/runner/github.py +0 -37
  496. unstructured_ingest-0.5.21/unstructured_ingest/runner/gitlab.py +0 -37
  497. unstructured_ingest-0.5.21/unstructured_ingest/runner/google_drive.py +0 -35
  498. unstructured_ingest-0.5.21/unstructured_ingest/runner/hubspot.py +0 -35
  499. unstructured_ingest-0.5.21/unstructured_ingest/runner/jira.py +0 -35
  500. unstructured_ingest-0.5.21/unstructured_ingest/runner/kafka.py +0 -34
  501. unstructured_ingest-0.5.21/unstructured_ingest/runner/local.py +0 -23
  502. unstructured_ingest-0.5.21/unstructured_ingest/runner/mongodb.py +0 -34
  503. unstructured_ingest-0.5.21/unstructured_ingest/runner/notion.py +0 -61
  504. unstructured_ingest-0.5.21/unstructured_ingest/runner/onedrive.py +0 -35
  505. unstructured_ingest-0.5.21/unstructured_ingest/runner/opensearch.py +0 -40
  506. unstructured_ingest-0.5.21/unstructured_ingest/runner/outlook.py +0 -33
  507. unstructured_ingest-0.5.21/unstructured_ingest/runner/reddit.py +0 -35
  508. unstructured_ingest-0.5.21/unstructured_ingest/runner/salesforce.py +0 -33
  509. unstructured_ingest-0.5.21/unstructured_ingest/runner/sharepoint.py +0 -35
  510. unstructured_ingest-0.5.21/unstructured_ingest/runner/slack.py +0 -33
  511. unstructured_ingest-0.5.21/unstructured_ingest/runner/utils.py +0 -47
  512. unstructured_ingest-0.5.21/unstructured_ingest/runner/wikipedia.py +0 -35
  513. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/__init__.py +0 -48
  514. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/astradb.py +0 -22
  515. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  516. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/base_writer.py +0 -26
  517. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/chroma.py +0 -22
  518. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/clarifai.py +0 -19
  519. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  520. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/delta_table.py +0 -24
  521. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  522. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  523. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  524. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  525. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  526. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  527. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  528. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/kafka.py +0 -21
  529. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/mongodb.py +0 -21
  530. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/opensearch.py +0 -26
  531. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/pinecone.py +0 -21
  532. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/qdrant.py +0 -19
  533. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/sql.py +0 -22
  534. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/vectara.py +0 -22
  535. unstructured_ingest-0.5.21/unstructured_ingest/runner/writers/weaviate.py +0 -21
  536. unstructured_ingest-0.5.21/unstructured_ingest/utils/__init__.py +0 -0
  537. unstructured_ingest-0.5.21/unstructured_ingest/utils/google_filetype.py +0 -9
  538. unstructured_ingest-0.5.21/unstructured_ingest/v2/__init__.py +0 -1
  539. unstructured_ingest-0.5.21/unstructured_ingest/v2/cli/__init__.py +0 -0
  540. unstructured_ingest-0.5.21/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  541. unstructured_ingest-0.5.21/unstructured_ingest/v2/errors.py +0 -25
  542. unstructured_ingest-0.5.21/unstructured_ingest/v2/logger.py +0 -126
  543. unstructured_ingest-0.5.21/unstructured_ingest/v2/main.py +0 -11
  544. unstructured_ingest-0.5.21/unstructured_ingest/v2/pipeline/__init__.py +0 -0
  545. unstructured_ingest-0.5.21/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  546. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  547. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/confluence.py +0 -307
  548. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +0 -109
  549. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/delta_table.py +0 -195
  550. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +0 -195
  551. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -479
  552. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/jira.py +0 -453
  553. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  554. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  555. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  556. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  557. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  558. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +0 -22
  559. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  560. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  561. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +0 -37
  562. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  563. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  564. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +0 -57
  565. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +0 -30
  566. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  567. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  568. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  569. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  570. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +0 -49
  571. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  572. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +0 -73
  573. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  574. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +0 -36
  575. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  576. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  577. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  578. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +0 -50
  579. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  580. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  581. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  582. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  583. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -134
  584. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  585. unstructured_ingest-0.5.21/unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  586. unstructured_ingest-0.5.21/unstructured_ingest.egg-info/PKG-INFO +0 -382
  587. unstructured_ingest-0.5.21/unstructured_ingest.egg-info/SOURCES.txt +0 -595
  588. unstructured_ingest-0.5.21/unstructured_ingest.egg-info/dependency_links.txt +0 -1
  589. unstructured_ingest-0.5.21/unstructured_ingest.egg-info/entry_points.txt +0 -2
  590. unstructured_ingest-0.5.21/unstructured_ingest.egg-info/requires.txt +0 -417
  591. unstructured_ingest-0.5.21/unstructured_ingest.egg-info/top_level.txt +0 -2
  592. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/LICENSE.md +0 -0
  593. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/README.md +0 -0
  594. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/__init__.py +0 -0
  595. {unstructured_ingest-0.5.21/test → unstructured_ingest-1.2.35/unstructured_ingest/cli}/__init__.py +0 -0
  596. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/base/__init__.py +0 -0
  597. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/base/importer.py +0 -0
  598. {unstructured_ingest-0.5.21/test/integration → unstructured_ingest-1.2.35/unstructured_ingest/cli/utils}/__init__.py +0 -0
  599. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/cli/utils/click.py +0 -0
  600. {unstructured_ingest-0.5.21/test/integration/chunkers → unstructured_ingest-1.2.35/unstructured_ingest/data_types}/__init__.py +0 -0
  601. {unstructured_ingest-0.5.21/test/integration/connectors → unstructured_ingest-1.2.35/unstructured_ingest/embed}/__init__.py +0 -0
  602. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/process.py +0 -0
  603. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/interfaces/processor.py +0 -0
  604. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/main.py +0 -0
  605. {unstructured_ingest-0.5.21/test/integration/connectors/databricks → unstructured_ingest-1.2.35/unstructured_ingest/pipeline}/__init__.py +0 -0
  606. {unstructured_ingest-0.5.21/test/integration/connectors/discord → unstructured_ingest-1.2.35/unstructured_ingest/pipeline/steps}/__init__.py +0 -0
  607. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/__init__.py +0 -0
  608. {unstructured_ingest-0.5.21/test/integration/connectors/duckdb → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/assets}/__init__.py +0 -0
  609. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/assets/weaviate_collection_config.json +0 -0
  610. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/fsspec/utils.py +0 -0
  611. {unstructured_ingest-0.5.21/test/integration/connectors/elasticsearch → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion}/__init__.py +0 -0
  612. {unstructured_ingest-0.5.21/unstructured_ingest → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion}/ingest_backoff/__init__.py +0 -0
  613. {unstructured_ingest-0.5.21/unstructured_ingest → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion}/ingest_backoff/_common.py +0 -0
  614. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/interfaces.py +0 -0
  615. {unstructured_ingest-0.5.21/test/integration/connectors/sql → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/notion/types}/__init__.py +0 -0
  616. {unstructured_ingest-0.5.21/unstructured_ingest/connector → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors}/notion/types/blocks/__init__.py +0 -0
  617. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest}/processes/connectors/utils.py +0 -0
  618. {unstructured_ingest-0.5.21/test/integration/connectors/utils → unstructured_ingest-1.2.35/unstructured_ingest/processes/connectors/zendesk}/__init__.py +0 -0
  619. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/chunking.py +0 -0
  620. {unstructured_ingest-0.5.21/unstructured_ingest/v2 → unstructured_ingest-1.2.35/unstructured_ingest/utils}/constants.py +0 -0
  621. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/dep_check.py +0 -0
  622. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/ndjson.py +0 -0
  623. {unstructured_ingest-0.5.21 → unstructured_ingest-1.2.35}/unstructured_ingest/utils/table.py +0 -0
@@ -0,0 +1,213 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ figures/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ pip-wheel-metadata/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Pycharm
42
+ .idea/
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+ nbs/
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100
+ __pypackages__/
101
+
102
+ # Celery stuff
103
+ celerybeat-schedule
104
+ celerybeat.pid
105
+
106
+ # SageMath parsed files
107
+ *.sage.py
108
+
109
+ # Environments
110
+ .env
111
+ .envrc
112
+ .venv
113
+ env/
114
+ venv/
115
+ ENV/
116
+ env.bak/
117
+ venv.bak/
118
+
119
+ # Spyder project settings
120
+ .spyderproject
121
+ .spyproject
122
+
123
+ # Rope project settings
124
+ .ropeproject
125
+
126
+ # mkdocs documentation
127
+ /site
128
+
129
+ # mypy
130
+ .mypy_cache/
131
+ .dmypy.json
132
+ dmypy.json
133
+
134
+ # Pyre type checker
135
+ .pyre/
136
+
137
+ # pyright (Python LSP/type-checker in VSCode) config
138
+ /pyrightconfig.json
139
+
140
+ # ingest outputs
141
+ /structured-output
142
+ test_unstructured_ingest/workdir/
143
+ test_unstructured_ingest/delta-table-dest/
144
+ test_unstructured_ingest/skipped-files.txt
145
+ test_unstructured_ingest/chroma-dest/
146
+
147
+ # suggested ingest mirror directory
148
+ /mirror
149
+
150
+ ## https://github.com/github/gitignore/blob/main/Global/Emacs.gitignore (partial)
151
+
152
+ *~
153
+ \#*\#
154
+ /.emacs.desktop
155
+ /.emacs.desktop.lock
156
+ *.elc
157
+ auto-save-list
158
+ tramp
159
+ .\#*
160
+
161
+ ## https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
162
+ .vscode/*
163
+ !.vscode/tasks.json
164
+ !.vscode/launch.json
165
+ !.vscode/extensions.json
166
+ !.vscode/*.code-snippets
167
+
168
+ # Local History for Visual Studio Code
169
+ .history/
170
+
171
+ # Built Visual Studio Code Extensions
172
+ *.vsix
173
+
174
+ ## https://github.com/github/gitignore/blob/main/Global/Vim.gitignore
175
+ # Swap
176
+ [._]*.s[a-v][a-z]
177
+ !*.svg # comment out if you don't need vector files
178
+ [._]*.sw[a-p]
179
+ [._]s[a-rt-v][a-z]
180
+ [._]ss[a-gi-z]
181
+ [._]sw[a-p]
182
+
183
+ # Session
184
+ Session.vim
185
+ Sessionx.vim
186
+
187
+ # Temporary
188
+ .netrwhist
189
+ # Auto-generated tag files
190
+ tags
191
+ # Persistent undo
192
+ [._]*.un~
193
+
194
+ .DS_Store
195
+
196
+ # Ruff cache
197
+ .ruff_cache/
198
+
199
+ .ppm
200
+ .vs
201
+
202
+ example-docs/*_images
203
+ examples/**/output/
204
+
205
+ outputdiff.txt
206
+ metricsdiff.txt
207
+
208
+ # analysis
209
+ annotated/
210
+
211
+ tmp_ingest/
212
+ .vs
213
+ .report.json
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.4
2
+ Name: unstructured_ingest
3
+ Version: 1.2.35
4
+ Summary: Local ETL data pipeline to get data RAG ready
5
+ Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
+ License-Expression: Apache-2.0
7
+ License-File: LICENSE.md
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: <3.13,>=3.10
20
+ Requires-Dist: certifi>=2025.7.14
21
+ Requires-Dist: click
22
+ Requires-Dist: opentelemetry-sdk
23
+ Requires-Dist: pydantic>=2.7
24
+ Requires-Dist: python-dateutil
25
+ Requires-Dist: tqdm
26
+ Provides-Extra: airtable
27
+ Requires-Dist: pandas; extra == 'airtable'
28
+ Requires-Dist: pyairtable; extra == 'airtable'
29
+ Provides-Extra: astradb
30
+ Requires-Dist: astrapy>2.0.0; extra == 'astradb'
31
+ Provides-Extra: azure
32
+ Requires-Dist: adlfs; extra == 'azure'
33
+ Requires-Dist: fsspec; extra == 'azure'
34
+ Provides-Extra: azure-ai-search
35
+ Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
36
+ Provides-Extra: bedrock
37
+ Requires-Dist: aioboto3; extra == 'bedrock'
38
+ Requires-Dist: aiobotocore[boto3]!=2.24.2; extra == 'bedrock'
39
+ Requires-Dist: boto3; extra == 'bedrock'
40
+ Provides-Extra: biomed
41
+ Requires-Dist: bs4; extra == 'biomed'
42
+ Requires-Dist: requests; extra == 'biomed'
43
+ Provides-Extra: box
44
+ Requires-Dist: boxfs; extra == 'box'
45
+ Requires-Dist: fsspec; extra == 'box'
46
+ Provides-Extra: chroma
47
+ Requires-Dist: chromadb; extra == 'chroma'
48
+ Provides-Extra: clarifai
49
+ Requires-Dist: clarifai; extra == 'clarifai'
50
+ Provides-Extra: confluence
51
+ Requires-Dist: atlassian-python-api; extra == 'confluence'
52
+ Requires-Dist: requests; extra == 'confluence'
53
+ Provides-Extra: couchbase
54
+ Requires-Dist: couchbase; extra == 'couchbase'
55
+ Provides-Extra: databricks-delta-tables
56
+ Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
57
+ Requires-Dist: pandas; extra == 'databricks-delta-tables'
58
+ Provides-Extra: databricks-volumes
59
+ Requires-Dist: databricks-sdk>=0.70.0; extra == 'databricks-volumes'
60
+ Provides-Extra: delta-table
61
+ Requires-Dist: boto3; extra == 'delta-table'
62
+ Requires-Dist: deltalake; extra == 'delta-table'
63
+ Requires-Dist: pandas; extra == 'delta-table'
64
+ Requires-Dist: pyarrow; extra == 'delta-table'
65
+ Requires-Dist: tenacity; extra == 'delta-table'
66
+ Provides-Extra: discord
67
+ Requires-Dist: discord-py; extra == 'discord'
68
+ Provides-Extra: doc
69
+ Requires-Dist: unstructured[doc]; extra == 'doc'
70
+ Provides-Extra: docx
71
+ Requires-Dist: unstructured[docx]; extra == 'docx'
72
+ Provides-Extra: dropbox
73
+ Requires-Dist: dropboxdrivefs; extra == 'dropbox'
74
+ Requires-Dist: fsspec; extra == 'dropbox'
75
+ Provides-Extra: duckdb
76
+ Requires-Dist: duckdb; extra == 'duckdb'
77
+ Requires-Dist: pandas; extra == 'duckdb'
78
+ Provides-Extra: elasticsearch
79
+ Requires-Dist: elasticsearch[async]<9.0.0; extra == 'elasticsearch'
80
+ Provides-Extra: epub
81
+ Requires-Dist: unstructured[epub]; extra == 'epub'
82
+ Provides-Extra: gcs
83
+ Requires-Dist: bs4; extra == 'gcs'
84
+ Requires-Dist: fsspec; extra == 'gcs'
85
+ Requires-Dist: gcsfs; extra == 'gcs'
86
+ Provides-Extra: github
87
+ Requires-Dist: pygithub>1.58.0; extra == 'github'
88
+ Requires-Dist: requests; extra == 'github'
89
+ Provides-Extra: gitlab
90
+ Requires-Dist: python-gitlab; extra == 'gitlab'
91
+ Provides-Extra: google-drive
92
+ Requires-Dist: google-api-python-client; extra == 'google-drive'
93
+ Requires-Dist: tenacity; extra == 'google-drive'
94
+ Provides-Extra: hubspot
95
+ Requires-Dist: hubspot-api-client; extra == 'hubspot'
96
+ Requires-Dist: urllib3; extra == 'hubspot'
97
+ Provides-Extra: huggingface
98
+ Requires-Dist: sentence-transformers; extra == 'huggingface'
99
+ Provides-Extra: ibm-watsonx-s3
100
+ Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
101
+ Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
102
+ Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
103
+ Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
104
+ Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
105
+ Provides-Extra: image
106
+ Requires-Dist: unstructured[image]; extra == 'image'
107
+ Provides-Extra: jira
108
+ Requires-Dist: atlassian-python-api; extra == 'jira'
109
+ Provides-Extra: kafka
110
+ Requires-Dist: confluent-kafka; extra == 'kafka'
111
+ Provides-Extra: kdbai
112
+ Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
113
+ Requires-Dist: pandas; extra == 'kdbai'
114
+ Provides-Extra: lancedb
115
+ Requires-Dist: lancedb; extra == 'lancedb'
116
+ Provides-Extra: md
117
+ Requires-Dist: unstructured[md]; extra == 'md'
118
+ Provides-Extra: milvus
119
+ Requires-Dist: pymilvus; extra == 'milvus'
120
+ Provides-Extra: mixedbreadai
121
+ Requires-Dist: mixedbread; extra == 'mixedbreadai'
122
+ Provides-Extra: mongodb
123
+ Requires-Dist: pymongo; extra == 'mongodb'
124
+ Provides-Extra: msg
125
+ Requires-Dist: unstructured[msg]; extra == 'msg'
126
+ Provides-Extra: neo4j
127
+ Requires-Dist: cymple; extra == 'neo4j'
128
+ Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
129
+ Requires-Dist: networkx; extra == 'neo4j'
130
+ Provides-Extra: notion
131
+ Requires-Dist: backoff; extra == 'notion'
132
+ Requires-Dist: htmlbuilder; extra == 'notion'
133
+ Requires-Dist: httpx; extra == 'notion'
134
+ Requires-Dist: notion-client; extra == 'notion'
135
+ Provides-Extra: octoai
136
+ Requires-Dist: openai; extra == 'octoai'
137
+ Requires-Dist: tiktoken; extra == 'octoai'
138
+ Provides-Extra: odt
139
+ Requires-Dist: unstructured[odt]; extra == 'odt'
140
+ Provides-Extra: onedrive
141
+ Requires-Dist: msal; extra == 'onedrive'
142
+ Requires-Dist: office365-rest-python-client; extra == 'onedrive'
143
+ Requires-Dist: requests; extra == 'onedrive'
144
+ Provides-Extra: openai
145
+ Requires-Dist: openai; extra == 'openai'
146
+ Requires-Dist: tiktoken; extra == 'openai'
147
+ Provides-Extra: opensearch
148
+ Requires-Dist: boto3>=1.26.0; extra == 'opensearch'
149
+ Requires-Dist: botocore>=1.29.0; extra == 'opensearch'
150
+ Requires-Dist: opensearch-py<3.0.0,>=2.4.0; extra == 'opensearch'
151
+ Provides-Extra: org
152
+ Requires-Dist: unstructured[org]; extra == 'org'
153
+ Provides-Extra: outlook
154
+ Requires-Dist: msal; extra == 'outlook'
155
+ Requires-Dist: office365-rest-python-client; extra == 'outlook'
156
+ Provides-Extra: pdf
157
+ Requires-Dist: unstructured[pdf]; extra == 'pdf'
158
+ Provides-Extra: pinecone
159
+ Requires-Dist: pinecone; extra == 'pinecone'
160
+ Provides-Extra: postgres
161
+ Requires-Dist: pandas; extra == 'postgres'
162
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
163
+ Provides-Extra: ppt
164
+ Requires-Dist: unstructured[ppt]; extra == 'ppt'
165
+ Provides-Extra: pptx
166
+ Requires-Dist: unstructured[pptx]; extra == 'pptx'
167
+ Provides-Extra: qdrant
168
+ Requires-Dist: qdrant-client; extra == 'qdrant'
169
+ Provides-Extra: reddit
170
+ Requires-Dist: praw; extra == 'reddit'
171
+ Provides-Extra: redis
172
+ Requires-Dist: redis<=5.3.0; extra == 'redis'
173
+ Provides-Extra: remote
174
+ Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
175
+ Provides-Extra: rst
176
+ Requires-Dist: unstructured[rst]; extra == 'rst'
177
+ Provides-Extra: rtf
178
+ Requires-Dist: unstructured[rtf]; extra == 'rtf'
179
+ Provides-Extra: s3
180
+ Requires-Dist: fsspec; extra == 's3'
181
+ Requires-Dist: s3fs; extra == 's3'
182
+ Provides-Extra: salesforce
183
+ Requires-Dist: simple-salesforce; extra == 'salesforce'
184
+ Provides-Extra: sftp
185
+ Requires-Dist: fsspec; extra == 'sftp'
186
+ Requires-Dist: paramiko; extra == 'sftp'
187
+ Provides-Extra: sharepoint
188
+ Requires-Dist: msal; extra == 'sharepoint'
189
+ Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
190
+ Requires-Dist: requests; extra == 'sharepoint'
191
+ Provides-Extra: singlestore
192
+ Requires-Dist: pandas; extra == 'singlestore'
193
+ Requires-Dist: singlestoredb; extra == 'singlestore'
194
+ Provides-Extra: slack
195
+ Requires-Dist: slack-sdk[optional]; extra == 'slack'
196
+ Provides-Extra: snowflake
197
+ Requires-Dist: pandas; extra == 'snowflake'
198
+ Requires-Dist: psycopg2-binary; extra == 'snowflake'
199
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
200
+ Provides-Extra: teradata
201
+ Requires-Dist: pandas; extra == 'teradata'
202
+ Requires-Dist: teradatasql; extra == 'teradata'
203
+ Provides-Extra: togetherai
204
+ Requires-Dist: together; extra == 'togetherai'
205
+ Provides-Extra: tsv
206
+ Requires-Dist: unstructured[tsv]; extra == 'tsv'
207
+ Provides-Extra: vastdb
208
+ Requires-Dist: ibis; extra == 'vastdb'
209
+ Requires-Dist: pandas; extra == 'vastdb'
210
+ Requires-Dist: pyarrow; extra == 'vastdb'
211
+ Requires-Dist: vastdb; extra == 'vastdb'
212
+ Provides-Extra: vectara
213
+ Requires-Dist: aiofiles; extra == 'vectara'
214
+ Requires-Dist: httpx; extra == 'vectara'
215
+ Requires-Dist: requests; extra == 'vectara'
216
+ Provides-Extra: vertexai
217
+ Requires-Dist: vertexai; extra == 'vertexai'
218
+ Provides-Extra: voyageai
219
+ Requires-Dist: langchain-core<1.0.0,>=0.3.81; extra == 'voyageai'
220
+ Requires-Dist: voyageai; extra == 'voyageai'
221
+ Provides-Extra: weaviate
222
+ Requires-Dist: weaviate-client; extra == 'weaviate'
223
+ Provides-Extra: wikipedia
224
+ Requires-Dist: wikipedia; extra == 'wikipedia'
225
+ Provides-Extra: xlsx
226
+ Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
227
+ Provides-Extra: zendesk
228
+ Requires-Dist: aiofiles; extra == 'zendesk'
229
+ Requires-Dist: bs4; extra == 'zendesk'
230
+ Requires-Dist: httpx; extra == 'zendesk'
231
+ Description-Content-Type: text/markdown
232
+
233
+ # Unstructured Ingest
234
+
235
+ For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
@@ -0,0 +1,215 @@
1
+ [project]
2
+ name = "unstructured_ingest"
3
+ description = "Local ETL data pipeline to get data RAG ready"
4
+ requires-python = ">=3.10, <3.13"
5
+ authors = [{name = "Unstructured Technologies", email = "devops@unstructuredai.io"}]
6
+ classifiers = [
7
+ "Development Status :: 4 - Beta",
8
+ "Intended Audience :: Developers",
9
+ "Intended Audience :: Education",
10
+ "Intended Audience :: Science/Research",
11
+ "License :: OSI Approved :: Apache Software License",
12
+ "Operating System :: OS Independent",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.10",
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
18
+ ]
19
+ readme = "README.md"
20
+ license = "Apache-2.0"
21
+ dynamic = ["version", "dependencies", "optional-dependencies"]
22
+
23
+ [tool.hatch.metadata.hooks.requirements_txt]
24
+ files = ["requirements/base.txt"]
25
+
26
+ [tool.hatch.metadata.hooks.requirements_txt.optional-dependencies]
27
+ # Connectors
28
+ airtable = ["requirements/connectors/airtable.txt"]
29
+ astradb = ["requirements/connectors/astradb.txt"]
30
+ azure-ai-search = ["requirements/connectors/azure-ai-search.txt"]
31
+ azure = ["requirements/connectors/azure.txt"]
32
+ biomed = ["requirements/connectors/biomed.txt"]
33
+ box = ["requirements/connectors/box.txt"]
34
+ chroma = ["requirements/connectors/chroma.txt"]
35
+ clarifai = ["requirements/connectors/clarifai.txt"]
36
+ confluence = ["requirements/connectors/confluence.txt"]
37
+ couchbase = ["requirements/connectors/couchbase.txt"]
38
+ databricks-delta-tables = ["requirements/connectors/databricks-delta-tables.txt"]
39
+ databricks-volumes = ["requirements/connectors/databricks-volumes.txt"]
40
+ delta-table = ["requirements/connectors/delta-table.txt"]
41
+ discord = ["requirements/connectors/discord.txt"]
42
+ dropbox = ["requirements/connectors/dropbox.txt"]
43
+ duckdb = ["requirements/connectors/duckdb.txt"]
44
+ elasticsearch = ["requirements/connectors/elasticsearch.txt"]
45
+ gcs = ["requirements/connectors/gcs.txt"]
46
+ github = ["requirements/connectors/github.txt"]
47
+ gitlab = ["requirements/connectors/gitlab.txt"]
48
+ google-drive = ["requirements/connectors/google-drive.txt"]
49
+ hubspot = ["requirements/connectors/hubspot.txt"]
50
+ ibm-watsonx-s3 = ["requirements/connectors/ibm-watsonx-s3.txt"]
51
+ jira = ["requirements/connectors/jira.txt"]
52
+ kafka = ["requirements/connectors/kafka.txt"]
53
+ kdbai = ["requirements/connectors/kdbai.txt"]
54
+ lancedb = ["requirements/connectors/lancedb.txt"]
55
+ milvus = ["requirements/connectors/milvus.txt"]
56
+ mongodb = ["requirements/connectors/mongodb.txt"]
57
+ neo4j = ["requirements/connectors/neo4j.txt"]
58
+ notion = ["requirements/connectors/notion.txt"]
59
+ onedrive = ["requirements/connectors/onedrive.txt"]
60
+ opensearch = ["requirements/connectors/opensearch.txt"]
61
+ outlook = ["requirements/connectors/outlook.txt"]
62
+ pinecone = ["requirements/connectors/pinecone.txt"]
63
+ postgres = ["requirements/connectors/postgres.txt"]
64
+ qdrant = ["requirements/connectors/qdrant.txt"]
65
+ reddit = ["requirements/connectors/reddit.txt"]
66
+ redis = ["requirements/connectors/redis.txt"]
67
+ s3 = ["requirements/connectors/s3.txt"]
68
+ salesforce = ["requirements/connectors/salesforce.txt"]
69
+ sftp = ["requirements/connectors/sftp.txt"]
70
+ sharepoint = ["requirements/connectors/sharepoint.txt"]
71
+ singlestore = ["requirements/connectors/singlestore.txt"]
72
+ slack = ["requirements/connectors/slack.txt"]
73
+ snowflake = ["requirements/connectors/snowflake.txt"]
74
+ teradata = ["requirements/connectors/teradata.txt"]
75
+ vastdb = ["requirements/connectors/vastdb.txt"]
76
+ vectara = ["requirements/connectors/vectara.txt"]
77
+ weaviate = ["requirements/connectors/weaviate.txt"]
78
+ wikipedia = ["requirements/connectors/wikipedia.txt"]
79
+ zendesk = ["requirements/connectors/zendesk.txt"]
80
+
81
+ # Embedders
82
+ bedrock = ["requirements/embed/bedrock.txt"]
83
+ huggingface = ["requirements/embed/huggingface.txt"]
84
+ mixedbreadai = ["requirements/embed/mixedbreadai.txt"]
85
+ octoai = ["requirements/embed/octoai.txt"]
86
+ openai = ["requirements/embed/openai.txt"]
87
+ togetherai = ["requirements/embed/togetherai.txt"]
88
+ vertexai = ["requirements/embed/vertexai.txt"]
89
+ voyageai = ["requirements/embed/voyageai.txt"]
90
+
91
+ # remote
92
+ remote = ["requirements/remote/client.txt"]
93
+
94
+ # local partition
95
+ doc = ["requirements/local_partition/doc.txt"]
96
+ docx = ["requirements/local_partition/docx.txt"]
97
+ epub = ["requirements/local_partition/epub.txt"]
98
+ image = ["requirements/local_partition/image.txt"]
99
+ md = ["requirements/local_partition/md.txt"]
100
+ msg = ["requirements/local_partition/msg.txt"]
101
+ odt = ["requirements/local_partition/odt.txt"]
102
+ org = ["requirements/local_partition/org.txt"]
103
+ pdf = ["requirements/local_partition/pdf.txt"]
104
+ ppt = ["requirements/local_partition/ppt.txt"]
105
+ pptx = ["requirements/local_partition/pptx.txt"]
106
+ rst = ["requirements/local_partition/rst.txt"]
107
+ rtf = ["requirements/local_partition/rtf.txt"]
108
+ tsv = ["requirements/local_partition/tsv.txt"]
109
+ xlsx = ["requirements/local_partition/xlsx.txt"]
110
+
111
+
112
+ [tool.hatch.version]
113
+ path = "unstructured_ingest/__version__.py"
114
+
115
+ [dependency-groups]
116
+ release = [
117
+ "twine",
118
+ "wheel",
119
+ "build"
120
+ ]
121
+ lint = [
122
+ "ruff",
123
+ ]
124
+ test = [
125
+ "pytest",
126
+ "pytest-cov",
127
+ "pytest-mock",
128
+ "pytest-check",
129
+ "pytest-asyncio",
130
+ "pytest_tagging",
131
+ "pytest-json-report",
132
+ "pytest-timeout",
133
+ "faker",
134
+ "docker",
135
+ "universal_pathlib",
136
+ "deepdiff",
137
+ "bs4",
138
+ "pandas",
139
+ # Connector specific deps
140
+ "cryptography",
141
+ "fsspec",
142
+ "vertexai",
143
+ "pyiceberg",
144
+ "pyarrow",
145
+ "networkx",
146
+ "htmlbuilder",
147
+ "office365-rest-python-client",
148
+ ]
149
+ # Add constraints needed for CI
150
+ ci = [
151
+ "grpcio>=1.65.5",
152
+ # TODO: Pinned in transformers package, remove when that gets updated
153
+ "tokenizers>=0.19,<0.20",
154
+ # TODO: Constraint due to aiobotocore, remove when that gets updated:
155
+ "botocore<1.34.132",
156
+ # TODO: Constraint due to both 8.5.0 and 8.4.0 being installed during pip-compile
157
+ "importlib-metadata>=8.5.0",
158
+ "unstructured-client>= 0.25.8",
159
+ "fsspec==2024.5.0",
160
+ # python 3.12 support
161
+ "wrapt>=1.14.0",
162
+ "numpy<2",
163
+ # deltalake >=0.23.0 currently has a bug with the version of pyarrow it installs
164
+ "deltalake<=0.22.0",
165
+ # TODO: investigate breaking changes introduced in lancedb>0.15.0
166
+ "lancedb<=0.15.0",
167
+ # TODO: versions higher than this are missing the macos wheel
168
+ "pykx==2.5.3",
169
+ # TODO: Constraint due to perf-analyzer platform compatibility issues
170
+ "tritonclient<=2.60.0", # Allow 2.60.0 (was working), prevent 2.61.0 (has perf-analyzer issues)
171
+ ]
172
+
173
+ [project.scripts]
174
+ unstructured-ingest = "unstructured_ingest.main:main"
175
+
176
+
177
+ [build-system]
178
+ requires = ["hatchling", "hatch-requirements-txt"]
179
+ build-backend = "hatchling.build"
180
+
181
+ [tool.ruff]
182
+ line-length = 100
183
+
184
+ [tool.ruff.lint]
185
+ select = [
186
+ # pycodestyle
187
+ "E",
188
+ # Pyflakes
189
+ "F",
190
+ # flake8-simplify
191
+ "SIM",
192
+ # isort
193
+ "I",
194
+ ]
195
+
196
+ [tool.pytest.ini_options]
197
+ asyncio_mode = "auto"
198
+
199
+ [tool.coverage.report]
200
+ # TODO: Update as this improves
201
+ fail_under = 0
202
+
203
+ [tool.hatch.build.targets.wheel]
204
+ packages = ["/unstructured_ingest"]
205
+
206
+ [tool.hatch.build.targets.sdist]
207
+ packages = ["/unstructured_ingest"]
208
+
209
+ [tool.codeflash]
210
+ # All paths are relative to this pyproject.toml's directory.
211
+ module-root = "unstructured_ingest"
212
+ tests-root = "test"
213
+ test-framework = "pytest"
214
+ ignore-paths = []
215
+ formatter-cmds = ["ruff check --exit-zero --fix $file", "ruff format $file"]
@@ -0,0 +1 @@
1
+ __version__ = "1.2.35" # pragma: no cover
@@ -0,0 +1,28 @@
1
+ # Ingest CLI
2
+ This package helps map user input via a cli to the underlying ingest code to run a small ETL pipeline.
3
+
4
+ ## Design Reference
5
+ [cli.py](cli.py) is the main entrypoint to run the cli itself. The key point here is the interaction between all
6
+ source and destination connectors.
7
+
8
+ To manually run the cli:
9
+ ```shell
10
+ PYTHONPATH=. python unstructured_ingest/main.py --help
11
+ ```
12
+
13
+ The `main.py` file simply wraps the generated Click command created in `cli.py`.
14
+
15
+ ### Source Commands
16
+ All source commands are added as sub commands to the parent ingest Click group. This allows each command to map to
17
+ different connectors with shared and unique parameters.
18
+
19
+ ### Destination Commands
20
+ All destination commands are added as sub commands to each parent source command. This allows each invocation of the source
21
+ sub command to display all possible destination subcommands. The code in [utils.py](./utils.py) helps structure the
22
+ generated text from the Click library to be more intuitive on this approach (i.e. list sub commands as `Destinations`).
23
+
24
+ ### Configs
25
+ The configs in [configs/](./configs) and connector specific ones in [cmds/](./cmds) help surface all user parameters that
26
+ are needed to marshall the input dictionary from Click into all the respective configs needed to create a full pipeline run.
27
+ Because click returns a flat dictionary of user inputs, the `extract_config` method in `utils.py` helps deserialize this dictionary
28
+ into dataclasses that have nested fields (such as access configs).