unstructured-ingest 0.0.23__tar.gz → 0.0.25__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/PKG-INFO +16 -16
- unstructured_ingest-0.0.25/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/dep_check.py +12 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/utils/model_conversion.py +3 -3
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/__init__.py +12 -1
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +125 -32
- unstructured_ingest-0.0.25/unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/pinecone.py +41 -22
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/PKG-INFO +16 -16
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/SOURCES.txt +1 -0
- unstructured_ingest-0.0.23/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/LICENSE.md +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/README.md +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/pyproject.toml +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/setup.cfg +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/setup.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/test/test_chunking_utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/test/test_error.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/test/test_interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/test/test_logger.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/test/test_utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/test/test_utils_v2.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmd_factory.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/airtable.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/astradb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/biomed.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/chroma.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/confluence.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/discord.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/github.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/jira.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/kafka.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/local.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/notion.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/outlook.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/reddit.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/slack.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/sql.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/vectara.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/common.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/cli/utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/airtable.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/astradb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/biomed.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/chroma.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/clarifai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/confluence.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/databricks_volumes.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/delta_table.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/discord.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/elasticsearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/azure.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/box.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/s3.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/git.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/github.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/gitlab.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/google_drive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/hubspot.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/jira.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/kafka.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/local.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/mongodb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/client.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/connector.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/helpers.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/block.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/date.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/file.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/page.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/parent.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/notion/types/user.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/onedrive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/opensearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/outlook.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/pinecone.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/qdrant.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/reddit.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/registry.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/salesforce.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/sharepoint.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/slack.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/sql.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/vectara.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/weaviate.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/connector/wikipedia.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/mixedbreadai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/copy.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/doc_factory.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/partition.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/permissions.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/source.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/pipeline/write.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/processor.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/airtable.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/astradb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/base_runner.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/biomed.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/confluence.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/delta_table.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/discord.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/elasticsearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/azure.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/box.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/s3.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/github.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/gitlab.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/google_drive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/hubspot.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/jira.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/kafka.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/local.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/mongodb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/notion.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/onedrive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/opensearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/outlook.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/reddit.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/salesforce.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/sharepoint.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/slack.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/wikipedia.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/astradb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/base_writer.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/chroma.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/clarifai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/delta_table.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/kafka.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/mongodb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/opensearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/pinecone.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/qdrant.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/sql.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/vectara.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/runner/writers/weaviate.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/google_filetype.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/table.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/cmd.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/dest.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/importer.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/base/src.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/cli.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/cmds.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/cli/utils/click.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/connector.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/file_data.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/process.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/processor.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/logger.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/main.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/otel.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/interfaces.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/otel.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/pipeline.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/download.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/index.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/chunker.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/airtable.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/astradb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/local.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/mongodb.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/onedrive.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/opensearch.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/singlestore.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/sql.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/connectors/weaviate.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/embedder.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/filter.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/partitioner.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/processes/uncompress.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/unstructured_api.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/v2/utils.py +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/entry_points.txt +0 -0
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/requires.txt +15 -15
- {unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.0.23
|
|
3
|
+
Version: 0.0.25
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.13
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: pydantic>=2.7
|
|
26
|
-
Requires-Dist: pandas
|
|
27
25
|
Requires-Dist: dataclasses_json
|
|
28
26
|
Requires-Dist: click
|
|
27
|
+
Requires-Dist: pandas
|
|
28
|
+
Requires-Dist: python-dateutil
|
|
29
29
|
Requires-Dist: tqdm
|
|
30
|
+
Requires-Dist: pydantic>=2.7
|
|
30
31
|
Requires-Dist: opentelemetry-sdk
|
|
31
|
-
Requires-Dist: python-dateutil
|
|
32
32
|
Provides-Extra: remote
|
|
33
33
|
Requires-Dist: unstructured-client>=0.25.8; extra == "remote"
|
|
34
34
|
Provides-Extra: csv
|
|
@@ -66,16 +66,16 @@ Requires-Dist: pyairtable; extra == "airtable"
|
|
|
66
66
|
Provides-Extra: astradb
|
|
67
67
|
Requires-Dist: astrapy; extra == "astradb"
|
|
68
68
|
Provides-Extra: azure
|
|
69
|
-
Requires-Dist: fsspec; extra == "azure"
|
|
70
69
|
Requires-Dist: adlfs; extra == "azure"
|
|
70
|
+
Requires-Dist: fsspec; extra == "azure"
|
|
71
71
|
Provides-Extra: azure-cognitive-search
|
|
72
72
|
Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
|
|
73
73
|
Provides-Extra: biomed
|
|
74
|
-
Requires-Dist: bs4; extra == "biomed"
|
|
75
74
|
Requires-Dist: requests; extra == "biomed"
|
|
75
|
+
Requires-Dist: bs4; extra == "biomed"
|
|
76
76
|
Provides-Extra: box
|
|
77
|
-
Requires-Dist: boxfs; extra == "box"
|
|
78
77
|
Requires-Dist: fsspec; extra == "box"
|
|
78
|
+
Requires-Dist: boxfs; extra == "box"
|
|
79
79
|
Provides-Extra: chroma
|
|
80
80
|
Requires-Dist: chromadb; extra == "chroma"
|
|
81
81
|
Provides-Extra: clarifai
|
|
@@ -97,8 +97,8 @@ Provides-Extra: elasticsearch
|
|
|
97
97
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
98
98
|
Provides-Extra: gcs
|
|
99
99
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
100
|
-
Requires-Dist: fsspec; extra == "gcs"
|
|
101
100
|
Requires-Dist: bs4; extra == "gcs"
|
|
101
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
102
102
|
Provides-Extra: github
|
|
103
103
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
104
104
|
Requires-Dist: requests; extra == "github"
|
|
@@ -120,19 +120,19 @@ Requires-Dist: pymilvus; extra == "milvus"
|
|
|
120
120
|
Provides-Extra: mongodb
|
|
121
121
|
Requires-Dist: pymongo; extra == "mongodb"
|
|
122
122
|
Provides-Extra: notion
|
|
123
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
124
123
|
Requires-Dist: httpx; extra == "notion"
|
|
125
|
-
Requires-Dist: htmlBuilder; extra == "notion"
|
|
126
124
|
Requires-Dist: backoff; extra == "notion"
|
|
125
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
126
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
127
127
|
Provides-Extra: onedrive
|
|
128
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
128
129
|
Requires-Dist: bs4; extra == "onedrive"
|
|
129
130
|
Requires-Dist: msal; extra == "onedrive"
|
|
130
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
131
131
|
Provides-Extra: opensearch
|
|
132
132
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
133
133
|
Provides-Extra: outlook
|
|
134
|
-
Requires-Dist: msal; extra == "outlook"
|
|
135
134
|
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
135
|
+
Requires-Dist: msal; extra == "outlook"
|
|
136
136
|
Provides-Extra: pinecone
|
|
137
137
|
Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
|
|
138
138
|
Provides-Extra: postgres
|
|
@@ -145,13 +145,13 @@ Provides-Extra: s3
|
|
|
145
145
|
Requires-Dist: fsspec; extra == "s3"
|
|
146
146
|
Requires-Dist: s3fs; extra == "s3"
|
|
147
147
|
Provides-Extra: sharepoint
|
|
148
|
-
Requires-Dist: msal; extra == "sharepoint"
|
|
149
148
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
149
|
+
Requires-Dist: msal; extra == "sharepoint"
|
|
150
150
|
Provides-Extra: salesforce
|
|
151
151
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
152
152
|
Provides-Extra: sftp
|
|
153
|
-
Requires-Dist: fsspec; extra == "sftp"
|
|
154
153
|
Requires-Dist: paramiko; extra == "sftp"
|
|
154
|
+
Requires-Dist: fsspec; extra == "sftp"
|
|
155
155
|
Provides-Extra: slack
|
|
156
156
|
Requires-Dist: slack_sdk; extra == "slack"
|
|
157
157
|
Provides-Extra: wikipedia
|
|
@@ -167,8 +167,8 @@ Requires-Dist: requests; extra == "vectara"
|
|
|
167
167
|
Provides-Extra: embed-huggingface
|
|
168
168
|
Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
169
169
|
Provides-Extra: embed-octoai
|
|
170
|
-
Requires-Dist: openai; extra == "embed-octoai"
|
|
171
170
|
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
171
|
+
Requires-Dist: openai; extra == "embed-octoai"
|
|
172
172
|
Provides-Extra: embed-vertexai
|
|
173
173
|
Requires-Dist: vertexai; extra == "embed-vertexai"
|
|
174
174
|
Provides-Extra: embed-voyageai
|
|
@@ -176,8 +176,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
176
176
|
Provides-Extra: embed-mixedbreadai
|
|
177
177
|
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
178
178
|
Provides-Extra: openai
|
|
179
|
-
Requires-Dist: openai; extra == "openai"
|
|
180
179
|
Requires-Dist: tiktoken; extra == "openai"
|
|
180
|
+
Requires-Dist: openai; extra == "openai"
|
|
181
181
|
Provides-Extra: bedrock
|
|
182
182
|
Requires-Dist: boto3; extra == "bedrock"
|
|
183
183
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.25" # pragma: no cover
|
{unstructured_ingest-0.0.23 → unstructured_ingest-0.0.25}/unstructured_ingest/utils/dep_check.py
RENAMED
|
@@ -20,6 +20,18 @@ def requires_dependencies(
|
|
|
20
20
|
dependencies: str | list[str],
|
|
21
21
|
extras: Optional[str] = None,
|
|
22
22
|
) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
|
|
23
|
+
"""Decorator ensuring required modules are installed.
|
|
24
|
+
|
|
25
|
+
Use on functions with local imports to ensure required modules are available and log
|
|
26
|
+
an installation instruction if they're not.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
dependencies: Name(s) of module(s) required by the decorated function.
|
|
30
|
+
extras: unstructured-ingest extra which installs required `dependencies`. Defaults to None.
|
|
31
|
+
|
|
32
|
+
Raises:
|
|
33
|
+
ImportError: When at least one of the `dependencies` is not available.
|
|
34
|
+
"""
|
|
23
35
|
if isinstance(dependencies, str):
|
|
24
36
|
dependencies = [dependencies]
|
|
25
37
|
|
|
@@ -155,14 +155,14 @@ def _get_type_from_field(field: FieldInfo) -> click.ParamType:
|
|
|
155
155
|
|
|
156
156
|
def get_option_from_field(option_name: str, field_info: FieldInfo) -> Option:
|
|
157
157
|
param_decls = [option_name]
|
|
158
|
-
|
|
158
|
+
help_text = field_info.description or ""
|
|
159
159
|
if examples := field_info.examples:
|
|
160
|
-
|
|
160
|
+
help_text += f" [Examples: {', '.join(examples)}]"
|
|
161
161
|
option_kwargs = {
|
|
162
162
|
"type": _get_type_from_field(field_info),
|
|
163
163
|
"default": get_default_value_from_field(field_info),
|
|
164
164
|
"required": field_info.is_required(),
|
|
165
|
-
"help":
|
|
165
|
+
"help": str(help_text),
|
|
166
166
|
"is_flag": is_boolean_flag(field_info),
|
|
167
167
|
"show_default": field_info.default is not PydanticUndefined,
|
|
168
168
|
}
|
|
@@ -17,7 +17,10 @@ from .chroma import chroma_destination_entry
|
|
|
17
17
|
from .couchbase import CONNECTOR_TYPE as COUCHBASE_CONNECTOR_TYPE
|
|
18
18
|
from .couchbase import couchbase_destination_entry, couchbase_source_entry
|
|
19
19
|
from .databricks_volumes import CONNECTOR_TYPE as DATABRICKS_VOLUMES_CONNECTOR_TYPE
|
|
20
|
-
from .databricks_volumes import databricks_volumes_destination_entry
|
|
20
|
+
from .databricks_volumes import (
|
|
21
|
+
databricks_volumes_destination_entry,
|
|
22
|
+
databricks_volumes_source_entry,
|
|
23
|
+
)
|
|
21
24
|
from .elasticsearch import CONNECTOR_TYPE as ELASTICSEARCH_CONNECTOR_TYPE
|
|
22
25
|
from .elasticsearch import elasticsearch_destination_entry, elasticsearch_source_entry
|
|
23
26
|
from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE
|
|
@@ -34,6 +37,8 @@ from .onedrive import CONNECTOR_TYPE as ONEDRIVE_CONNECTOR_TYPE
|
|
|
34
37
|
from .onedrive import onedrive_source_entry
|
|
35
38
|
from .opensearch import CONNECTOR_TYPE as OPENSEARCH_CONNECTOR_TYPE
|
|
36
39
|
from .opensearch import opensearch_destination_entry, opensearch_source_entry
|
|
40
|
+
from .outlook import CONNECTOR_TYPE as OUTLOOK_CONNECTOR_TYPE
|
|
41
|
+
from .outlook import outlook_source_entry
|
|
37
42
|
from .pinecone import CONNECTOR_TYPE as PINECONE_CONNECTOR_TYPE
|
|
38
43
|
from .pinecone import pinecone_destination_entry
|
|
39
44
|
from .salesforce import CONNECTOR_TYPE as SALESFORCE_CONNECTOR_TYPE
|
|
@@ -78,6 +83,10 @@ add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_d
|
|
|
78
83
|
add_destination_entry(
|
|
79
84
|
destination_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_destination_entry
|
|
80
85
|
)
|
|
86
|
+
add_source_entry(
|
|
87
|
+
source_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_source_entry
|
|
88
|
+
)
|
|
89
|
+
|
|
81
90
|
|
|
82
91
|
add_destination_entry(destination_type=SQL_CONNECTOR_TYPE, entry=sql_destination_entry)
|
|
83
92
|
|
|
@@ -95,3 +104,5 @@ add_destination_entry(
|
|
|
95
104
|
|
|
96
105
|
add_destination_entry(destination_type=KDBAI_CONNECTOR_TYPE, entry=kdbai_destination_entry)
|
|
97
106
|
add_source_entry(source_type=AIRTABLE_CONNECTOR_TYPE, entry=airtable_source_entry)
|
|
107
|
+
|
|
108
|
+
add_source_entry(source_type=OUTLOOK_CONNECTOR_TYPE, entry=outlook_source_entry)
|
|
@@ -1,21 +1,35 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
5
5
|
|
|
6
6
|
from pydantic import Field, Secret
|
|
7
7
|
|
|
8
|
-
from unstructured_ingest.error import DestinationConnectionError
|
|
8
|
+
from unstructured_ingest.error import (
|
|
9
|
+
DestinationConnectionError,
|
|
10
|
+
SourceConnectionError,
|
|
11
|
+
SourceConnectionNetworkError,
|
|
12
|
+
)
|
|
9
13
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
10
14
|
from unstructured_ingest.v2.interfaces import (
|
|
11
15
|
AccessConfig,
|
|
12
16
|
ConnectionConfig,
|
|
17
|
+
Downloader,
|
|
18
|
+
DownloaderConfig,
|
|
19
|
+
DownloadResponse,
|
|
13
20
|
FileData,
|
|
21
|
+
FileDataSourceMetadata,
|
|
22
|
+
Indexer,
|
|
23
|
+
IndexerConfig,
|
|
24
|
+
SourceIdentifiers,
|
|
14
25
|
Uploader,
|
|
15
26
|
UploaderConfig,
|
|
16
27
|
)
|
|
17
28
|
from unstructured_ingest.v2.logger import logger
|
|
18
|
-
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
29
|
+
from unstructured_ingest.v2.processes.connector_registry import (
|
|
30
|
+
DestinationRegistryEntry,
|
|
31
|
+
SourceRegistryEntry,
|
|
32
|
+
)
|
|
19
33
|
|
|
20
34
|
if TYPE_CHECKING:
|
|
21
35
|
from databricks.sdk import WorkspaceClient
|
|
@@ -32,16 +46,6 @@ class DatabricksVolumesAccessConfig(AccessConfig):
|
|
|
32
46
|
"https://accounts.azuredatabricks.net/ (Azure), "
|
|
33
47
|
"or https://accounts.gcp.databricks.com/ (GCP).",
|
|
34
48
|
)
|
|
35
|
-
username: Optional[str] = Field(
|
|
36
|
-
default=None,
|
|
37
|
-
description="The Databricks username part of basic authentication. "
|
|
38
|
-
"Only possible when Host is *.cloud.databricks.com (AWS).",
|
|
39
|
-
)
|
|
40
|
-
password: Optional[str] = Field(
|
|
41
|
-
default=None,
|
|
42
|
-
description="The Databricks password part of basic authentication. "
|
|
43
|
-
"Only possible when Host is *.cloud.databricks.com (AWS).",
|
|
44
|
-
)
|
|
45
49
|
client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
|
|
46
50
|
client_secret: Optional[str] = Field(
|
|
47
51
|
default=None, description="Client Secret of the OAuth app."
|
|
@@ -78,7 +82,6 @@ class DatabricksVolumesAccessConfig(AccessConfig):
|
|
|
78
82
|
"argument. This argument also holds the currently "
|
|
79
83
|
"selected auth.",
|
|
80
84
|
)
|
|
81
|
-
cluster_id: Optional[str] = None
|
|
82
85
|
google_credentials: Optional[str] = None
|
|
83
86
|
google_service_account: Optional[str] = None
|
|
84
87
|
|
|
@@ -93,17 +96,11 @@ class DatabricksVolumesConnectionConfig(ConnectionConfig):
|
|
|
93
96
|
"Databricks workspace endpoint or the "
|
|
94
97
|
"Databricks accounts endpoint.",
|
|
95
98
|
)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
class DatabricksVolumesUploaderConfig(UploaderConfig):
|
|
99
99
|
volume: str = Field(description="Name of volume in the Unity Catalog")
|
|
100
100
|
catalog: str = Field(description="Name of the catalog in the Databricks Unity Catalog service")
|
|
101
101
|
volume_path: Optional[str] = Field(
|
|
102
102
|
default=None, description="Optional path within the volume to write to"
|
|
103
103
|
)
|
|
104
|
-
overwrite: bool = Field(
|
|
105
|
-
default=False, description="If true, an existing file will be overwritten."
|
|
106
|
-
)
|
|
107
104
|
databricks_schema: str = Field(
|
|
108
105
|
default="default",
|
|
109
106
|
alias="schema",
|
|
@@ -117,33 +114,121 @@ class DatabricksVolumesUploaderConfig(UploaderConfig):
|
|
|
117
114
|
path = f"{path}/{self.volume_path}"
|
|
118
115
|
return path
|
|
119
116
|
|
|
120
|
-
|
|
121
|
-
@dataclass
|
|
122
|
-
class DatabricksVolumesUploader(Uploader):
|
|
123
|
-
connector_type: str = CONNECTOR_TYPE
|
|
124
|
-
upload_config: DatabricksVolumesUploaderConfig
|
|
125
|
-
connection_config: DatabricksVolumesConnectionConfig
|
|
126
|
-
|
|
127
117
|
@requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes")
|
|
128
118
|
def get_client(self) -> "WorkspaceClient":
|
|
129
119
|
from databricks.sdk import WorkspaceClient
|
|
130
120
|
|
|
131
121
|
return WorkspaceClient(
|
|
132
|
-
host=self.connection_config.host,
|
|
133
|
-
**self.connection_config.access_config.get_secret_value().model_dump(),
|
|
122
|
+
host=self.host,
|
|
123
|
+
**self.access_config.get_secret_value().model_dump(),
|
|
134
124
|
)
|
|
135
125
|
|
|
126
|
+
|
|
127
|
+
@dataclass
|
|
128
|
+
class DatabricksVolumesIndexerConfig(IndexerConfig):
|
|
129
|
+
recursive: bool = False
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
|
|
133
|
+
class DatabricksVolumesIndexer(Indexer):
|
|
134
|
+
index_config: DatabricksVolumesIndexerConfig
|
|
135
|
+
connection_config: DatabricksVolumesConnectionConfig
|
|
136
|
+
connector_type: str = CONNECTOR_TYPE
|
|
137
|
+
|
|
136
138
|
def precheck(self) -> None:
|
|
137
139
|
try:
|
|
138
|
-
|
|
140
|
+
self.connection_config.get_client()
|
|
141
|
+
except Exception as e:
|
|
142
|
+
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
143
|
+
raise SourceConnectionError(f"failed to validate connection: {e}")
|
|
144
|
+
|
|
145
|
+
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
146
|
+
for file_info in self.connection_config.get_client().dbfs.list(
|
|
147
|
+
path=self.connection_config.path, recursive=self.index_config.recursive
|
|
148
|
+
):
|
|
149
|
+
if file_info.is_dir:
|
|
150
|
+
continue
|
|
151
|
+
rel_path = file_info.path.replace(self.connection_config.path, "")
|
|
152
|
+
if rel_path.startswith("/"):
|
|
153
|
+
rel_path = rel_path[1:]
|
|
154
|
+
filename = Path(file_info.path).name
|
|
155
|
+
yield FileData(
|
|
156
|
+
identifier=file_info.path,
|
|
157
|
+
connector_type=CONNECTOR_TYPE,
|
|
158
|
+
source_identifiers=SourceIdentifiers(
|
|
159
|
+
filename=filename,
|
|
160
|
+
rel_path=rel_path,
|
|
161
|
+
fullpath=file_info.path,
|
|
162
|
+
),
|
|
163
|
+
additional_metadata={
|
|
164
|
+
"catalog": self.connection_config.catalog,
|
|
165
|
+
},
|
|
166
|
+
metadata=FileDataSourceMetadata(
|
|
167
|
+
url=file_info.path, date_modified=str(file_info.modification_time)
|
|
168
|
+
),
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclass
|
|
173
|
+
class DatabricksVolumesDownloaderConfig(DownloaderConfig):
|
|
174
|
+
pass
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass
|
|
178
|
+
class DatabricksVolumesDownloader(Downloader):
|
|
179
|
+
download_config: DatabricksVolumesDownloaderConfig
|
|
180
|
+
connection_config: DatabricksVolumesConnectionConfig
|
|
181
|
+
connector_type: str = CONNECTOR_TYPE
|
|
182
|
+
|
|
183
|
+
def precheck(self) -> None:
|
|
184
|
+
try:
|
|
185
|
+
self.connection_config.get_client()
|
|
186
|
+
except Exception as e:
|
|
187
|
+
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
188
|
+
raise SourceConnectionError(f"failed to validate connection: {e}")
|
|
189
|
+
|
|
190
|
+
def get_download_path(self, file_data: FileData) -> Path:
|
|
191
|
+
return self.download_config.download_dir / Path(file_data.source_identifiers.relative_path)
|
|
192
|
+
|
|
193
|
+
def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
|
|
194
|
+
download_path = self.get_download_path(file_data=file_data)
|
|
195
|
+
download_path.parent.mkdir(parents=True, exist_ok=True)
|
|
196
|
+
logger.info(f"Writing {file_data.identifier} to {download_path}")
|
|
197
|
+
try:
|
|
198
|
+
with self.connection_config.get_client().dbfs.download(path=file_data.identifier) as c:
|
|
199
|
+
read_content = c._read_handle.read()
|
|
200
|
+
with open(download_path, "wb") as f:
|
|
201
|
+
f.write(read_content)
|
|
202
|
+
except Exception as e:
|
|
203
|
+
logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True)
|
|
204
|
+
raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
|
|
205
|
+
|
|
206
|
+
return self.generate_download_response(file_data=file_data, download_path=download_path)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class DatabricksVolumesUploaderConfig(UploaderConfig):
|
|
210
|
+
overwrite: bool = Field(
|
|
211
|
+
default=False, description="If true, an existing file will be overwritten."
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
@dataclass
|
|
216
|
+
class DatabricksVolumesUploader(Uploader):
|
|
217
|
+
upload_config: DatabricksVolumesUploaderConfig
|
|
218
|
+
connection_config: DatabricksVolumesConnectionConfig
|
|
219
|
+
connector_type: str = CONNECTOR_TYPE
|
|
220
|
+
|
|
221
|
+
def precheck(self) -> None:
|
|
222
|
+
try:
|
|
223
|
+
assert self.connection_config.get_client().current_user.me().active
|
|
139
224
|
except Exception as e:
|
|
140
225
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
141
226
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
142
227
|
|
|
143
228
|
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
144
|
-
output_path = os.path.join(self.upload_config.path, path.name)
|
|
229
|
+
output_path = os.path.join(self.connection_config.path, path.name)
|
|
145
230
|
with open(path, "rb") as elements_file:
|
|
146
|
-
self.get_client().files.upload(
|
|
231
|
+
self.connection_config.get_client().files.upload(
|
|
147
232
|
file_path=output_path,
|
|
148
233
|
contents=elements_file,
|
|
149
234
|
overwrite=self.upload_config.overwrite,
|
|
@@ -155,3 +240,11 @@ databricks_volumes_destination_entry = DestinationRegistryEntry(
|
|
|
155
240
|
uploader=DatabricksVolumesUploader,
|
|
156
241
|
uploader_config=DatabricksVolumesUploaderConfig,
|
|
157
242
|
)
|
|
243
|
+
|
|
244
|
+
databricks_volumes_source_entry = SourceRegistryEntry(
|
|
245
|
+
connection_config=DatabricksVolumesConnectionConfig,
|
|
246
|
+
indexer=DatabricksVolumesIndexer,
|
|
247
|
+
indexer_config=DatabricksVolumesIndexerConfig,
|
|
248
|
+
downloader=DatabricksVolumesDownloader,
|
|
249
|
+
downloader_config=DatabricksVolumesDownloaderConfig,
|
|
250
|
+
)
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import time
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Coroutine, Generator
|
|
7
|
+
|
|
8
|
+
from pydantic import Field, Secret
|
|
9
|
+
|
|
10
|
+
from unstructured_ingest.error import SourceConnectionError
|
|
11
|
+
from unstructured_ingest.logger import logger
|
|
12
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
+
from unstructured_ingest.v2.interfaces import (
|
|
14
|
+
AccessConfig,
|
|
15
|
+
ConnectionConfig,
|
|
16
|
+
Downloader,
|
|
17
|
+
DownloaderConfig,
|
|
18
|
+
FileData,
|
|
19
|
+
Indexer,
|
|
20
|
+
IndexerConfig,
|
|
21
|
+
download_responses,
|
|
22
|
+
)
|
|
23
|
+
from unstructured_ingest.v2.interfaces.file_data import FileDataSourceMetadata, SourceIdentifiers
|
|
24
|
+
from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
|
|
25
|
+
|
|
26
|
+
MAX_EMAILS_PER_FOLDER = 1_000_000 # Maximum number of emails per folder
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from office365.graph_client import GraphClient
|
|
30
|
+
from office365.outlook.mail.folders.folder import MailFolder
|
|
31
|
+
from office365.outlook.mail.messages.message import Message
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
CONNECTOR_TYPE = "outlook"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class OutlookAccessConfig(AccessConfig):
|
|
38
|
+
client_credential: str = Field(description="Azure AD App client secret", alias="client_cred")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class OutlookConnectionConfig(ConnectionConfig):
    """Connection settings used to reach Outlook through Microsoft Graph.

    Combines the Azure AD application identity (`client_id` plus the secret kept
    in `access_config`) with the authority (`authority_url` + `tenant`) that
    issues tokens for it.
    """

    access_config: Secret[OutlookAccessConfig]
    client_id: str = Field(description="Azure AD App client ID")
    tenant: str = Field(
        default="common", description="ID or domain name associated with your Azure AD instance"
    )
    authority_url: str = Field(
        default="https://login.microsoftonline.com",
        description="Authentication token provider for Microsoft apps",
    )

    @requires_dependencies(["msal"], extras="outlook")
    def _acquire_token(self):
        """Acquire a Microsoft Graph token for the application via MSAL.

        Returns:
            The token response dict returned by MSAL's `acquire_token_for_client`.

        Raises:
            SourceConnectionError: if the response carries no ``access_token``,
                i.e. the authority rejected the request. The MSAL ``error`` and
                ``error_description`` fields are included in the message.
        """
        from msal import ConfidentialClientApplication

        # NOTE: It'd be nice to use `msal.authority.AuthorityBuilder` here paired with AZURE_PUBLIC
        # constant as default in the future but they do not fit well with `authority_url` right now
        authority_url = f"{self.authority_url.rstrip('/')}/{self.tenant}"
        app = ConfidentialClientApplication(
            authority=authority_url,
            client_id=self.client_id,
            client_credential=self.access_config.get_secret_value().client_credential,
        )
        token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])

        # MSAL reports failures in the returned dict instead of raising, so fail
        # here with the real reason rather than passing a token-less response on.
        if "access_token" not in token:
            raise SourceConnectionError(
                "Failed to acquire Microsoft Graph token: "
                f"{token.get('error')}: {token.get('error_description')}"
            )
        return token

    @requires_dependencies(["office365"], extras="outlook")
    @SourceConnectionError.wrap
    def get_client(self) -> "GraphClient":
        """Build a `GraphClient` that obtains its token through `_acquire_token`.

        The bound method itself is passed as the token callback, so no token is
        requested by this call; when the callback runs is decided by `GraphClient`.
        """
        from office365.graph_client import GraphClient

        return GraphClient(self._acquire_token)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class OutlookIndexerConfig(IndexerConfig):
    """Options selecting which mailbox and which of its folders get indexed."""

    # Names of the root-level mail folders to read; compared case-insensitively
    # against folder display names by the indexer.
    outlook_folders: list[str] = Field(
        description=(
            "Folders to download email messages from. "
            "Do not specify subfolders. "
            "Use quotes if there are spaces in folder names."
        ),
    )
    # When true the indexer also walks the child folders of every selected folder.
    recursive: bool = Field(
        description=(
            "Recursively download files in their respective folders "
            "otherwise stop at the files in provided folder level."
        ),
        default=False,
    )
    # Mailbox owner whose folders and messages are read.
    user_email: str = Field(
        description="Outlook email to download messages from.",
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class OutlookIndexer(Indexer):
    """Index the messages stored in selected Outlook mail folders of one user.

    Each message found becomes a `FileData` record whose `record_locator`
    carries the `message_id` and `user_email` keys that `OutlookDownloader`
    requires before it fetches a message.
    """

    index_config: OutlookIndexerConfig
    connection_config: OutlookConnectionConfig
    connector_type: str = CONNECTOR_TYPE

    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
        """Yield one `FileData` per message found in the configured folders.

        The full message list is collected first; records are yielded afterwards.
        """
        messages = self._list_messages(recursive=self.index_config.recursive)

        for message in messages:
            yield self._message_to_file_data(message)

    def run_async(self, **kwargs: Any) -> Coroutine[Any, Any, Any]:
        """Not supported; this indexer is synchronous (see `is_async`)."""
        raise NotImplementedError

    @SourceConnectionError.wrap
    def precheck(self) -> None:
        """Check the connection by fetching the configured user from the Graph API."""
        client = self.connection_config.get_client()
        client.users[self.index_config.user_email].get().execute_query()

    def is_async(self) -> bool:
        """Return False: indexing runs synchronously."""
        return False

    def _list_messages(self, recursive: bool) -> list["Message"]:
        """Collect the messages of the selected root folders.

        Args:
            recursive: when true, the child folders of every visited folder are
                queued and visited as well.

        Returns:
            All messages gathered, at most `MAX_EMAILS_PER_FOLDER` requested per folder.
        """
        mail_folders = self._get_selected_root_folders()
        messages = []

        # `mail_folders` doubles as the work stack: folders are popped from the
        # end and, in recursive mode, their children are pushed back onto it.
        while mail_folders:
            mail_folder = mail_folders.pop()
            messages += list(mail_folder.messages.get().top(MAX_EMAILS_PER_FOLDER).execute_query())

            if recursive:
                mail_folders += list(mail_folder.child_folders.get().execute_query())

        return messages

    def _get_selected_root_folders(self) -> list["MailFolder"]:
        """Return the user's root mail folders whose names were selected in the config.

        Folder names are matched case-insensitively against `display_name`.

        Raises:
            ValueError: if none of the configured folder names matches a root folder.
        """
        client_user = self.connection_config.get_client().users[self.index_config.user_email]
        root_mail_folders = client_user.mail_folders.get().execute_query()

        selected_names_normalized = {
            folder_name.lower() for folder_name in self.index_config.outlook_folders
        }
        selected_root_mail_folders = [
            folder
            for folder in root_mail_folders
            if folder.display_name.lower() in selected_names_normalized
        ]

        if not selected_root_mail_folders:
            logger.error(
                f"Root folders selected in configuration: {self.index_config.outlook_folders} "
                f"not found for user email {self.index_config.user_email}. Aborting."
            )
            raise ValueError("Root folders selected in configuration not found.")

        return selected_root_mail_folders

    def _message_to_file_data(self, message: "Message") -> FileData:
        """Convert a Graph message into the `FileData` record handed to the downloader."""
        fullpath = self._generate_fullpath(message)

        return FileData(
            identifier=message.id,
            connector_type=CONNECTOR_TYPE,
            source_identifiers=SourceIdentifiers(filename=fullpath.name, fullpath=str(fullpath)),
            metadata=FileDataSourceMetadata(
                url=message.resource_url,
                version=message.change_key,
                # NOTE(review): `replace` overrides tzinfo without converting, so the
                # datetimes are presumably naive UTC values — confirm.
                date_modified=str(
                    message.last_modified_datetime.replace(tzinfo=timezone.utc).timestamp()
                ),
                date_created=str(message.created_datetime.replace(tzinfo=timezone.utc).timestamp()),
                date_processed=str(time.time()),
                # Everything the downloader needs to locate this message again.
                record_locator={
                    "message_id": message.id,
                    "user_email": self.index_config.user_email,
                },
            ),
            additional_metadata={
                "sent_from": str(message.sent_from),
                "to_recipients": [str(recipient) for recipient in message.to_recipients],
                # Read the BCC list itself (this used to duplicate `to_recipients`).
                "bcc_recipients": [str(recipient) for recipient in message.bcc_recipients],
                "subject": message.subject,
                "conversation_id": message.conversation_id,
                "is_draft": message.is_draft,
                "is_read": message.is_read,
                "has_attachments": message.has_attachments,
                "importance": message.importance,
            },
        )

    def _generate_fullpath(self, message: "Message") -> Path:
        """Derive the relative `.eml` path of a message.

        The name is the first 16 hex characters of the SHA-256 digest of the message id.
        """
        return Path(hashlib.sha256(message.id.encode("utf-8")).hexdigest()[:16] + ".eml")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class OutlookDownloaderConfig(DownloaderConfig):
    """Downloader options for Outlook; adds nothing to the base `DownloaderConfig`."""
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@dataclass
class OutlookDownloader(Downloader):
    """Download single Outlook messages, located via `FileData`, to local files."""

    connector_type: str = CONNECTOR_TYPE
    connection_config: OutlookConnectionConfig
    download_config: OutlookDownloaderConfig = field(default_factory=OutlookDownloaderConfig)

    def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
        """Download the message described by `file_data` and report where it was written.

        Raises:
            ValueError: if no download path can be derived from `file_data`, or
                if its record locator is incomplete (see `_download_message`).
        """
        # NOTE: Indexer should provide source identifiers required to generate the download path
        download_path = self.get_download_path(file_data)
        if download_path is None:
            logger.error(
                "Generated download path is None, source_identifiers might be missing "
                "from FileData."
            )
            raise ValueError("Generated invalid download path.")

        self._download_message(file_data, download_path)
        return self.generate_download_response(file_data, download_path)

    def is_async(self) -> bool:
        """Return False: downloads run synchronously."""
        return False

    def _download_message(self, file_data: FileData, download_path: Path) -> None:
        """Write the message referenced by `file_data` to `download_path`.

        Raises:
            ValueError: if the record locator is missing or lacks the
                ``user_email`` or ``message_id`` key.
        """
        # NOTE: Indexer should supply the record locator in metadata
        record_locator = file_data.metadata.record_locator
        if (
            record_locator is None
            or "user_email" not in record_locator
            or "message_id" not in record_locator
        ):
            logger.error(
                f"Invalid record locator in metadata: {record_locator}. "
                "Keys 'user_email' and 'message_id' must be present."
            )
            raise ValueError("Invalid record locator.")

        user_email = record_locator["user_email"]
        message_id = record_locator["message_id"]

        message = self.connection_config.get_client().users[user_email].messages[message_id]
        # Create the destination directory before opening the file for writing.
        download_path.parent.mkdir(exist_ok=True, parents=True)

        with open(download_path, "wb") as file:
            message.download(file).execute_query()
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Registry entry tying together the Outlook connection, indexer and downloader
# classes defined in this module.
outlook_source_entry = SourceRegistryEntry(
    connection_config=OutlookConnectionConfig,
    indexer_config=OutlookIndexerConfig,
    indexer=OutlookIndexer,
    downloader_config=OutlookDownloaderConfig,
    downloader=OutlookDownloader,
)
|