unstructured-ingest 0.0.2__tar.gz → 0.0.2.dev0__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/PKG-INFO +1 -1
- unstructured-ingest-0.0.2.dev0/unstructured_ingest/__version__.py +1 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/cmd.py +0 -10
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/src.py +0 -2
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +9 -1
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/local.py +8 -0
- unstructured-ingest-0.0.2.dev0/unstructured_ingest/v2/cli/configs/__init__.py +6 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/__init__.py +1 -2
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/downloader.py +3 -9
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/file_data.py +1 -6
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/process.py +0 -3
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/interfaces.py +5 -3
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/pipeline.py +2 -72
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/download.py +13 -77
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/astra.py +0 -8
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +0 -8
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/chroma.py +6 -8
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +0 -9
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +9 -23
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +27 -12
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +5 -13
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/google_drive.py +9 -13
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/local.py +15 -15
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/mongodb.py +4 -10
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/onedrive.py +2 -14
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/pinecone.py +3 -6
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/salesforce.py +8 -10
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/sharepoint.py +8 -14
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/sql.py +9 -24
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/weaviate.py +5 -13
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest.egg-info/PKG-INFO +1 -1
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest.egg-info/SOURCES.txt +0 -3
- unstructured-ingest-0.0.2/unstructured_ingest/__version__.py +0 -1
- unstructured-ingest-0.0.2/unstructured_ingest/v2/cli/configs/__init__.py +0 -13
- unstructured-ingest-0.0.2/unstructured_ingest/v2/cli/configs/filter.py +0 -28
- unstructured-ingest-0.0.2/unstructured_ingest/v2/pipeline/steps/filter.py +0 -40
- unstructured-ingest-0.0.2/unstructured_ingest/v2/processes/filter.py +0 -54
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/README.md +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/pyproject.toml +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/setup.cfg +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/setup.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/test/test_error.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/test/test_interfaces.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/test/test_logger.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/test/test_utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmd_factory.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/airtable.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/astra.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/biomed.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/chroma.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/confluence.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/discord.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/github.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/jira.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/kafka.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/local.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/notion.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/outlook.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/reddit.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/slack.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/sql.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/vectara.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/common.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/interfaces.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/cli/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/airtable.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/astra.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/biomed.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/chroma.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/clarifai.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/confluence.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/delta_table.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/discord.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/git.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/github.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/gitlab.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/google_drive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/hubspot.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/jira.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/kafka.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/local.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/mongodb.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/client.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/connector.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/helpers.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/interfaces.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/block.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/date.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/file.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/page.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/parent.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/notion/types/user.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/onedrive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/opensearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/outlook.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/pinecone.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/qdrant.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/reddit.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/registry.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/salesforce.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/sharepoint.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/slack.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/sql.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/vectara.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/weaviate.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/connector/wikipedia.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/error.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/evaluate.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/ingest_backoff/_common.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/interfaces.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/logger.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/main.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/copy.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/doc_factory.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/partition.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/permissions.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/source.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/pipeline/write.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/processor.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/airtable.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/astra.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/base_runner.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/biomed.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/confluence.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/delta_table.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/discord.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/github.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/gitlab.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/google_drive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/hubspot.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/jira.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/kafka.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/local.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/mongodb.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/notion.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/onedrive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/opensearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/outlook.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/reddit.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/salesforce.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/sharepoint.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/slack.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/wikipedia.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/astra.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/base_writer.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/chroma.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/clarifai.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/delta_table.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/kafka.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/mongodb.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/opensearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/pinecone.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/qdrant.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/sql.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/vectara.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/runner/writers/weaviate.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/utils/table.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/dest.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/importer.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cli.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/astra.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/chroma.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/google_drive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/milvus.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/mongodb.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/onedrive.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/opensearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/pinecone.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/salesforce.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/sharepoint.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/singlestore.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/sql.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/cmds/weaviate.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/configs/chunk.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/configs/embed.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/configs/partition.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/configs/processor.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/interfaces.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/example.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/connector.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/processor.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/interfaces/uploader.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/logger.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/main.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/chunk.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/embed.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/index.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/partition.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/stage.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/uncompress.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/steps/upload.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/pipeline/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/chunker.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/milvus.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/opensearch.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/singlestore.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/embedder.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/partitioner.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/processes/uncompress.py +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest.egg-info/entry_points.txt +0 -0
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest.egg-info/requires.txt +12 -12
- {unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.2-dev0" # pragma: no cover
|
{unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/cmd.py
RENAMED
|
@@ -24,7 +24,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
24
24
|
)
|
|
25
25
|
from unstructured_ingest.v2.processes.connectors.local import LocalUploader, LocalUploaderConfig
|
|
26
26
|
from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
|
|
27
|
-
from unstructured_ingest.v2.processes.filter import Filterer, FiltererConfig
|
|
28
27
|
from unstructured_ingest.v2.processes.partitioner import Partitioner, PartitionerConfig
|
|
29
28
|
|
|
30
29
|
CommandT = TypeVar("CommandT", bound=click.Command)
|
|
@@ -76,8 +75,6 @@ class BaseCmd(ABC):
|
|
|
76
75
|
}
|
|
77
76
|
if chunker := self.get_chunker(options=source_options):
|
|
78
77
|
pipeline_kwargs["chunker"] = chunker
|
|
79
|
-
if filterer := self.get_filterer(options=source_options):
|
|
80
|
-
pipeline_kwargs["filterer"] = filterer
|
|
81
78
|
if embedder := self.get_embeder(options=source_options):
|
|
82
79
|
pipeline_kwargs["embedder"] = embedder
|
|
83
80
|
if dest:
|
|
@@ -108,13 +105,6 @@ class BaseCmd(ABC):
|
|
|
108
105
|
return None
|
|
109
106
|
return Chunker(config=chunker_config)
|
|
110
107
|
|
|
111
|
-
@staticmethod
|
|
112
|
-
def get_filterer(options: dict[str, Any]) -> Optional[Filterer]:
|
|
113
|
-
filterer_configs = extract_config(flat_data=options, config=FiltererConfig)
|
|
114
|
-
if not filterer_configs.to_dict():
|
|
115
|
-
return None
|
|
116
|
-
return Filterer(config=filterer_configs)
|
|
117
|
-
|
|
118
108
|
@staticmethod
|
|
119
109
|
def get_embeder(options: dict[str, Any]) -> Optional[Embedder]:
|
|
120
110
|
embedder_config = extract_config(flat_data=options, config=EmbedderConfig)
|
{unstructured-ingest-0.0.2 → unstructured-ingest-0.0.2.dev0}/unstructured_ingest/v2/cli/base/src.py
RENAMED
|
@@ -8,7 +8,6 @@ from unstructured_ingest.v2.cli.base.cmd import BaseCmd
|
|
|
8
8
|
from unstructured_ingest.v2.cli.configs import (
|
|
9
9
|
ChunkerCliConfig,
|
|
10
10
|
EmbedderCliConfig,
|
|
11
|
-
FilterCliConfig,
|
|
12
11
|
PartitionerCliConfig,
|
|
13
12
|
ProcessorCliConfig,
|
|
14
13
|
)
|
|
@@ -27,7 +26,6 @@ class SrcCmd(BaseCmd):
|
|
|
27
26
|
ProcessorCliConfig,
|
|
28
27
|
PartitionerCliConfig,
|
|
29
28
|
EmbedderCliConfig,
|
|
30
|
-
FilterCliConfig,
|
|
31
29
|
ChunkerCliConfig,
|
|
32
30
|
]
|
|
33
31
|
)
|
|
@@ -3,6 +3,7 @@ from dataclasses import dataclass
|
|
|
3
3
|
import click
|
|
4
4
|
|
|
5
5
|
from unstructured_ingest.v2.cli.interfaces import CliConfig
|
|
6
|
+
from unstructured_ingest.v2.cli.utils import DelimitedString
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
@dataclass
|
|
@@ -13,7 +14,7 @@ class FsspecCliDownloadConfig(CliConfig):
|
|
|
13
14
|
click.Option(
|
|
14
15
|
["--download-dir"],
|
|
15
16
|
help="Where files are downloaded to, defaults to a location at"
|
|
16
|
-
"`$HOME/.cache/
|
|
17
|
+
"`$HOME/.cache/unstructured_ingest/<connector name>/<SHA256>`.",
|
|
17
18
|
),
|
|
18
19
|
]
|
|
19
20
|
|
|
@@ -64,6 +65,13 @@ class FsspecCliIndexerConfig(FsspecCliFileConfig):
|
|
|
64
65
|
help="Recursively download files in their respective folders "
|
|
65
66
|
"otherwise stop at the files in provided folder level.",
|
|
66
67
|
),
|
|
68
|
+
click.Option(
|
|
69
|
+
["--file-glob"],
|
|
70
|
+
default=None,
|
|
71
|
+
type=DelimitedString(),
|
|
72
|
+
help="A comma-separated list of file globs to limit which types of "
|
|
73
|
+
"local files are accepted, e.g. '*.html,*.txt'",
|
|
74
|
+
),
|
|
67
75
|
]
|
|
68
76
|
)
|
|
69
77
|
return options
|
|
@@ -4,6 +4,7 @@ import click
|
|
|
4
4
|
|
|
5
5
|
from unstructured_ingest.v2.cli.base import DestCmd, SrcCmd
|
|
6
6
|
from unstructured_ingest.v2.cli.interfaces import CliConfig
|
|
7
|
+
from unstructured_ingest.v2.cli.utils import DelimitedString
|
|
7
8
|
from unstructured_ingest.v2.processes.connectors.local import CONNECTOR_TYPE
|
|
8
9
|
|
|
9
10
|
|
|
@@ -18,6 +19,13 @@ class LocalCliIndexerConfig(CliConfig):
|
|
|
18
19
|
type=click.Path(file_okay=True, dir_okay=True, exists=True),
|
|
19
20
|
help="Path to the location in the local file system that will be processed.",
|
|
20
21
|
),
|
|
22
|
+
click.Option(
|
|
23
|
+
["--file-glob"],
|
|
24
|
+
default=None,
|
|
25
|
+
type=DelimitedString(),
|
|
26
|
+
help="A comma-separated list of file globs to limit which types of "
|
|
27
|
+
"local files are accepted, e.g. '*.html,*.txt'",
|
|
28
|
+
),
|
|
21
29
|
click.Option(
|
|
22
30
|
["--recursive"],
|
|
23
31
|
is_flag=True,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from .connector import AccessConfig, BaseConnector, ConnectionConfig
|
|
2
2
|
from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses
|
|
3
|
-
from .file_data import FileData,
|
|
3
|
+
from .file_data import FileData, SourceIdentifiers
|
|
4
4
|
from .indexer import Indexer, IndexerConfig
|
|
5
5
|
from .process import BaseProcess
|
|
6
6
|
from .processor import ProcessorConfig
|
|
@@ -26,5 +26,4 @@ __all__ = [
|
|
|
26
26
|
"AccessConfig",
|
|
27
27
|
"ConnectionConfig",
|
|
28
28
|
"BaseConnector",
|
|
29
|
-
"FileDataSourceMetadata",
|
|
30
29
|
]
|
|
@@ -30,15 +30,6 @@ class Downloader(BaseProcess, BaseConnector, ABC):
|
|
|
30
30
|
connector_type: str
|
|
31
31
|
download_config: DownloaderConfigT
|
|
32
32
|
|
|
33
|
-
def get_download_path(self, file_data: FileData) -> Optional[Path]:
|
|
34
|
-
if not file_data.source_identifiers:
|
|
35
|
-
return None
|
|
36
|
-
rel_path = file_data.source_identifiers.relative_path
|
|
37
|
-
if not rel_path:
|
|
38
|
-
return None
|
|
39
|
-
rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
|
|
40
|
-
return self.download_dir / Path(rel_path)
|
|
41
|
-
|
|
42
33
|
@staticmethod
|
|
43
34
|
def is_float(value: str):
|
|
44
35
|
try:
|
|
@@ -77,6 +68,9 @@ class Downloader(BaseProcess, BaseConnector, ABC):
|
|
|
77
68
|
def is_async(self) -> bool:
|
|
78
69
|
return True
|
|
79
70
|
|
|
71
|
+
def get_download_path(self, file_data: FileData) -> Optional[Path]:
|
|
72
|
+
return None
|
|
73
|
+
|
|
80
74
|
@abstractmethod
|
|
81
75
|
def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
|
|
82
76
|
pass
|
|
@@ -22,18 +22,13 @@ class SourceIdentifiers:
|
|
|
22
22
|
return self.rel_path or self.fullpath
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
@dataclass
|
|
26
|
-
class FileDataSourceMetadata(DataSourceMetadata):
|
|
27
|
-
filesize_bytes: Optional[int] = None
|
|
28
|
-
|
|
29
|
-
|
|
30
25
|
@dataclass
|
|
31
26
|
class FileData(DataClassJsonMixin):
|
|
32
27
|
identifier: str
|
|
33
28
|
connector_type: str
|
|
34
29
|
source_identifiers: Optional[SourceIdentifiers] = None
|
|
35
30
|
doc_type: Literal["file", "batch"] = field(default="file")
|
|
36
|
-
metadata:
|
|
31
|
+
metadata: DataSourceMetadata = field(default_factory=DataSourceMetadata)
|
|
37
32
|
additional_metadata: dict[str, Any] = field(default_factory=dict)
|
|
38
33
|
reprocess: bool = False
|
|
39
34
|
|
|
@@ -92,7 +92,7 @@ class PipelineStep(ABC):
|
|
|
92
92
|
|
|
93
93
|
if iterable:
|
|
94
94
|
if len(iterable) == 1:
|
|
95
|
-
return self.process_serially(iterable)
|
|
95
|
+
return [self.process_serially(iterable)]
|
|
96
96
|
if self.context.num_processes == 1:
|
|
97
97
|
return self.process_serially(iterable)
|
|
98
98
|
with mp.Pool(
|
|
@@ -126,8 +126,6 @@ class PipelineStep(ABC):
|
|
|
126
126
|
logger.info(
|
|
127
127
|
f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore
|
|
128
128
|
)
|
|
129
|
-
else:
|
|
130
|
-
logger.info(f"Calling {self.__class__.__name__} with no inputs")
|
|
131
129
|
if self.context.async_supported and self.process.is_async():
|
|
132
130
|
return self.process_async(iterable=iterable)
|
|
133
131
|
if self.context.mp_supported:
|
|
@@ -148,6 +146,8 @@ class PipelineStep(ABC):
|
|
|
148
146
|
logger.error(f"Exception raised while running {self.identifier}", exc_info=e)
|
|
149
147
|
if "file_data_path" in kwargs:
|
|
150
148
|
self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)}
|
|
149
|
+
else:
|
|
150
|
+
self.context.status[self.identifier] = {"step_error": str(e)}
|
|
151
151
|
if self.context.raise_on_error:
|
|
152
152
|
raise e
|
|
153
153
|
return None
|
|
@@ -160,6 +160,8 @@ class PipelineStep(ABC):
|
|
|
160
160
|
logger.error(f"Exception raised while running {self.identifier}", exc_info=e)
|
|
161
161
|
if "file_data_path" in kwargs:
|
|
162
162
|
self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)}
|
|
163
|
+
else:
|
|
164
|
+
self.context.status[self.identifier] = {"step_error": str(e)}
|
|
163
165
|
if self.context.raise_on_error:
|
|
164
166
|
raise e
|
|
165
167
|
return None
|
|
@@ -9,7 +9,6 @@ from unstructured_ingest.v2.logger import logger, make_default_logger
|
|
|
9
9
|
from unstructured_ingest.v2.pipeline.steps.chunk import Chunker, ChunkStep
|
|
10
10
|
from unstructured_ingest.v2.pipeline.steps.download import DownloaderT, DownloadStep
|
|
11
11
|
from unstructured_ingest.v2.pipeline.steps.embed import Embedder, EmbedStep
|
|
12
|
-
from unstructured_ingest.v2.pipeline.steps.filter import Filterer, FilterStep
|
|
13
12
|
from unstructured_ingest.v2.pipeline.steps.index import IndexerT, IndexStep
|
|
14
13
|
from unstructured_ingest.v2.pipeline.steps.partition import Partitioner, PartitionStep
|
|
15
14
|
from unstructured_ingest.v2.pipeline.steps.stage import UploadStager, UploadStageStep
|
|
@@ -28,7 +27,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
28
27
|
)
|
|
29
28
|
from unstructured_ingest.v2.processes.connectors.local import LocalUploader
|
|
30
29
|
from unstructured_ingest.v2.processes.embedder import EmbedderConfig
|
|
31
|
-
from unstructured_ingest.v2.processes.filter import FiltererConfig
|
|
32
30
|
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
|
|
33
31
|
|
|
34
32
|
|
|
@@ -39,33 +37,22 @@ class PipelineError(Exception):
|
|
|
39
37
|
@dataclass
|
|
40
38
|
class Pipeline:
|
|
41
39
|
context: ProcessorConfig
|
|
42
|
-
|
|
43
40
|
indexer: InitVar[IndexerT]
|
|
44
41
|
indexer_step: IndexStep = field(init=False)
|
|
45
|
-
|
|
46
42
|
downloader: InitVar[DownloaderT]
|
|
47
43
|
downloader_step: DownloadStep = field(init=False)
|
|
48
|
-
|
|
49
44
|
partitioner: InitVar[Partitioner]
|
|
50
45
|
partitioner_step: PartitionStep = field(init=False)
|
|
51
|
-
|
|
52
46
|
chunker: InitVar[Optional[Chunker]] = None
|
|
53
47
|
chunker_step: ChunkStep = field(init=False, default=None)
|
|
54
|
-
|
|
55
48
|
embedder: InitVar[Optional[Embedder]] = None
|
|
56
49
|
embedder_step: EmbedStep = field(init=False, default=None)
|
|
57
|
-
|
|
58
50
|
stager: InitVar[Optional[UploadStager]] = None
|
|
59
51
|
stager_step: UploadStageStep = field(init=False, default=None)
|
|
60
|
-
|
|
61
52
|
uploader: InitVar[Uploader] = field(default=LocalUploader())
|
|
62
53
|
uploader_step: UploadStep = field(init=False, default=None)
|
|
63
|
-
|
|
64
54
|
uncompress_step: UncompressStep = field(init=False, default=None)
|
|
65
55
|
|
|
66
|
-
filterer: InitVar[Optional[Filterer]] = None
|
|
67
|
-
filter_step: FilterStep = field(init=False, default=None)
|
|
68
|
-
|
|
69
56
|
def __post_init__(
|
|
70
57
|
self,
|
|
71
58
|
indexer: IndexerT,
|
|
@@ -75,12 +62,10 @@ class Pipeline:
|
|
|
75
62
|
embedder: Embedder = None,
|
|
76
63
|
stager: UploadStager = None,
|
|
77
64
|
uploader: Uploader = None,
|
|
78
|
-
filterer: Filterer = None,
|
|
79
65
|
):
|
|
80
66
|
make_default_logger(level=logging.DEBUG if self.context.verbose else logging.INFO)
|
|
81
67
|
self.indexer_step = IndexStep(process=indexer, context=self.context)
|
|
82
68
|
self.downloader_step = DownloadStep(process=downloader, context=self.context)
|
|
83
|
-
self.filter_step = FilterStep(process=filterer, context=self.context) if filterer else None
|
|
84
69
|
self.partitioner_step = PartitionStep(process=partitioner, context=self.context)
|
|
85
70
|
self.chunker_step = ChunkStep(process=chunker, context=self.context) if chunker else None
|
|
86
71
|
|
|
@@ -124,7 +109,6 @@ class Pipeline:
|
|
|
124
109
|
def run(self):
|
|
125
110
|
try:
|
|
126
111
|
start_time = time()
|
|
127
|
-
self._run_prechecks()
|
|
128
112
|
self._run()
|
|
129
113
|
logger.info(f"Finished ingest process in {time() - start_time}s")
|
|
130
114
|
finally:
|
|
@@ -146,37 +130,6 @@ class Pipeline:
|
|
|
146
130
|
final = [f for f in flat if f]
|
|
147
131
|
return final or None
|
|
148
132
|
|
|
149
|
-
def _run_prechecks(self):
|
|
150
|
-
steps = [self.indexer_step, self.downloader_step, self.partitioner_step, self.uploader_step]
|
|
151
|
-
if self.chunker_step:
|
|
152
|
-
steps.append(self.chunker_step)
|
|
153
|
-
if self.embedder_step:
|
|
154
|
-
steps.append(self.embedder_step)
|
|
155
|
-
if self.uncompress_step:
|
|
156
|
-
steps.append(self.uncompress_step)
|
|
157
|
-
if self.stager_step:
|
|
158
|
-
steps.append(self.stager_step)
|
|
159
|
-
failures = {}
|
|
160
|
-
for step in steps:
|
|
161
|
-
try:
|
|
162
|
-
step.process.precheck()
|
|
163
|
-
except Exception as e:
|
|
164
|
-
failures[step.process.__class__.__name__] = f"[{type(e).__name__}] {e}"
|
|
165
|
-
if failures:
|
|
166
|
-
for k, v in failures.items():
|
|
167
|
-
logger.error(f"Step precheck failure: {k}: {v}")
|
|
168
|
-
raise PipelineError("Precheck failed")
|
|
169
|
-
|
|
170
|
-
def apply_filter(self, records: list[dict]) -> list[dict]:
|
|
171
|
-
if not self.filter_step:
|
|
172
|
-
return records
|
|
173
|
-
data_to_filter = [{"file_data_path": i["file_data_path"]} for i in records]
|
|
174
|
-
filtered_data = self.filter_step(data_to_filter)
|
|
175
|
-
filtered_data = [f for f in filtered_data if f is not None]
|
|
176
|
-
filtered_file_data_paths = [r["file_data_path"] for r in filtered_data]
|
|
177
|
-
filtered_records = [r for r in records if r["file_data_path"] in filtered_file_data_paths]
|
|
178
|
-
return filtered_records
|
|
179
|
-
|
|
180
133
|
def _run(self):
|
|
181
134
|
logger.info(
|
|
182
135
|
f"Running local pipline: {self} with configs: "
|
|
@@ -194,33 +147,18 @@ class Pipeline:
|
|
|
194
147
|
if not indices_inputs:
|
|
195
148
|
return
|
|
196
149
|
|
|
197
|
-
# Initial filtering on indexed content
|
|
198
|
-
indices_inputs = self.apply_filter(records=indices_inputs)
|
|
199
|
-
if not indices_inputs:
|
|
200
|
-
return
|
|
201
|
-
|
|
202
150
|
# Download associated content to local file system
|
|
203
151
|
downloaded_data = self.downloader_step(indices_inputs)
|
|
204
152
|
downloaded_data = self.clean_results(results=downloaded_data)
|
|
205
153
|
if not downloaded_data:
|
|
206
154
|
return
|
|
207
155
|
|
|
208
|
-
# Post download filtering
|
|
209
|
-
downloaded_data = self.apply_filter(records=downloaded_data)
|
|
210
|
-
if not downloaded_data:
|
|
211
|
-
return
|
|
212
|
-
|
|
213
156
|
# Run uncompress if available
|
|
214
157
|
if self.uncompress_step:
|
|
215
158
|
downloaded_data = self.uncompress_step(downloaded_data)
|
|
216
159
|
# Flatten list of lists
|
|
217
160
|
downloaded_data = self.clean_results(results=downloaded_data)
|
|
218
161
|
|
|
219
|
-
# Post uncompress filtering
|
|
220
|
-
downloaded_data = self.apply_filter(records=downloaded_data)
|
|
221
|
-
if not downloaded_data:
|
|
222
|
-
return
|
|
223
|
-
|
|
224
162
|
if not downloaded_data:
|
|
225
163
|
return
|
|
226
164
|
|
|
@@ -241,14 +179,9 @@ class Pipeline:
|
|
|
241
179
|
self.uploader_step(iterable=elements)
|
|
242
180
|
|
|
243
181
|
def __str__(self):
|
|
244
|
-
s = [str(self.indexer_step)]
|
|
245
|
-
if filter_step := self.filter_step:
|
|
246
|
-
s.append(str(filter_step))
|
|
247
|
-
s.append(str(self.downloader_step))
|
|
248
|
-
if filter_step := self.filter_step:
|
|
249
|
-
s.append(str(filter_step))
|
|
182
|
+
s = [str(self.indexer_step), str(self.downloader_step)]
|
|
250
183
|
if uncompress_step := self.uncompress_step:
|
|
251
|
-
s.
|
|
184
|
+
s.append(str(uncompress_step))
|
|
252
185
|
s.append(str(self.partitioner_step))
|
|
253
186
|
if chunker_step := self.chunker_step:
|
|
254
187
|
s.append(str(chunker_step))
|
|
@@ -267,7 +200,6 @@ class Pipeline:
|
|
|
267
200
|
downloader_config: DownloaderConfigT,
|
|
268
201
|
source_connection_config: ConnectionConfig,
|
|
269
202
|
partitioner_config: PartitionerConfig,
|
|
270
|
-
filterer_config: FiltererConfig = None,
|
|
271
203
|
chunker_config: Optional[ChunkerConfig] = None,
|
|
272
204
|
embedder_config: Optional[EmbedderConfig] = None,
|
|
273
205
|
destination_connection_config: Optional[ConnectionConfig] = None,
|
|
@@ -303,8 +235,6 @@ class Pipeline:
|
|
|
303
235
|
),
|
|
304
236
|
"partitioner": Partitioner(config=partitioner_config),
|
|
305
237
|
}
|
|
306
|
-
if filterer_config:
|
|
307
|
-
pipeline_kwargs["filterer"] = Filterer(config=filterer_config)
|
|
308
238
|
if chunker_config:
|
|
309
239
|
pipeline_kwargs["chunker"] = Chunker(config=chunker_config)
|
|
310
240
|
if embedder_config:
|
|
@@ -2,7 +2,6 @@ import asyncio
|
|
|
2
2
|
import hashlib
|
|
3
3
|
import json
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from pathlib import Path
|
|
6
5
|
from typing import Callable, Optional, TypedDict, TypeVar
|
|
7
6
|
|
|
8
7
|
from unstructured_ingest.v2.interfaces import FileData, download_responses
|
|
@@ -71,40 +70,11 @@ class DownloadStep(PipelineStep):
|
|
|
71
70
|
return True
|
|
72
71
|
return False
|
|
73
72
|
|
|
74
|
-
def update_file_data(
|
|
75
|
-
self, file_data: FileData, file_data_path: Path, download_path: Path
|
|
76
|
-
) -> None:
|
|
77
|
-
file_size_bytes = download_path.stat().st_size
|
|
78
|
-
changed = False
|
|
79
|
-
if not file_data.metadata.filesize_bytes and file_size_bytes:
|
|
80
|
-
changed = True
|
|
81
|
-
file_data.metadata.filesize_bytes = file_size_bytes
|
|
82
|
-
if (
|
|
83
|
-
file_data.metadata.filesize_bytes
|
|
84
|
-
and file_data.metadata.filesize_bytes != file_size_bytes
|
|
85
|
-
):
|
|
86
|
-
logger.warning(
|
|
87
|
-
f"file size in original file data "
|
|
88
|
-
f"({file_data.metadata.filesize_bytes}) doesn't "
|
|
89
|
-
f"match size of local file: {file_size_bytes}, updating"
|
|
90
|
-
)
|
|
91
|
-
changed = True
|
|
92
|
-
file_data.metadata.filesize_bytes = file_size_bytes
|
|
93
|
-
if changed:
|
|
94
|
-
logger.debug(f"Updating file data with new content: {file_data.to_dict()}")
|
|
95
|
-
with file_data_path.open("w") as file:
|
|
96
|
-
json.dump(file_data.to_dict(), file, indent=2)
|
|
97
|
-
|
|
98
73
|
async def _run_async(self, fn: Callable, file_data_path: str) -> list[DownloadStepResponse]:
|
|
99
74
|
file_data = FileData.from_file(path=file_data_path)
|
|
100
75
|
download_path = self.process.get_download_path(file_data=file_data)
|
|
101
76
|
if not self.should_download(file_data=file_data, file_data_path=file_data_path):
|
|
102
77
|
logger.debug(f"Skipping download, file already exists locally: {download_path}")
|
|
103
|
-
self.update_file_data(
|
|
104
|
-
file_data=file_data,
|
|
105
|
-
file_data_path=Path(file_data_path),
|
|
106
|
-
download_path=download_path,
|
|
107
|
-
)
|
|
108
78
|
return [DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))]
|
|
109
79
|
fn_kwargs = {"file_data": file_data}
|
|
110
80
|
if not asyncio.iscoroutinefunction(fn):
|
|
@@ -115,60 +85,26 @@ class DownloadStep(PipelineStep):
|
|
|
115
85
|
else:
|
|
116
86
|
download_results = await fn(**fn_kwargs)
|
|
117
87
|
return self.create_step_results(
|
|
118
|
-
current_file_data_path=file_data_path,
|
|
119
|
-
download_results=download_results,
|
|
120
|
-
current_file_data=file_data,
|
|
88
|
+
current_file_data_path=file_data_path, download_results=download_results
|
|
121
89
|
)
|
|
122
90
|
|
|
123
91
|
def create_step_results(
|
|
124
|
-
self,
|
|
125
|
-
current_file_data_path: str,
|
|
126
|
-
current_file_data: FileData,
|
|
127
|
-
download_results: download_responses,
|
|
92
|
+
self, current_file_data_path: str, download_results: download_responses
|
|
128
93
|
) -> list[DownloadStepResponse]:
|
|
129
|
-
responses = []
|
|
130
94
|
if not isinstance(download_results, list):
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if download_results["file_data"].identifier == current_file_data.identifier:
|
|
135
|
-
self.update_file_data(
|
|
136
|
-
file_data=file_data,
|
|
137
|
-
file_data_path=Path(file_data_path),
|
|
138
|
-
download_path=download_path,
|
|
139
|
-
)
|
|
140
|
-
responses = [
|
|
141
|
-
DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))
|
|
142
|
-
]
|
|
143
|
-
else:
|
|
144
|
-
file_data = download_results["file_data"]
|
|
145
|
-
file_data_path = self.persist_new_file_data(file_data=file_data)
|
|
146
|
-
self.update_file_data(
|
|
147
|
-
file_data=file_data,
|
|
148
|
-
file_data_path=Path(file_data_path),
|
|
149
|
-
download_path=download_path,
|
|
95
|
+
return [
|
|
96
|
+
DownloadStepResponse(
|
|
97
|
+
file_data_path=current_file_data_path, path=str(download_results["path"])
|
|
150
98
|
)
|
|
151
|
-
|
|
152
|
-
DownloadStepResponse(
|
|
153
|
-
file_data_path=current_file_data_path, path=str(download_results["path"])
|
|
154
|
-
)
|
|
155
|
-
]
|
|
156
|
-
else:
|
|
99
|
+
]
|
|
157
100
|
# Supplemental results generated as part of the download process
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
download_path=download_path,
|
|
166
|
-
)
|
|
167
|
-
responses.append(
|
|
168
|
-
DownloadStepResponse(file_data_path=file_data_path, path=res["path"])
|
|
169
|
-
)
|
|
170
|
-
|
|
171
|
-
return responses
|
|
101
|
+
download_step_results = []
|
|
102
|
+
for res in download_results:
|
|
103
|
+
file_data_path = self.persist_new_file_data(file_data=res["file_data"])
|
|
104
|
+
download_step_results.append(
|
|
105
|
+
DownloadStepResponse(file_data_path=file_data_path, path=res["path"])
|
|
106
|
+
)
|
|
107
|
+
return download_step_results
|
|
172
108
|
|
|
173
109
|
def persist_new_file_data(self, file_data: FileData) -> str:
|
|
174
110
|
record_hash = self.get_hash(extras=[file_data.identifier])
|
|
@@ -7,7 +7,6 @@ from unstructured import __name__ as integration_name
|
|
|
7
7
|
from unstructured.__version__ import __version__ as integration_version
|
|
8
8
|
|
|
9
9
|
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
10
|
-
from unstructured_ingest.error import DestinationConnectionError
|
|
11
10
|
from unstructured_ingest.utils.data_prep import batch_generator
|
|
12
11
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
12
|
from unstructured_ingest.v2.interfaces import (
|
|
@@ -95,13 +94,6 @@ class AstraUploader(Uploader):
|
|
|
95
94
|
upload_config: AstraUploaderConfig
|
|
96
95
|
connector_type: str = CONNECTOR_TYPE
|
|
97
96
|
|
|
98
|
-
def precheck(self) -> None:
|
|
99
|
-
try:
|
|
100
|
-
self.get_collection()
|
|
101
|
-
except Exception as e:
|
|
102
|
-
logger.error(f"Failed to validate connection {e}", exc_info=True)
|
|
103
|
-
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
104
|
-
|
|
105
97
|
@requires_dependencies(["astrapy"], extras="astra")
|
|
106
98
|
def get_collection(self) -> "AstraDBCollection":
|
|
107
99
|
from astrapy.db import AstraDB
|
|
@@ -175,14 +175,6 @@ class AzureCognitiveSearchUploader(Uploader):
|
|
|
175
175
|
),
|
|
176
176
|
)
|
|
177
177
|
|
|
178
|
-
def precheck(self) -> None:
|
|
179
|
-
try:
|
|
180
|
-
client = self.connection_config.generate_client()
|
|
181
|
-
client.get_document_count()
|
|
182
|
-
except Exception as e:
|
|
183
|
-
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
184
|
-
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
185
|
-
|
|
186
178
|
def write_dict_wrapper(self, elements_dict):
|
|
187
179
|
return self.write_dict(elements_dict=elements_dict)
|
|
188
180
|
|
|
@@ -111,13 +111,10 @@ class ChromaUploader(Uploader):
|
|
|
111
111
|
connector_type: str = CONNECTOR_TYPE
|
|
112
112
|
upload_config: ChromaUploaderConfig
|
|
113
113
|
connection_config: ChromaConnectionConfig
|
|
114
|
+
client: Optional["Client"] = field(init=False)
|
|
114
115
|
|
|
115
|
-
def
|
|
116
|
-
|
|
117
|
-
self.create_client()
|
|
118
|
-
except Exception as e:
|
|
119
|
-
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
120
|
-
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
116
|
+
def __post_init__(self):
|
|
117
|
+
self.client = self.create_client()
|
|
121
118
|
|
|
122
119
|
@requires_dependencies(["chromadb"], extras="chroma")
|
|
123
120
|
def create_client(self) -> "Client":
|
|
@@ -190,9 +187,10 @@ class ChromaUploader(Uploader):
|
|
|
190
187
|
f"collection {self.connection_config.collection_name} "
|
|
191
188
|
f"at {self.connection_config.host}",
|
|
192
189
|
)
|
|
193
|
-
client = self.create_client()
|
|
194
190
|
|
|
195
|
-
collection = client.get_or_create_collection(
|
|
191
|
+
collection = self.client.get_or_create_collection(
|
|
192
|
+
name=self.connection_config.collection_name
|
|
193
|
+
)
|
|
196
194
|
for chunk in batch_generator(elements_dict, self.upload_config.batch_size):
|
|
197
195
|
self.upsert_batch(collection, self.prepare_chroma_list(chunk))
|
|
198
196
|
|
|
@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
|
|
|
3
3
|
from typing import TYPE_CHECKING, Any, Optional
|
|
4
4
|
|
|
5
5
|
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
6
|
-
from unstructured_ingest.error import DestinationConnectionError
|
|
7
6
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
8
7
|
from unstructured_ingest.v2.interfaces import (
|
|
9
8
|
AccessConfig,
|
|
@@ -12,7 +11,6 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
12
11
|
Uploader,
|
|
13
12
|
UploaderConfig,
|
|
14
13
|
)
|
|
15
|
-
from unstructured_ingest.v2.logger import logger
|
|
16
14
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
17
15
|
|
|
18
16
|
if TYPE_CHECKING:
|
|
@@ -80,13 +78,6 @@ class DatabricksVolumesUploader(Uploader):
|
|
|
80
78
|
host=self.connection_config.host, **self.connection_config.access_config.to_dict()
|
|
81
79
|
)
|
|
82
80
|
|
|
83
|
-
def precheck(self) -> None:
|
|
84
|
-
try:
|
|
85
|
-
assert self.client.current_user.me().active
|
|
86
|
-
except Exception as e:
|
|
87
|
-
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
88
|
-
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
89
|
-
|
|
90
81
|
def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
|
|
91
82
|
for content in contents:
|
|
92
83
|
with open(content.path, "rb") as elements_file:
|