unstructured-ingest 0.0.6__tar.gz → 0.0.7__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured-ingest-0.0.6/unstructured_ingest.egg-info → unstructured-ingest-0.0.7}/PKG-INFO +1 -1
- unstructured-ingest-0.0.7/unstructured_ingest/__version__.py +1 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/processor.py +6 -1
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/uploader.py +9 -4
- unstructured-ingest-0.0.7/unstructured_ingest/v2/otel.py +111 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/interfaces.py +61 -28
- unstructured-ingest-0.0.7/unstructured_ingest/v2/pipeline/otel.py +32 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/pipeline.py +11 -7
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/index.py +2 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/upload.py +7 -19
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/astradb.py +3 -8
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +4 -9
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/chroma.py +3 -8
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/couchbase.py +5 -9
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/databricks_volumes.py +9 -10
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/elasticsearch.py +4 -7
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/azure.py +3 -3
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/box.py +3 -3
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +3 -3
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +4 -6
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +3 -3
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/s3.py +2 -3
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +3 -3
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/kdbai.py +7 -8
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/local.py +15 -22
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/milvus.py +2 -14
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/mongodb.py +3 -8
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/pinecone.py +6 -24
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/singlestore.py +6 -6
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/sql.py +5 -7
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/weaviate.py +4 -11
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/partitioner.py +8 -1
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7/unstructured_ingest.egg-info}/PKG-INFO +1 -1
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest.egg-info/SOURCES.txt +2 -1
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest.egg-info/requires.txt +334 -270
- unstructured-ingest-0.0.6/unstructured_ingest/__version__.py +0 -1
- unstructured-ingest-0.0.6/unstructured_ingest/v2/example.py +0 -37
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/LICENSE.md +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/README.md +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/pyproject.toml +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/setup.cfg +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/setup.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/test/test_chunking_utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/test/test_error.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/test/test_interfaces.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/test/test_logger.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/test/test_utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/test/test_utils_v2.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmd_factory.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/airtable.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/astradb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/biomed.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/chroma.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/clarifai.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/confluence.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/delta_table.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/discord.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/github.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/gitlab.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/google_drive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/hubspot.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/jira.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/kafka.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/local.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/mongodb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/notion.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/onedrive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/opensearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/outlook.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/pinecone.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/qdrant.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/reddit.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/salesforce.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/sharepoint.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/slack.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/sql.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/vectara.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/weaviate.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/cmds/wikipedia.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/common.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/interfaces.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/cli/utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/airtable.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/astradb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/biomed.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/chroma.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/clarifai.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/confluence.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/delta_table.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/discord.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/fsspec.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/git.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/github.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/gitlab.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/google_drive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/hubspot.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/jira.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/kafka.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/local.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/mongodb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/client.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/connector.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/helpers.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/interfaces.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/block.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/callout.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/child_page.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/code.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/divider.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/embed.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/equation.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/file.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/heading.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/image.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/link_to_page.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/paragraph.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/pdf.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/quote.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/table.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/template.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/todo.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/toggle.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/blocks/video.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/created_by.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/created_time.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/date.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/email.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/files.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/number.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/people.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/rich_text.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/rollup.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/select.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/status.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/title.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/url.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/database_properties/verification.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/date.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/file.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/page.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/parent.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/rich_text.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/notion/types/user.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/onedrive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/opensearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/outlook.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/pinecone.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/qdrant.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/reddit.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/registry.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/salesforce.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/sharepoint.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/slack.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/sql.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/vectara.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/weaviate.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/connector/wikipedia.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/enhanced_dataclass/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/enhanced_dataclass/core.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/error.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/evaluate.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/ingest_backoff/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/ingest_backoff/_common.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/ingest_backoff/_wrapper.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/interfaces.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/logger.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/main.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/copy.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/doc_factory.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/partition.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/permissions.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/reformat/chunking.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/reformat/embedding.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/source.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/pipeline/write.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/processor.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/airtable.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/astradb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/base_runner.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/biomed.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/confluence.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/delta_table.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/discord.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/fsspec.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/fsspec/sftp.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/github.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/gitlab.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/google_drive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/hubspot.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/jira.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/kafka.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/local.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/mongodb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/notion.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/onedrive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/opensearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/outlook.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/reddit.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/salesforce.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/sharepoint.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/slack.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/wikipedia.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/astradb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/azure_cognitive_search.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/base_writer.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/chroma.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/clarifai.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/databricks_volumes.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/delta_table.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/elasticsearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/fsspec/azure.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/fsspec/box.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/fsspec/gcs.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/fsspec/s3.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/kafka.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/mongodb.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/opensearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/pinecone.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/qdrant.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/sql.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/vectara.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/runner/writers/weaviate.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/google_filetype.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/utils/table.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/base/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/base/cmd.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/base/dest.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/base/importer.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/base/src.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/cli.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/cmds.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/utils/click.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/cli/utils/model_conversion.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/connector.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/downloader.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/file_data.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/indexer.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/process.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/interfaces/upload_stager.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/logger.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/main.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/chunk.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/download.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/embed.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/filter.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/partition.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/stage.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/steps/uncompress.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/chunker.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connector_registry.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/fsspec/utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/google_drive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/onedrive.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/opensearch.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/salesforce.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/sharepoint.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/connectors/utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/embedder.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/filter.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/processes/uncompress.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/utils.py +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest.egg-info/dependency_links.txt +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest.egg-info/entry_points.txt +0 -0
- {unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.7" # pragma: no cover
|
|
@@ -27,9 +27,14 @@ class ProcessorConfig(BaseModel):
|
|
|
27
27
|
re_download: bool = False
|
|
28
28
|
uncompress: bool = False
|
|
29
29
|
|
|
30
|
+
# OTEL support
|
|
31
|
+
otel_endpoint: Optional[str] = Field(
|
|
32
|
+
default=None, description="OTEL endpoint to publish trace data to"
|
|
33
|
+
)
|
|
34
|
+
|
|
30
35
|
# Used to keep track of state in pipeline
|
|
31
36
|
status: dict = Field(default_factory=dict)
|
|
32
|
-
semaphore: Optional[Semaphore] = Field(init=False, default=None)
|
|
37
|
+
semaphore: Optional[Semaphore] = Field(init=False, default=None, exclude=True)
|
|
33
38
|
|
|
34
39
|
def model_post_init(self, __context: Any) -> None:
|
|
35
40
|
if self.max_connections is not None:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from abc import ABC
|
|
1
|
+
from abc import ABC
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any, TypeVar
|
|
@@ -31,9 +31,14 @@ class Uploader(BaseProcess, BaseConnector, ABC):
|
|
|
31
31
|
def is_async(self) -> bool:
|
|
32
32
|
return False
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
def is_batch(self) -> bool:
|
|
35
|
+
return False
|
|
36
|
+
|
|
37
|
+
def run_batch(self, contents: list[UploadContent], **kwargs: Any) -> None:
|
|
38
|
+
raise NotImplementedError()
|
|
39
|
+
|
|
40
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
41
|
+
raise NotImplementedError()
|
|
37
42
|
|
|
38
43
|
async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
39
44
|
return self.run(contents=[UploadContent(path=path, file_data=file_data)], **kwargs)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Callable, ClassVar, Optional, Protocol, Sequence
|
|
4
|
+
|
|
5
|
+
from opentelemetry import trace
|
|
6
|
+
from opentelemetry.context import attach, get_current
|
|
7
|
+
from opentelemetry.propagate import extract, inject
|
|
8
|
+
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
|
|
9
|
+
from opentelemetry.sdk.trace import ReadableSpan, Tracer, TracerProvider
|
|
10
|
+
from opentelemetry.sdk.trace.export import (
|
|
11
|
+
ConsoleSpanExporter,
|
|
12
|
+
SimpleSpanProcessor,
|
|
13
|
+
SpanExportResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
from unstructured_ingest.v2.logger import logger
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AddTraceCallable(Protocol):
|
|
20
|
+
def __call__(self, provider: TracerProvider) -> None:
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LogSpanExporter(ConsoleSpanExporter):
|
|
25
|
+
def __init__(self, log_out: Callable = logger.info, **kwargs):
|
|
26
|
+
self.log_out = log_out
|
|
27
|
+
super().__init__(**kwargs)
|
|
28
|
+
|
|
29
|
+
def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
|
|
30
|
+
for span in spans:
|
|
31
|
+
self.log_out(self.formatter(span))
|
|
32
|
+
return SpanExportResult.SUCCESS
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
|
|
36
|
+
class OtelHandler:
|
|
37
|
+
otel_endpoint: Optional[str] = None
|
|
38
|
+
service_name: str = "unstructured-ingest"
|
|
39
|
+
trace_provider: TracerProvider = field(init=False)
|
|
40
|
+
log_out: Callable = field(default=logger.info)
|
|
41
|
+
trace_context_key: ClassVar[str] = "_trace_context"
|
|
42
|
+
|
|
43
|
+
def init_trace(self):
|
|
44
|
+
# Should only be done once
|
|
45
|
+
resource = Resource(attributes={SERVICE_NAME: self.service_name})
|
|
46
|
+
trace_provider = self.init_trace_provider(resource=resource)
|
|
47
|
+
trace.set_tracer_provider(trace_provider)
|
|
48
|
+
|
|
49
|
+
@staticmethod
|
|
50
|
+
def set_attributes(span, attributes_dict):
|
|
51
|
+
if attributes_dict:
|
|
52
|
+
for att in attributes_dict:
|
|
53
|
+
span.set_attribute(att, attributes_dict[att])
|
|
54
|
+
|
|
55
|
+
@staticmethod
|
|
56
|
+
def inject_context() -> dict:
|
|
57
|
+
trace_context = {}
|
|
58
|
+
current_context = get_current()
|
|
59
|
+
inject(trace_context, current_context)
|
|
60
|
+
return trace_context
|
|
61
|
+
|
|
62
|
+
@staticmethod
|
|
63
|
+
def attach_context(trace_context: dict) -> object:
|
|
64
|
+
extracted_context = extract(trace_context)
|
|
65
|
+
return attach(extracted_context)
|
|
66
|
+
|
|
67
|
+
def get_otel_endpoint(self) -> Optional[str]:
|
|
68
|
+
if otel_endpoint := self.otel_endpoint:
|
|
69
|
+
return otel_endpoint
|
|
70
|
+
if otlp_endpoint := os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"):
|
|
71
|
+
return otlp_endpoint
|
|
72
|
+
if otlp_traces_endpoint := os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
|
|
73
|
+
return otlp_traces_endpoint
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
def _add_console_trace_processor(self, provider: TracerProvider) -> None:
|
|
77
|
+
def custom_formatter(span: ReadableSpan) -> str:
|
|
78
|
+
duration = (span.end_time - span.start_time) / 1e9
|
|
79
|
+
s = f"{span.name} finished in {duration}s"
|
|
80
|
+
if span.attributes:
|
|
81
|
+
attributes_str = ", ".join([f"{k}={v}" for k, v in span.attributes.items()])
|
|
82
|
+
s += f", attributes: {attributes_str}"
|
|
83
|
+
return s
|
|
84
|
+
|
|
85
|
+
tracer_exporter = LogSpanExporter(formatter=custom_formatter, log_out=self.log_out)
|
|
86
|
+
processor = SimpleSpanProcessor(tracer_exporter)
|
|
87
|
+
provider.add_span_processor(span_processor=processor)
|
|
88
|
+
|
|
89
|
+
def _add_otel_trace_processor(self, provider: TracerProvider) -> None:
|
|
90
|
+
otel_endpoint = self.get_otel_endpoint()
|
|
91
|
+
if not otel_endpoint:
|
|
92
|
+
return None
|
|
93
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
94
|
+
|
|
95
|
+
logger.debug(f"Adding otel exported at {otel_endpoint}")
|
|
96
|
+
trace_exporter = OTLPSpanExporter()
|
|
97
|
+
processor = SimpleSpanProcessor(trace_exporter)
|
|
98
|
+
provider.add_span_processor(processor)
|
|
99
|
+
|
|
100
|
+
def init_trace_provider(self, resource: Resource) -> TracerProvider:
|
|
101
|
+
trace_provider = TracerProvider(resource=resource)
|
|
102
|
+
add_fns: list[AddTraceCallable] = [
|
|
103
|
+
self._add_otel_trace_processor,
|
|
104
|
+
self._add_console_trace_processor,
|
|
105
|
+
]
|
|
106
|
+
for add_fn in add_fns:
|
|
107
|
+
add_fn(provider=trace_provider)
|
|
108
|
+
return trace_provider
|
|
109
|
+
|
|
110
|
+
def get_tracer(self) -> Tracer:
|
|
111
|
+
return trace.get_tracer(self.service_name)
|
|
@@ -1,40 +1,24 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import logging
|
|
3
3
|
import multiprocessing as mp
|
|
4
|
-
from abc import ABC
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
5
|
from concurrent.futures import ThreadPoolExecutor
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from functools import wraps
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from time import time
|
|
10
8
|
from typing import Any, Awaitable, Callable, Optional, TypeVar
|
|
11
9
|
|
|
12
10
|
from tqdm import tqdm
|
|
13
11
|
from tqdm.asyncio import tqdm as tqdm_asyncio
|
|
14
12
|
|
|
15
|
-
from unstructured_ingest.v2.interfaces import BaseProcess, ProcessorConfig
|
|
13
|
+
from unstructured_ingest.v2.interfaces import BaseProcess, ProcessorConfig, Uploader
|
|
16
14
|
from unstructured_ingest.v2.logger import logger, make_default_logger
|
|
15
|
+
from unstructured_ingest.v2.otel import OtelHandler
|
|
16
|
+
from unstructured_ingest.v2.pipeline.otel import instrument
|
|
17
17
|
|
|
18
18
|
BaseProcessT = TypeVar("BaseProcessT", bound=BaseProcess)
|
|
19
19
|
iterable_input = list[dict[str, Any]]
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
def timed(func):
|
|
23
|
-
@wraps(func)
|
|
24
|
-
def time_it(self, *args, **kwargs):
|
|
25
|
-
start = time()
|
|
26
|
-
try:
|
|
27
|
-
return func(self, *args, **kwargs)
|
|
28
|
-
finally:
|
|
29
|
-
if func.__name__ == "__call__":
|
|
30
|
-
reported_name = f"{self.__class__.__name__} [cls]"
|
|
31
|
-
else:
|
|
32
|
-
reported_name = func.__name__
|
|
33
|
-
logger.info(f"{reported_name} took {time() - start} seconds")
|
|
34
|
-
|
|
35
|
-
return time_it
|
|
36
|
-
|
|
37
|
-
|
|
38
22
|
@dataclass
|
|
39
23
|
class PipelineStep(ABC):
|
|
40
24
|
process: BaseProcessT
|
|
@@ -97,9 +81,15 @@ class PipelineStep(ABC):
|
|
|
97
81
|
return self.process_serially(iterable)
|
|
98
82
|
with mp.Pool(
|
|
99
83
|
processes=self.context.num_processes,
|
|
100
|
-
initializer=self.
|
|
101
|
-
initargs=(
|
|
84
|
+
initializer=self._init_mp,
|
|
85
|
+
initargs=(
|
|
86
|
+
logging.DEBUG if self.context.verbose else logging.INFO,
|
|
87
|
+
self.context.otel_endpoint,
|
|
88
|
+
),
|
|
102
89
|
) as pool:
|
|
90
|
+
otel_context = OtelHandler.inject_context()
|
|
91
|
+
for iter in iterable:
|
|
92
|
+
iter[OtelHandler.trace_context_key] = otel_context
|
|
103
93
|
if self.context.tqdm:
|
|
104
94
|
return list(
|
|
105
95
|
tqdm(
|
|
@@ -115,11 +105,13 @@ class PipelineStep(ABC):
|
|
|
115
105
|
# Allow mapping of kwargs via multiprocessing map()
|
|
116
106
|
return self.run(**input_kwargs)
|
|
117
107
|
|
|
118
|
-
def
|
|
108
|
+
def _init_mp(self, log_level: int, endpoint: Optional[str] = None) -> None:
|
|
119
109
|
# Init logger for each spawned process when using multiprocessing pool
|
|
120
110
|
make_default_logger(level=log_level)
|
|
111
|
+
otel_handler = OtelHandler(otel_endpoint=endpoint, log_out=logger.debug)
|
|
112
|
+
otel_handler.init_trace()
|
|
121
113
|
|
|
122
|
-
@
|
|
114
|
+
@instrument()
|
|
123
115
|
def __call__(self, iterable: Optional[iterable_input] = None) -> Any:
|
|
124
116
|
iterable = iterable or []
|
|
125
117
|
if iterable:
|
|
@@ -141,9 +133,19 @@ class PipelineStep(ABC):
|
|
|
141
133
|
raise NotImplementedError
|
|
142
134
|
|
|
143
135
|
def run(self, _fn: Optional[Callable] = None, **kwargs: Any) -> Optional[Any]:
|
|
136
|
+
kwargs = kwargs.copy()
|
|
137
|
+
otel_handler = OtelHandler(otel_endpoint=self.context.otel_endpoint, log_out=logger.debug)
|
|
138
|
+
tracer = otel_handler.get_tracer()
|
|
139
|
+
if trace_context := kwargs.pop(otel_handler.trace_context_key, {}):
|
|
140
|
+
otel_handler.attach_context(trace_context=trace_context)
|
|
141
|
+
attributes = {}
|
|
142
|
+
if file_data_path := kwargs.get("file_data_path"):
|
|
143
|
+
attributes["file_id"] = Path(file_data_path).stem
|
|
144
144
|
try:
|
|
145
|
-
|
|
146
|
-
|
|
145
|
+
with tracer.start_as_current_span(self.identifier, record_exception=True) as span:
|
|
146
|
+
otel_handler.set_attributes(span, attributes)
|
|
147
|
+
fn = _fn or self.process.run
|
|
148
|
+
return self._run(fn=fn, **kwargs)
|
|
147
149
|
except Exception as e:
|
|
148
150
|
logger.error(f"Exception raised while running {self.identifier}", exc_info=e)
|
|
149
151
|
if "file_data_path" in kwargs:
|
|
@@ -153,9 +155,17 @@ class PipelineStep(ABC):
|
|
|
153
155
|
return None
|
|
154
156
|
|
|
155
157
|
async def run_async(self, _fn: Optional[Callable] = None, **kwargs: Any) -> Optional[Any]:
|
|
158
|
+
otel_handler = OtelHandler(otel_endpoint=self.context.otel_endpoint, log_out=logger.debug)
|
|
156
159
|
try:
|
|
157
|
-
|
|
158
|
-
|
|
160
|
+
attributes = {}
|
|
161
|
+
if file_data_path := kwargs.get("file_data_path"):
|
|
162
|
+
attributes["file_id"] = Path(file_data_path).stem
|
|
163
|
+
with otel_handler.get_tracer().start_as_current_span(
|
|
164
|
+
self.identifier, record_exception=True
|
|
165
|
+
) as span:
|
|
166
|
+
otel_handler.set_attributes(span, attributes)
|
|
167
|
+
fn = _fn or self.process.run_async
|
|
168
|
+
return await self._run_async(fn=fn, **kwargs)
|
|
159
169
|
except Exception as e:
|
|
160
170
|
logger.error(f"Exception raised while running {self.identifier}", exc_info=e)
|
|
161
171
|
if "file_data_path" in kwargs:
|
|
@@ -167,3 +177,26 @@ class PipelineStep(ABC):
|
|
|
167
177
|
@property
|
|
168
178
|
def cache_dir(self) -> Path:
|
|
169
179
|
return Path(self.context.work_dir) / self.identifier
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@dataclass
|
|
183
|
+
class BatchPipelineStep(PipelineStep, ABC):
|
|
184
|
+
process: Uploader
|
|
185
|
+
|
|
186
|
+
def __call__(self, iterable: Optional[iterable_input] = None) -> Any:
|
|
187
|
+
if self.context.mp_supported and self.process.is_batch():
|
|
188
|
+
return self.run_batch(contents=iterable)
|
|
189
|
+
super().__call__(iterable=iterable)
|
|
190
|
+
|
|
191
|
+
@abstractmethod
|
|
192
|
+
def _run_batch(self, contents: iterable_input, **kwargs) -> Any:
|
|
193
|
+
pass
|
|
194
|
+
|
|
195
|
+
def run_batch(self, contents: iterable_input, **kwargs) -> Any:
|
|
196
|
+
try:
|
|
197
|
+
return self._run_batch(contents=contents, **kwargs)
|
|
198
|
+
except Exception as e:
|
|
199
|
+
self.context.status[self.identifier] = {"step_error": str(e)}
|
|
200
|
+
if self.context.raise_on_error:
|
|
201
|
+
raise e
|
|
202
|
+
return None
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
from typing import Callable, Optional
|
|
3
|
+
|
|
4
|
+
from unstructured_ingest.v2.logger import logger
|
|
5
|
+
from unstructured_ingest.v2.otel import OtelHandler
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def instrument(
|
|
9
|
+
span_name: Optional[str] = None,
|
|
10
|
+
record_exception: bool = True,
|
|
11
|
+
attributes: dict[str, str] = None,
|
|
12
|
+
log_out: Callable = logger.info,
|
|
13
|
+
) -> Callable[[Callable], Callable]:
|
|
14
|
+
def span_decorator(func: Callable) -> Callable:
|
|
15
|
+
def get_name(self) -> str:
|
|
16
|
+
if span_name:
|
|
17
|
+
return span_name
|
|
18
|
+
return f"{self.identifier} step"
|
|
19
|
+
|
|
20
|
+
@wraps(func)
|
|
21
|
+
def wrap_with_span(self, *args, **kwargs):
|
|
22
|
+
name = get_name(self=self)
|
|
23
|
+
otel_handler = OtelHandler(otel_endpoint=self.context.otel_endpoint, log_out=log_out)
|
|
24
|
+
with otel_handler.get_tracer().start_as_current_span(
|
|
25
|
+
name, record_exception=record_exception
|
|
26
|
+
) as span:
|
|
27
|
+
otel_handler.set_attributes(span, attributes)
|
|
28
|
+
return func(self, *args, **kwargs)
|
|
29
|
+
|
|
30
|
+
return wrap_with_span
|
|
31
|
+
|
|
32
|
+
return span_decorator
|
{unstructured-ingest-0.0.6 → unstructured-ingest-0.0.7}/unstructured_ingest/v2/pipeline/pipeline.py
RENAMED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import multiprocessing as mp
|
|
3
3
|
from dataclasses import InitVar, dataclass, field
|
|
4
|
-
from time import time
|
|
5
4
|
from typing import Any, Optional, Union
|
|
6
5
|
|
|
7
|
-
from unstructured_ingest.v2.interfaces import ProcessorConfig
|
|
6
|
+
from unstructured_ingest.v2.interfaces import ProcessorConfig, Uploader
|
|
8
7
|
from unstructured_ingest.v2.logger import logger, make_default_logger
|
|
8
|
+
from unstructured_ingest.v2.otel import OtelHandler
|
|
9
9
|
from unstructured_ingest.v2.pipeline.steps.chunk import Chunker, ChunkStep
|
|
10
10
|
from unstructured_ingest.v2.pipeline.steps.download import DownloaderT, DownloadStep
|
|
11
11
|
from unstructured_ingest.v2.pipeline.steps.embed import Embedder, EmbedStep
|
|
@@ -14,7 +14,7 @@ from unstructured_ingest.v2.pipeline.steps.index import IndexerT, IndexStep
|
|
|
14
14
|
from unstructured_ingest.v2.pipeline.steps.partition import Partitioner, PartitionStep
|
|
15
15
|
from unstructured_ingest.v2.pipeline.steps.stage import UploadStager, UploadStageStep
|
|
16
16
|
from unstructured_ingest.v2.pipeline.steps.uncompress import Uncompressor, UncompressStep
|
|
17
|
-
from unstructured_ingest.v2.pipeline.steps.upload import
|
|
17
|
+
from unstructured_ingest.v2.pipeline.steps.upload import UploadStep
|
|
18
18
|
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
|
|
19
19
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
20
20
|
ConnectionConfig,
|
|
@@ -77,6 +77,8 @@ class Pipeline:
|
|
|
77
77
|
filterer: Filterer = None,
|
|
78
78
|
):
|
|
79
79
|
make_default_logger(level=logging.DEBUG if self.context.verbose else logging.INFO)
|
|
80
|
+
otel_handler = OtelHandler(otel_endpoint=self.context.otel_endpoint)
|
|
81
|
+
otel_handler.init_trace()
|
|
80
82
|
self.indexer_step = IndexStep(process=indexer, context=self.context)
|
|
81
83
|
self.downloader_step = DownloadStep(process=downloader, context=self.context)
|
|
82
84
|
self.filter_step = FilterStep(process=filterer, context=self.context) if filterer else None
|
|
@@ -121,11 +123,13 @@ class Pipeline:
|
|
|
121
123
|
logger.error(f"{k}: [{kk}] {vv}")
|
|
122
124
|
|
|
123
125
|
def run(self):
|
|
126
|
+
otel_handler = OtelHandler(otel_endpoint=self.context.otel_endpoint, log_out=logger.info)
|
|
124
127
|
try:
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
128
|
+
with otel_handler.get_tracer().start_as_current_span(
|
|
129
|
+
"ingest process", record_exception=True
|
|
130
|
+
):
|
|
131
|
+
self._run_prechecks()
|
|
132
|
+
self._run()
|
|
129
133
|
finally:
|
|
130
134
|
self.log_statuses()
|
|
131
135
|
self.cleanup()
|
|
@@ -6,6 +6,7 @@ from typing import Generator, Optional, TypeVar
|
|
|
6
6
|
from unstructured_ingest.v2.interfaces.indexer import Indexer
|
|
7
7
|
from unstructured_ingest.v2.logger import logger
|
|
8
8
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
9
|
+
from unstructured_ingest.v2.pipeline.otel import instrument
|
|
9
10
|
from unstructured_ingest.v2.utils import serialize_base_model_json
|
|
10
11
|
|
|
11
12
|
IndexerT = TypeVar("IndexerT", bound=Indexer)
|
|
@@ -31,6 +32,7 @@ class IndexStep(PipelineStep):
|
|
|
31
32
|
f"connection configs: {connection_config}"
|
|
32
33
|
)
|
|
33
34
|
|
|
35
|
+
@instrument(span_name=STEP_ID)
|
|
34
36
|
def run(self) -> Generator[str, None, None]:
|
|
35
37
|
for file_data in self.process.run():
|
|
36
38
|
logger.debug(f"Generated file data: {file_data.to_dict()}")
|
|
@@ -4,9 +4,10 @@ from pathlib import Path
|
|
|
4
4
|
from typing import Callable, Optional, TypedDict
|
|
5
5
|
|
|
6
6
|
from unstructured_ingest.v2.interfaces import FileData
|
|
7
|
-
from unstructured_ingest.v2.interfaces.uploader import UploadContent
|
|
7
|
+
from unstructured_ingest.v2.interfaces.uploader import UploadContent
|
|
8
8
|
from unstructured_ingest.v2.logger import logger
|
|
9
|
-
from unstructured_ingest.v2.pipeline.interfaces import
|
|
9
|
+
from unstructured_ingest.v2.pipeline.interfaces import BatchPipelineStep
|
|
10
|
+
from unstructured_ingest.v2.pipeline.otel import instrument
|
|
10
11
|
|
|
11
12
|
STEP_ID = "upload"
|
|
12
13
|
|
|
@@ -17,8 +18,7 @@ class UploadStepContent(TypedDict):
|
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
@dataclass
|
|
20
|
-
class UploadStep(
|
|
21
|
-
process: Uploader
|
|
21
|
+
class UploadStep(BatchPipelineStep):
|
|
22
22
|
identifier: str = STEP_ID
|
|
23
23
|
|
|
24
24
|
def __str__(self):
|
|
@@ -34,25 +34,13 @@ class UploadStep(PipelineStep):
|
|
|
34
34
|
f"connection configs: {connection_config}"
|
|
35
35
|
)
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@timed
|
|
41
|
-
def __call__(self, iterable: iterable_input):
|
|
42
|
-
logger.info(
|
|
43
|
-
f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore
|
|
44
|
-
)
|
|
45
|
-
if self.process.is_async():
|
|
46
|
-
self.process_async(iterable=iterable)
|
|
47
|
-
else:
|
|
48
|
-
self.process_whole(iterable=iterable)
|
|
49
|
-
|
|
50
|
-
def _run(self, fn: Callable, contents: list[UploadStepContent]):
|
|
37
|
+
@instrument(span_name=STEP_ID)
|
|
38
|
+
def _run_batch(self, contents: list[UploadStepContent]) -> None:
|
|
51
39
|
upload_contents = [
|
|
52
40
|
UploadContent(path=Path(c["path"]), file_data=FileData.from_file(c["file_data_path"]))
|
|
53
41
|
for c in contents
|
|
54
42
|
]
|
|
55
|
-
|
|
43
|
+
self.process.run_batch(contents=upload_contents)
|
|
56
44
|
|
|
57
45
|
async def _run_async(self, path: str, file_data_path: str, fn: Optional[Callable] = None):
|
|
58
46
|
fn = fn or self.process.run_async
|
|
@@ -14,7 +14,6 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
14
14
|
AccessConfig,
|
|
15
15
|
ConnectionConfig,
|
|
16
16
|
FileData,
|
|
17
|
-
UploadContent,
|
|
18
17
|
Uploader,
|
|
19
18
|
UploaderConfig,
|
|
20
19
|
UploadStager,
|
|
@@ -139,13 +138,9 @@ class AstraDBUploader(Uploader):
|
|
|
139
138
|
)
|
|
140
139
|
return astra_db_collection
|
|
141
140
|
|
|
142
|
-
def run(self,
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
with open(content.path) as elements_file:
|
|
146
|
-
elements = json.load(elements_file)
|
|
147
|
-
elements_dict.extend(elements)
|
|
148
|
-
|
|
141
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
142
|
+
with path.open("r") as file:
|
|
143
|
+
elements_dict = json.load(file)
|
|
149
144
|
logger.info(
|
|
150
145
|
f"writing {len(elements_dict)} objects to destination "
|
|
151
146
|
f"collection {self.upload_config.collection_name}"
|
|
@@ -12,7 +12,7 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
12
12
|
from unstructured_ingest.v2.interfaces import (
|
|
13
13
|
AccessConfig,
|
|
14
14
|
ConnectionConfig,
|
|
15
|
-
|
|
15
|
+
FileData,
|
|
16
16
|
Uploader,
|
|
17
17
|
UploaderConfig,
|
|
18
18
|
UploadStager,
|
|
@@ -192,14 +192,9 @@ class AzureCognitiveSearchUploader(Uploader):
|
|
|
192
192
|
def write_dict_wrapper(self, elements_dict):
|
|
193
193
|
return self.write_dict(elements_dict=elements_dict)
|
|
194
194
|
|
|
195
|
-
def run(self,
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
for content in contents:
|
|
199
|
-
with open(content.path) as elements_file:
|
|
200
|
-
elements = json.load(elements_file)
|
|
201
|
-
elements_dict.extend(elements)
|
|
202
|
-
|
|
195
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
196
|
+
with path.open("r") as file:
|
|
197
|
+
elements_dict = json.load(file)
|
|
203
198
|
logger.info(
|
|
204
199
|
f"writing document batches to destination"
|
|
205
200
|
f" endpoint at {str(self.connection_config.endpoint)}"
|
|
@@ -15,7 +15,6 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
15
|
AccessConfig,
|
|
16
16
|
ConnectionConfig,
|
|
17
17
|
FileData,
|
|
18
|
-
UploadContent,
|
|
19
18
|
Uploader,
|
|
20
19
|
UploaderConfig,
|
|
21
20
|
UploadStager,
|
|
@@ -186,13 +185,9 @@ class ChromaUploader(Uploader):
|
|
|
186
185
|
)
|
|
187
186
|
return chroma_dict
|
|
188
187
|
|
|
189
|
-
def run(self,
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
for content in contents:
|
|
193
|
-
with open(content.path) as elements_file:
|
|
194
|
-
elements = json.load(elements_file)
|
|
195
|
-
elements_dict.extend(elements)
|
|
188
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
189
|
+
with path.open("r") as file:
|
|
190
|
+
elements_dict = json.load(file)
|
|
196
191
|
|
|
197
192
|
logger.info(
|
|
198
193
|
f"writing {len(elements_dict)} objects to destination "
|
|
@@ -26,7 +26,6 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
26
26
|
FileDataSourceMetadata,
|
|
27
27
|
Indexer,
|
|
28
28
|
IndexerConfig,
|
|
29
|
-
UploadContent,
|
|
30
29
|
Uploader,
|
|
31
30
|
UploaderConfig,
|
|
32
31
|
UploadStager,
|
|
@@ -134,14 +133,11 @@ class CouchbaseUploader(Uploader):
|
|
|
134
133
|
logger.error(f"Failed to validate connection {e}", exc_info=True)
|
|
135
134
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
136
135
|
|
|
137
|
-
def run(self,
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
with open(content.path) as elements_file:
|
|
141
|
-
elements.extend(json.load(elements_file))
|
|
142
|
-
|
|
136
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
137
|
+
with path.open("r") as file:
|
|
138
|
+
elements_dict = json.load(file)
|
|
143
139
|
logger.info(
|
|
144
|
-
f"writing {len(
|
|
140
|
+
f"writing {len(elements_dict)} objects to destination "
|
|
145
141
|
f"bucket, {self.connection_config.bucket} "
|
|
146
142
|
f"at {self.connection_config.connection_string}",
|
|
147
143
|
)
|
|
@@ -150,7 +146,7 @@ class CouchbaseUploader(Uploader):
|
|
|
150
146
|
scope = bucket.scope(self.connection_config.scope)
|
|
151
147
|
collection = scope.collection(self.connection_config.collection)
|
|
152
148
|
|
|
153
|
-
for chunk in batch_generator(
|
|
149
|
+
for chunk in batch_generator(elements_dict, self.upload_config.batch_size):
|
|
154
150
|
collection.upsert_multi({doc_id: doc for doc in chunk for doc_id, doc in doc.items()})
|
|
155
151
|
|
|
156
152
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
from typing import TYPE_CHECKING, Any, Optional
|
|
4
5
|
|
|
5
6
|
from pydantic import Field, Secret
|
|
@@ -9,7 +10,7 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
9
10
|
from unstructured_ingest.v2.interfaces import (
|
|
10
11
|
AccessConfig,
|
|
11
12
|
ConnectionConfig,
|
|
12
|
-
|
|
13
|
+
FileData,
|
|
13
14
|
Uploader,
|
|
14
15
|
UploaderConfig,
|
|
15
16
|
)
|
|
@@ -142,15 +143,13 @@ class DatabricksVolumesUploader(Uploader):
|
|
|
142
143
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
143
144
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
144
145
|
|
|
145
|
-
def run(self,
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
overwrite=self.upload_config.overwrite,
|
|
153
|
-
)
|
|
146
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
147
|
+
output_path = os.path.join(self.upload_config.path, path.name)
|
|
148
|
+
self.get_client().files.upload(
|
|
149
|
+
file_path=output_path,
|
|
150
|
+
contents=path,
|
|
151
|
+
overwrite=self.upload_config.overwrite,
|
|
152
|
+
)
|
|
154
153
|
|
|
155
154
|
|
|
156
155
|
databricks_volumes_destination_entry = DestinationRegistryEntry(
|
|
@@ -26,7 +26,6 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
26
26
|
FileDataSourceMetadata,
|
|
27
27
|
Indexer,
|
|
28
28
|
IndexerConfig,
|
|
29
|
-
UploadContent,
|
|
30
29
|
Uploader,
|
|
31
30
|
UploaderConfig,
|
|
32
31
|
UploadStager,
|
|
@@ -384,14 +383,12 @@ class ElasticsearchUploader(Uploader):
|
|
|
384
383
|
|
|
385
384
|
return parallel_bulk
|
|
386
385
|
|
|
387
|
-
def run(self,
|
|
386
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
388
387
|
parallel_bulk = self.load_parallel_bulk()
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
with open(content.path) as elements_file:
|
|
392
|
-
elements = json.load(elements_file)
|
|
393
|
-
elements_dict.extend(elements)
|
|
388
|
+
with path.open("r") as file:
|
|
389
|
+
elements_dict = json.load(file)
|
|
394
390
|
upload_destination = self.connection_config.hosts or self.connection_config.cloud_id
|
|
391
|
+
|
|
395
392
|
logger.info(
|
|
396
393
|
f"writing {len(elements_dict)} elements via document batches to destination "
|
|
397
394
|
f"index named {self.upload_config.index_name} at {upload_destination} with "
|
|
@@ -7,7 +7,7 @@ from typing import Any, Generator, Optional
|
|
|
7
7
|
from pydantic import Field, Secret
|
|
8
8
|
|
|
9
9
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
10
|
-
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData
|
|
10
|
+
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData
|
|
11
11
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
12
12
|
DestinationRegistryEntry,
|
|
13
13
|
SourceRegistryEntry,
|
|
@@ -152,8 +152,8 @@ class AzureUploader(FsspecUploader):
|
|
|
152
152
|
super().precheck()
|
|
153
153
|
|
|
154
154
|
@requires_dependencies(["adlfs", "fsspec"], extras="azure")
|
|
155
|
-
def run(self,
|
|
156
|
-
return super().run(
|
|
155
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
156
|
+
return super().run(path=path, file_data=file_data, **kwargs)
|
|
157
157
|
|
|
158
158
|
@requires_dependencies(["adlfs", "fsspec"], extras="azure")
|
|
159
159
|
async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|