unstructured-ingest 1.0.11__tar.gz → 1.0.12__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/PKG-INFO +1 -1
- unstructured_ingest-1.0.12/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/google_drive.py +155 -63
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +35 -5
- unstructured_ingest-1.0.11/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/.gitignore +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/README.md +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/pyproject.toml +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/README.md +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/data_types/entities.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/azure_openai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/mixedbreadai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/togetherai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/errors_v2.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/connector.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/downloader.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/otel.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/airtable.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/astradb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/confluence.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/discord.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/github.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/jira.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/local.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/outlook.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/slack.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/vectara.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/embedder.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/partitioner.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/unstructured_api.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/html.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/utils/table.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.12" # pragma: no cover
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import io
|
|
2
1
|
import json
|
|
3
2
|
from contextlib import contextmanager
|
|
4
3
|
from dataclasses import dataclass, field
|
|
@@ -16,7 +15,6 @@ from unstructured_ingest.data_types.file_data import (
|
|
|
16
15
|
)
|
|
17
16
|
from unstructured_ingest.error import (
|
|
18
17
|
SourceConnectionError,
|
|
19
|
-
SourceConnectionNetworkError,
|
|
20
18
|
)
|
|
21
19
|
from unstructured_ingest.interfaces import (
|
|
22
20
|
AccessConfig,
|
|
@@ -34,25 +32,34 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
34
32
|
|
|
35
33
|
if TYPE_CHECKING:
|
|
36
34
|
from googleapiclient.discovery import Resource as GoogleAPIResource
|
|
37
|
-
from googleapiclient.http import MediaIoBaseDownload
|
|
38
35
|
|
|
39
36
|
CONNECTOR_TYPE = "google_drive"
|
|
40
37
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
"application/vnd.google-apps.
|
|
45
|
-
|
|
46
|
-
"application/vnd.google-apps.
|
|
47
|
-
|
|
48
|
-
"application/vnd.google-apps.
|
|
38
|
+
|
|
39
|
+
# Maps Google-native Drive MIME types → export MIME types
|
|
40
|
+
GOOGLE_EXPORT_MIME_MAP = {
|
|
41
|
+
"application/vnd.google-apps.document": \
|
|
42
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
43
|
+
"application/vnd.google-apps.spreadsheet": \
|
|
44
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
45
|
+
"application/vnd.google-apps.presentation": \
|
|
46
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
# Maps export MIME types → file extensions
|
|
50
|
+
EXPORT_EXTENSION_MAP = {
|
|
51
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
|
52
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
|
53
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
|
54
|
+
"application/pdf": ".pdf",
|
|
55
|
+
"text/html": ".html",
|
|
49
56
|
}
|
|
50
57
|
|
|
51
58
|
|
|
52
59
|
class GoogleDriveAccessConfig(AccessConfig):
|
|
53
|
-
service_account_key: Optional[
|
|
54
|
-
|
|
55
|
-
)
|
|
60
|
+
service_account_key: Optional[
|
|
61
|
+
Annotated[dict, BeforeValidator(conform_string_to_dict)]
|
|
62
|
+
] = Field(default=None, description="Credentials values to use for authentication")
|
|
56
63
|
service_account_key_path: Optional[Path] = Field(
|
|
57
64
|
default=None,
|
|
58
65
|
description="File path to credentials values to use for authentication",
|
|
@@ -171,10 +178,14 @@ class GoogleDriveIndexer(Indexer):
|
|
|
171
178
|
Please enable it in the Google Cloud Console."
|
|
172
179
|
)
|
|
173
180
|
else:
|
|
174
|
-
raise SourceConnectionError(
|
|
181
|
+
raise SourceConnectionError(
|
|
182
|
+
"Google drive API unreachable for an unknown reason!"
|
|
183
|
+
)
|
|
175
184
|
|
|
176
185
|
@staticmethod
|
|
177
|
-
def count_files_recursively(
|
|
186
|
+
def count_files_recursively(
|
|
187
|
+
files_client, folder_id: str, extensions: list[str] = None
|
|
188
|
+
) -> int:
|
|
178
189
|
"""
|
|
179
190
|
Count non-folder files recursively under the given folder.
|
|
180
191
|
If `extensions` is provided, only count files
|
|
@@ -255,7 +266,9 @@ class GoogleDriveIndexer(Indexer):
|
|
|
255
266
|
# that the service account has proper permissions."
|
|
256
267
|
# )
|
|
257
268
|
else:
|
|
258
|
-
logger.info(
|
|
269
|
+
logger.info(
|
|
270
|
+
f"Found {file_count} files recursively in the folder."
|
|
271
|
+
)
|
|
259
272
|
else:
|
|
260
273
|
# Non-recursive: check for at least one immediate non-folder child.
|
|
261
274
|
response = client.list(
|
|
@@ -306,7 +319,9 @@ class GoogleDriveIndexer(Indexer):
|
|
|
306
319
|
date_modified_str = root_info.pop("modifiedTime", None)
|
|
307
320
|
parent_path = root_info.pop("parent_path", None)
|
|
308
321
|
parent_root_path = root_info.pop("parent_root_path", None)
|
|
309
|
-
date_modified_dt =
|
|
322
|
+
date_modified_dt = (
|
|
323
|
+
parser.parse(date_modified_str) if date_modified_str else None
|
|
324
|
+
)
|
|
310
325
|
if (
|
|
311
326
|
parent_path
|
|
312
327
|
and isinstance(parent_path, str)
|
|
@@ -406,7 +421,9 @@ class GoogleDriveIndexer(Indexer):
|
|
|
406
421
|
) -> list[FileData]:
|
|
407
422
|
root_info = self.get_root_info(files_client=files_client, object_id=object_id)
|
|
408
423
|
if not self.is_dir(root_info):
|
|
409
|
-
root_info["permissions"] = self.extract_permissions(
|
|
424
|
+
root_info["permissions"] = self.extract_permissions(
|
|
425
|
+
root_info.get("permissions")
|
|
426
|
+
)
|
|
410
427
|
data = [self.map_file_data(root_info)]
|
|
411
428
|
else:
|
|
412
429
|
file_contents = self.get_paginated_results(
|
|
@@ -427,7 +444,7 @@ class GoogleDriveIndexer(Indexer):
|
|
|
427
444
|
def extract_permissions(self, permissions: Optional[list[dict]]) -> list[dict]:
|
|
428
445
|
if not permissions:
|
|
429
446
|
logger.debug("no permissions found")
|
|
430
|
-
return {}
|
|
447
|
+
return [{}]
|
|
431
448
|
|
|
432
449
|
# https://developers.google.com/workspace/drive/api/guides/ref-roles
|
|
433
450
|
role_mapping = {
|
|
@@ -478,61 +495,136 @@ class GoogleDriveDownloaderConfig(DownloaderConfig):
|
|
|
478
495
|
|
|
479
496
|
@dataclass
|
|
480
497
|
class GoogleDriveDownloader(Downloader):
|
|
498
|
+
"""
|
|
499
|
+
Downloads files from Google Drive using authenticated direct HTTP requests
|
|
500
|
+
via `exportLinks` (for Google-native files) and `webContentLink` (for binary files).
|
|
501
|
+
|
|
502
|
+
These links emulate the behavior of Google Drive's "File > Download as..." options
|
|
503
|
+
in the UI and bypass the size limitations of `files.export()`.
|
|
504
|
+
|
|
505
|
+
Behavior:
|
|
506
|
+
- Google-native formats are downloaded using `exportLinks` in appropriate MIME formats.
|
|
507
|
+
- Binary files (non-Google-native) are downloaded using `webContentLink`.
|
|
508
|
+
- All downloads are performed via `requests.get()` using a valid bearer token.
|
|
509
|
+
"""
|
|
510
|
+
|
|
481
511
|
connection_config: GoogleDriveConnectionConfig
|
|
482
512
|
download_config: GoogleDriveDownloaderConfig = field(
|
|
483
513
|
default_factory=lambda: GoogleDriveDownloaderConfig()
|
|
484
514
|
)
|
|
485
515
|
connector_type: str = CONNECTOR_TYPE
|
|
486
516
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
return downloaded
|
|
517
|
+
def _get_download_url_and_ext(
|
|
518
|
+
self, file_id: str, mime_type: str
|
|
519
|
+
) -> tuple[str, str]:
|
|
520
|
+
"""
|
|
521
|
+
Resolves the appropriate download URL and expected file extension for a Google Drive file.
|
|
493
522
|
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
download_path.parent.mkdir(parents=True, exist_ok=True)
|
|
497
|
-
logger.debug(f"writing {file_data.source_identifiers.fullpath} to {download_path}")
|
|
498
|
-
with open(download_path, "wb") as handler:
|
|
499
|
-
handler.write(file_contents.getbuffer())
|
|
500
|
-
return self.generate_download_response(file_data=file_data, download_path=download_path)
|
|
523
|
+
- Google-native files use export MIME types from exportLinks (e.g., .docx, .xlsx).
|
|
524
|
+
- Binary files use webContentLink (e.g., uploaded PDFs or ZIPs).
|
|
501
525
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
from googleapiclient.http import MediaIoBaseDownload
|
|
526
|
+
Returns:
|
|
527
|
+
Tuple[str, str]: (download URL, file extension or "")
|
|
505
528
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
if not mime_type:
|
|
510
|
-
raise TypeError(
|
|
511
|
-
f"File not supported. Name: {file_data.source_identifiers.filename} "
|
|
512
|
-
f"ID: {record_id} "
|
|
513
|
-
f"MimeType: {mime_type}"
|
|
514
|
-
)
|
|
529
|
+
Raises:
|
|
530
|
+
SourceConnectionError: If no valid export or download link is available.
|
|
531
|
+
"""
|
|
515
532
|
with self.connection_config.get_client() as client:
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
533
|
+
metadata = client.get(
|
|
534
|
+
fileId=file_id, fields="exportLinks,webContentLink"
|
|
535
|
+
).execute()
|
|
536
|
+
|
|
537
|
+
export_links = metadata.get("exportLinks", {})
|
|
538
|
+
web_link = metadata.get("webContentLink")
|
|
539
|
+
|
|
540
|
+
if export_mime := GOOGLE_EXPORT_MIME_MAP.get(mime_type):
|
|
541
|
+
url = export_links.get(export_mime)
|
|
542
|
+
if not url:
|
|
543
|
+
raise SourceConnectionError(
|
|
544
|
+
f"No export link found for {file_id} as {export_mime}"
|
|
522
545
|
)
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
546
|
+
ext = EXPORT_EXTENSION_MAP.get(export_mime, "")
|
|
547
|
+
return url, ext
|
|
548
|
+
|
|
549
|
+
if not web_link:
|
|
550
|
+
raise SourceConnectionError(
|
|
551
|
+
f"No webContentLink available for file {file_id}"
|
|
552
|
+
)
|
|
553
|
+
return web_link, ""
|
|
554
|
+
|
|
555
|
+
@requires_dependencies(["httpx", "google.auth"], extras="google-drive")
|
|
556
|
+
def _download_url(self, file_data: FileData, url: str, ext: str = "") -> Path:
|
|
557
|
+
"""
|
|
558
|
+
Streams file content directly to disk using authenticated HTTP request.
|
|
559
|
+
|
|
560
|
+
Writes the file to the correct path in the download directory while downloading.
|
|
561
|
+
Avoids buffering large files in memory.
|
|
562
|
+
|
|
563
|
+
Returns:
|
|
564
|
+
Path to the downloaded file.
|
|
565
|
+
|
|
566
|
+
Raises:
|
|
567
|
+
SourceConnectionError: If the HTTP request fails.
|
|
568
|
+
"""
|
|
569
|
+
import httpx
|
|
570
|
+
from google.auth.transport.requests import Request
|
|
571
|
+
from google.oauth2 import service_account
|
|
572
|
+
|
|
573
|
+
access_config = self.connection_config.access_config.get_secret_value()
|
|
574
|
+
key_data = access_config.get_service_account_key()
|
|
575
|
+
creds = service_account.Credentials.from_service_account_info(
|
|
576
|
+
key_data,
|
|
577
|
+
scopes=["https://www.googleapis.com/auth/drive.readonly"],
|
|
578
|
+
)
|
|
579
|
+
creds.refresh(Request())
|
|
580
|
+
|
|
581
|
+
headers = {
|
|
582
|
+
"Authorization": f"Bearer {creds.token}",
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
download_path = self.get_download_path(file_data)
|
|
586
|
+
if ext:
|
|
587
|
+
download_path = download_path.with_suffix(ext)
|
|
588
|
+
|
|
589
|
+
download_path.parent.mkdir(parents=True, exist_ok=True)
|
|
590
|
+
logger.debug(f"Streaming file to {download_path}")
|
|
591
|
+
|
|
592
|
+
with (
|
|
593
|
+
httpx.Client(timeout=None, follow_redirects=True) as client,
|
|
594
|
+
client.stream("GET", url, headers=headers) as response,
|
|
595
|
+
):
|
|
596
|
+
if response.status_code != 200:
|
|
597
|
+
raise SourceConnectionError(
|
|
598
|
+
f"Failed to stream download from {url}: {response.status_code}"
|
|
526
599
|
)
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
600
|
+
with open(download_path, "wb") as f:
|
|
601
|
+
for chunk in response.iter_bytes():
|
|
602
|
+
f.write(chunk)
|
|
603
|
+
|
|
604
|
+
return download_path
|
|
605
|
+
|
|
606
|
+
def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
|
|
607
|
+
mime_type = file_data.additional_metadata.get("mimeType", "")
|
|
608
|
+
record_id = file_data.identifier
|
|
609
|
+
|
|
610
|
+
logger.debug(
|
|
611
|
+
f"Downloading file {file_data.source_identifiers.fullpath} of type {mime_type}"
|
|
612
|
+
)
|
|
613
|
+
|
|
614
|
+
download_url, ext = self._get_download_url_and_ext(record_id, mime_type)
|
|
615
|
+
download_path = self._download_url(file_data, download_url, ext)
|
|
616
|
+
|
|
617
|
+
file_data.additional_metadata.update(
|
|
618
|
+
{
|
|
619
|
+
"download_method": "export_link" if ext else "web_content_link",
|
|
620
|
+
"download_url_used": download_url,
|
|
621
|
+
}
|
|
622
|
+
)
|
|
623
|
+
file_data.local_download_path = str(download_path.resolve())
|
|
624
|
+
|
|
625
|
+
return self.generate_download_response(
|
|
626
|
+
file_data=file_data, download_path=download_path
|
|
627
|
+
)
|
|
536
628
|
|
|
537
629
|
|
|
538
630
|
google_drive_source_entry = SourceRegistryEntry(
|
|
@@ -69,8 +69,8 @@ class IbmWatsonxConnectionConfig(ConnectionConfig):
|
|
|
69
69
|
|
|
70
70
|
@property
|
|
71
71
|
def bearer_token(self) -> str:
|
|
72
|
-
# Add
|
|
73
|
-
timestamp = int(time.time()) + 60
|
|
72
|
+
# Add 5 minutes to deal with edge cases where the token expires before the request is made
|
|
73
|
+
timestamp = int(time.time()) + (60 * 5)
|
|
74
74
|
if self._bearer_token is None or self._bearer_token.get("expiration", 0) <= timestamp:
|
|
75
75
|
self._bearer_token = self.generate_bearer_token()
|
|
76
76
|
return self._bearer_token["access_token"]
|
|
@@ -240,7 +240,7 @@ class IbmWatsonxUploader(SQLUploader):
|
|
|
240
240
|
def upload_data_table(
|
|
241
241
|
self, table: "Table", data_table: "ArrowTable", file_data: FileData
|
|
242
242
|
) -> None:
|
|
243
|
-
from pyiceberg.exceptions import CommitFailedException
|
|
243
|
+
from pyiceberg.exceptions import CommitFailedException, RESTError
|
|
244
244
|
from tenacity import (
|
|
245
245
|
before_log,
|
|
246
246
|
retry,
|
|
@@ -265,21 +265,51 @@ class IbmWatsonxUploader(SQLUploader):
|
|
|
265
265
|
table.refresh()
|
|
266
266
|
logger.debug(e)
|
|
267
267
|
raise IcebergCommitFailedException(e)
|
|
268
|
+
except RESTError:
|
|
269
|
+
raise
|
|
268
270
|
except Exception as e:
|
|
269
271
|
raise ProviderError(f"Failed to upload data to table: {e}")
|
|
270
272
|
|
|
271
273
|
try:
|
|
272
274
|
return _upload_data_table(table, data_table, file_data)
|
|
275
|
+
except RESTError:
|
|
276
|
+
raise
|
|
273
277
|
except ProviderError:
|
|
274
278
|
raise
|
|
275
279
|
except Exception as e:
|
|
276
280
|
raise ProviderError(f"Failed to upload data to table: {e}")
|
|
277
281
|
|
|
282
|
+
@requires_dependencies(["pyiceberg", "tenacity"], extras="ibm-watsonx-s3")
|
|
278
283
|
def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
|
|
284
|
+
from pyiceberg.exceptions import RESTError
|
|
285
|
+
from tenacity import (
|
|
286
|
+
before_log,
|
|
287
|
+
retry,
|
|
288
|
+
retry_if_exception_type,
|
|
289
|
+
stop_after_attempt,
|
|
290
|
+
wait_random,
|
|
291
|
+
)
|
|
292
|
+
|
|
279
293
|
data_table = self._df_to_arrow_table(df)
|
|
280
294
|
|
|
281
|
-
|
|
282
|
-
|
|
295
|
+
# Retry connection in case of connection error
|
|
296
|
+
@retry(
|
|
297
|
+
stop=stop_after_attempt(2),
|
|
298
|
+
wait=wait_random(),
|
|
299
|
+
retry=retry_if_exception_type(RESTError),
|
|
300
|
+
before=before_log(logger, logging.DEBUG),
|
|
301
|
+
reraise=True,
|
|
302
|
+
)
|
|
303
|
+
def _upload_dataframe(data_table: Any, file_data: FileData) -> None:
|
|
304
|
+
with self.get_table() as table:
|
|
305
|
+
self.upload_data_table(table, data_table, file_data)
|
|
306
|
+
|
|
307
|
+
try:
|
|
308
|
+
return _upload_dataframe(data_table, file_data)
|
|
309
|
+
except ProviderError:
|
|
310
|
+
raise
|
|
311
|
+
except Exception as e:
|
|
312
|
+
raise ProviderError(f"Failed to upload data to table: {e}")
|
|
283
313
|
|
|
284
314
|
@requires_dependencies(["pandas"], extras="ibm-watsonx-s3")
|
|
285
315
|
def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.0.11" # pragma: no cover
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/cmd.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/dest.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/importer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/base/src.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/utils/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/cli/utils/click.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/data_types/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/data_types/entities.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/azure_openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/bedrock.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/huggingface.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/mixedbreadai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/octoai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/togetherai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/vertexai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/embed/voyageai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/indexer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/process.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/interfaces/uploader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/otel.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/pipeline/pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.11 → unstructured_ingest-1.0.12}/unstructured_ingest/processes/chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|