unstructured-ingest 1.0.41__tar.gz → 1.0.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/.gitignore +1 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/PKG-INFO +2 -1
- unstructured_ingest-1.0.44/unstructured_ingest/__version__.py +1 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/togetherai.py +1 -1
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/astradb.py +4 -2
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/confluence.py +5 -2
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/delta_table.py +84 -30
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/sql.py +5 -3
- unstructured_ingest-1.0.41/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/README.md +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/pyproject.toml +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/README.md +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/data_types/entities.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/azure_openai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/mixedbreadai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/errors_v2.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/connector.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/downloader.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/otel.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/airtable.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/discord.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/github.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/google_drive.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/jira.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/local.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/neo4j.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/outlook.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/pinecone.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/slack.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/vectara.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/embedder.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/partitioner.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/unstructured_api.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/html.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/utils/table.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.0.41
|
|
3
|
+
Version: 1.0.44
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -60,6 +60,7 @@ Provides-Extra: delta-table
|
|
|
60
60
|
Requires-Dist: boto3; extra == 'delta-table'
|
|
61
61
|
Requires-Dist: deltalake; extra == 'delta-table'
|
|
62
62
|
Requires-Dist: pandas; extra == 'delta-table'
|
|
63
|
+
Requires-Dist: tenacity; extra == 'delta-table'
|
|
63
64
|
Provides-Extra: discord
|
|
64
65
|
Requires-Dist: discord-py; extra == 'discord'
|
|
65
66
|
Provides-Extra: doc
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.44" # pragma: no cover
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/togetherai.py
RENAMED
|
@@ -22,7 +22,7 @@ if TYPE_CHECKING:
|
|
|
22
22
|
class TogetherAIEmbeddingConfig(EmbeddingConfig):
|
|
23
23
|
api_key: SecretStr = Field(description="API key for Together AI")
|
|
24
24
|
embedder_model_name: str = Field(
|
|
25
|
-
default="togethercomputer/m2-bert-80M-
|
|
25
|
+
default="togethercomputer/m2-bert-80M-32k-retrieval",
|
|
26
26
|
alias="model_name",
|
|
27
27
|
description="Together AI model name",
|
|
28
28
|
)
|
|
@@ -197,8 +197,10 @@ class AstraDBIndexer(Indexer):
|
|
|
197
197
|
id_batches = batch_generator(ids, self.index_config.batch_size)
|
|
198
198
|
for batch in id_batches:
|
|
199
199
|
batch_items = [BatchItem(identifier=b) for b in batch]
|
|
200
|
-
display_name = (
|
|
201
|
-
|
|
200
|
+
display_name = (
|
|
201
|
+
f"{self.index_config.collection_name}-{self.index_config.keyspace}"
|
|
202
|
+
f"-[{batch_items[0].identifier}..{batch_items[-1].identifier}]"
|
|
203
|
+
)
|
|
202
204
|
fd = AstraDBBatchFileData(
|
|
203
205
|
connector_type=CONNECTOR_TYPE,
|
|
204
206
|
metadata=FileDataSourceMetadata(
|
|
@@ -186,12 +186,15 @@ class ConfluenceIndexer(Indexer):
|
|
|
186
186
|
pages = client.get_all_pages_from_space(
|
|
187
187
|
space=space_key,
|
|
188
188
|
start=0,
|
|
189
|
-
limit=self.index_config.max_num_of_docs_from_each_space,
|
|
190
189
|
expand=None,
|
|
191
190
|
content_type="page", # blogpost and comment types not currently supported
|
|
192
191
|
status=None,
|
|
193
192
|
)
|
|
194
|
-
|
|
193
|
+
# Limit the number of documents to max_num_of_docs_from_each_space
|
|
194
|
+
# Note: this is needed because the limit field in client.get_all_pages_from_space does
|
|
195
|
+
# not seem to work as expected
|
|
196
|
+
limited_pages = pages[: self.index_config.max_num_of_docs_from_each_space]
|
|
197
|
+
doc_ids = [{"space_id": space_key, "doc_id": page["id"]} for page in limited_pages]
|
|
195
198
|
return doc_ids
|
|
196
199
|
|
|
197
200
|
def run(self) -> Generator[FileData, None, None]:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import os
|
|
1
|
+
import logging
|
|
2
2
|
import traceback
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
-
from multiprocessing import Process, Queue
|
|
4
|
+
from multiprocessing import Process, Queue, current_process
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import TYPE_CHECKING, Any, Optional
|
|
7
7
|
from urllib.parse import urlparse
|
|
@@ -20,6 +20,7 @@ from unstructured_ingest.interfaces import (
|
|
|
20
20
|
)
|
|
21
21
|
from unstructured_ingest.logger import logger
|
|
22
22
|
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
23
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
23
24
|
from unstructured_ingest.utils.data_prep import get_data_df, get_json_data
|
|
24
25
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
25
26
|
from unstructured_ingest.utils.table import convert_to_pandas_dataframe
|
|
@@ -47,18 +48,17 @@ class DeltaTableAccessConfig(AccessConfig):
|
|
|
47
48
|
|
|
48
49
|
class DeltaTableConnectionConfig(ConnectionConfig):
|
|
49
50
|
access_config: Secret[DeltaTableAccessConfig] = Field(
|
|
50
|
-
default=DeltaTableAccessConfig(), validate_default=True
|
|
51
|
+
default=Secret(DeltaTableAccessConfig()), validate_default=True
|
|
51
52
|
)
|
|
52
53
|
aws_region: Optional[str] = Field(default=None, description="AWS Region")
|
|
53
54
|
table_uri: str = Field(
|
|
54
|
-
default=None,
|
|
55
55
|
description=(
|
|
56
56
|
"Local path or path to the target folder in the S3 bucket, "
|
|
57
57
|
"formatted as s3://my-bucket/my-folder/"
|
|
58
58
|
),
|
|
59
59
|
)
|
|
60
60
|
|
|
61
|
-
def update_storage_options(self, storage_options: dict) -> None:
|
|
61
|
+
def update_storage_options(self, storage_options: dict[str, str]) -> None:
|
|
62
62
|
secrets = self.access_config.get_secret_value()
|
|
63
63
|
if self.aws_region and secrets.aws_access_key_id and secrets.aws_secret_access_key:
|
|
64
64
|
storage_options["AWS_REGION"] = self.aws_region
|
|
@@ -80,9 +80,10 @@ class DeltaTableUploadStager(UploadStager):
|
|
|
80
80
|
default_factory=lambda: DeltaTableUploadStagerConfig()
|
|
81
81
|
)
|
|
82
82
|
|
|
83
|
-
def run(
|
|
83
|
+
def run( # type: ignore[override]
|
|
84
84
|
self,
|
|
85
85
|
elements_filepath: Path,
|
|
86
|
+
file_data: FileData,
|
|
86
87
|
output_dir: Path,
|
|
87
88
|
output_filename: str,
|
|
88
89
|
**kwargs: Any,
|
|
@@ -91,6 +92,8 @@ class DeltaTableUploadStager(UploadStager):
|
|
|
91
92
|
output_path = Path(output_dir) / Path(f"{output_filename}.parquet")
|
|
92
93
|
|
|
93
94
|
df = convert_to_pandas_dataframe(elements_dict=elements_contents)
|
|
95
|
+
# Ensure per-record overwrite/delete semantics: tag each row with the record identifier
|
|
96
|
+
df[RECORD_ID_LABEL] = file_data.identifier
|
|
94
97
|
df = df.dropna(axis=1, how="all")
|
|
95
98
|
df.to_parquet(output_path)
|
|
96
99
|
|
|
@@ -138,41 +141,92 @@ class DeltaTableUploader(Uploader):
|
|
|
138
141
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
139
142
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
140
143
|
|
|
144
|
+
@requires_dependencies(["tenacity"], extras="delta-table")
|
|
141
145
|
def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
|
|
142
|
-
|
|
143
|
-
self.connection_config.table_uri, file_data.source_identifiers.relative_path
|
|
144
|
-
)
|
|
146
|
+
upload_path = self.connection_config.table_uri
|
|
145
147
|
logger.info(
|
|
146
|
-
f"writing {len(df)} rows to destination table "
|
|
147
|
-
f"at {updated_upload_path}\ndtypes: {df.dtypes}",
|
|
148
|
+
f"writing {len(df)} rows to destination table at {upload_path}\ndtypes: {df.dtypes}",
|
|
148
149
|
)
|
|
149
|
-
storage_options = {}
|
|
150
|
+
storage_options: dict[str, str] = {}
|
|
150
151
|
self.connection_config.update_storage_options(storage_options=storage_options)
|
|
151
152
|
|
|
153
|
+
# Decide whether the Delta table already exists. If it does, we first delete all rows
|
|
154
|
+
# belonging to the current record and then append the fresh data. Otherwise we will
|
|
155
|
+
# create a brand-new table via an overwrite.
|
|
156
|
+
|
|
157
|
+
mode = "overwrite"
|
|
158
|
+
try:
|
|
159
|
+
from deltalake import DeltaTable # pylint: disable=import-error
|
|
160
|
+
|
|
161
|
+
dt = DeltaTable(upload_path, storage_options=storage_options)
|
|
162
|
+
logger.debug(f"Table exists: deleting rows for {file_data.identifier}")
|
|
163
|
+
# Table exists – remove any previous rows for this record_id so that appending is
|
|
164
|
+
# effectively an idempotent overwrite for the record.
|
|
165
|
+
dt.delete(predicate=f"{RECORD_ID_LABEL} = '{file_data.identifier}'")
|
|
166
|
+
mode = "append"
|
|
167
|
+
except Exception:
|
|
168
|
+
# Table does not exist yet (or cannot be opened) – we will create it below with
|
|
169
|
+
# mode="overwrite". All other failures will be captured later by the writer.
|
|
170
|
+
logger.debug("Table does not exist: creating new table")
|
|
171
|
+
|
|
152
172
|
writer_kwargs = {
|
|
153
|
-
"table_or_uri":
|
|
173
|
+
"table_or_uri": upload_path,
|
|
154
174
|
"data": df,
|
|
155
|
-
"mode":
|
|
175
|
+
"mode": mode,
|
|
156
176
|
"schema_mode": "merge",
|
|
157
177
|
"storage_options": storage_options,
|
|
158
178
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
kwargs={"queue": queue, **writer_kwargs},
|
|
179
|
+
|
|
180
|
+
from tenacity import (
|
|
181
|
+
before_log,
|
|
182
|
+
retry,
|
|
183
|
+
retry_if_exception,
|
|
184
|
+
stop_after_attempt,
|
|
185
|
+
wait_random,
|
|
167
186
|
)
|
|
168
|
-
writer.start()
|
|
169
|
-
writer.join()
|
|
170
187
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
188
|
+
def _is_commit_conflict(exc: BaseException) -> bool: # noqa: ANN401
|
|
189
|
+
"""Return True if exception looks like a Delta Lake commit conflict."""
|
|
190
|
+
|
|
191
|
+
return isinstance(exc, RuntimeError) and (
|
|
192
|
+
"CommitFailed" in str(exc) or "Metadata changed" in str(exc)
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
@retry(
|
|
196
|
+
stop=stop_after_attempt(10),
|
|
197
|
+
wait=wait_random(min=0.2, max=1.0),
|
|
198
|
+
before=before_log(logger, logging.DEBUG),
|
|
199
|
+
retry=retry_if_exception(_is_commit_conflict),
|
|
200
|
+
reraise=True,
|
|
201
|
+
)
|
|
202
|
+
def _single_attempt() -> None:
|
|
203
|
+
"""One optimistic transaction: delete old rows, then append new ones."""
|
|
204
|
+
|
|
205
|
+
# NOTE: deltalake writer on Linux sometimes can finish but still trigger a SIGABRT and
|
|
206
|
+
# cause ingest to fail, even though all tasks are completed normally. Putting the writer
|
|
207
|
+
# into a process mitigates this issue by ensuring python interpreter waits properly for
|
|
208
|
+
# deltalake's rust backend to finish
|
|
209
|
+
queue: Queue[str] = Queue()
|
|
210
|
+
|
|
211
|
+
if current_process().daemon:
|
|
212
|
+
# write_deltalake_with_error_handling will push any traceback to our queue
|
|
213
|
+
write_deltalake_with_error_handling(queue=queue, **writer_kwargs)
|
|
214
|
+
else:
|
|
215
|
+
# On non-daemon processes we still guard against SIGABRT by running in a subprocess.
|
|
216
|
+
writer = Process(
|
|
217
|
+
target=write_deltalake_with_error_handling,
|
|
218
|
+
kwargs={"queue": queue, **writer_kwargs},
|
|
219
|
+
)
|
|
220
|
+
writer.start()
|
|
221
|
+
writer.join()
|
|
222
|
+
|
|
223
|
+
# Check if the queue has any exception message
|
|
224
|
+
if not queue.empty():
|
|
225
|
+
error_message = queue.get()
|
|
226
|
+
logger.error("Exception occurred in write_deltalake: %s", error_message)
|
|
227
|
+
raise RuntimeError(f"Error in write_deltalake: {error_message}")
|
|
228
|
+
|
|
229
|
+
_single_attempt()
|
|
176
230
|
|
|
177
231
|
@requires_dependencies(["pandas"], extras="delta-table")
|
|
178
232
|
def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
|
|
@@ -182,7 +236,7 @@ class DeltaTableUploader(Uploader):
|
|
|
182
236
|
self.upload_dataframe(df=df, file_data=file_data)
|
|
183
237
|
|
|
184
238
|
@requires_dependencies(["pandas"], extras="delta-table")
|
|
185
|
-
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
|
|
239
|
+
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None: # type: ignore[override]
|
|
186
240
|
df = get_data_df(path)
|
|
187
241
|
self.upload_dataframe(df=df, file_data=file_data)
|
|
188
242
|
|
|
@@ -133,8 +133,10 @@ class SQLIndexer(Indexer, ABC):
|
|
|
133
133
|
|
|
134
134
|
for batch in id_batches:
|
|
135
135
|
batch_items = [BatchItem(identifier=str(b)) for b in batch]
|
|
136
|
-
display_name = (
|
|
137
|
-
|
|
136
|
+
display_name = (
|
|
137
|
+
f"{self.index_config.table_name}-{self.index_config.id_column}"
|
|
138
|
+
f"-[{batch_items[0].identifier}..{batch_items[-1].identifier}]"
|
|
139
|
+
)
|
|
138
140
|
# Make sure the hash is always a positive number to create identified
|
|
139
141
|
yield SqlBatchFileData(
|
|
140
142
|
connector_type=self.connector_type,
|
|
@@ -145,7 +147,7 @@ class SQLIndexer(Indexer, ABC):
|
|
|
145
147
|
table_name=self.index_config.table_name, id_column=self.index_config.id_column
|
|
146
148
|
),
|
|
147
149
|
batch_items=batch_items,
|
|
148
|
-
display_name=display_name
|
|
150
|
+
display_name=display_name,
|
|
149
151
|
)
|
|
150
152
|
|
|
151
153
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.0.41" # pragma: no cover
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/cmd.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/dest.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/importer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/base/src.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/utils/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/cli/utils/click.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/data_types/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/data_types/entities.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/azure_openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/bedrock.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/huggingface.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/mixedbreadai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/octoai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/vertexai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/embed/voyageai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/indexer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/process.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/interfaces/uploader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/otel.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/pipeline/pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.41 → unstructured_ingest-1.0.44}/unstructured_ingest/processes/chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|