unstructured-ingest 1.0.7__tar.gz → 1.0.11__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/PKG-INFO +1 -1
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/pyproject.toml +1 -0
- unstructured_ingest-1.0.11/unstructured_ingest/__version__.py +1 -0
- unstructured_ingest-1.0.11/unstructured_ingest/data_types/entities.py +17 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/google_drive.py +16 -2
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +1 -1
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/neo4j.py +53 -22
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/pinecone.py +26 -0
- unstructured_ingest-1.0.7/unstructured_ingest/__version__.py +0 -1
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/.gitignore +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/LICENSE.md +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/README.md +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/README.md +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/cmd.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/dest.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/importer.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/src.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/cli.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/cmds.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/click.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/model_conversion.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/data_types/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/data_types/file_data.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/azure_openai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/bedrock.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/huggingface.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/interfaces.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/mixedbreadai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/octoai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/openai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/togetherai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/vertexai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/voyageai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/error.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/errors_v2.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/connector.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/downloader.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/indexer.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/process.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/processor.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/upload_stager.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/uploader.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/logger.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/main.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/otel.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/interfaces.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/otel.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/pipeline.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/chunk.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/download.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/embed.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/filter.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/index.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/partition.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/stage.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/uncompress.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/upload.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/chunker.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connector_registry.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/airtable.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/astradb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/azure_ai_search.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/chroma.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/confluence.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/couchbase.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_aws.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_azure.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_native.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/databricks/volumes_table.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/delta_table.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/discord.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/base.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/duckdb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/duckdb/motherduck.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/elasticsearch/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/azure.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/box.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/dropbox.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/fsspec.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/gcs.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/s3.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/sftp.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/fsspec/utils.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/github.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/gitlab.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/jira.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/cloud.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/kafka.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kafka/local.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/kdbai.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/aws.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/azure.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/cloud.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/gcp.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/lancedb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/lancedb/local.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/local.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/milvus.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/mongodb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/client.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/connector.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/helpers.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/interfaces.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/block.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/code.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/file.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/image.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/table.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/template.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/blocks/video.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/date.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/file.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/page.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/parent.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/rich_text.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/notion/types/user.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/onedrive.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/outlook.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/cloud.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/local.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/qdrant.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/qdrant/server.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/redisdb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/salesforce.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sharepoint.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/slack.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/postgres.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/singlestore.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/snowflake.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/sql.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/sqlite.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/sql/vastdb.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/utils.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/vectara.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/cloud.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/embedded.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/local.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/weaviate/weaviate.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/zendesk/client.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/embedder.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/filter.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/partitioner.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/uncompress.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/utils/blob_storage.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/unstructured_api.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/__init__.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/chunking.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/compression.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/constants.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/data_prep.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/dep_check.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/html.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/ndjson.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/pydantic_models.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/string_and_date_utils.py +0 -0
- {unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/utils/table.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.11" # pragma: no cover
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Entity(BaseModel):
|
|
5
|
+
type: str
|
|
6
|
+
entity: str
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EntityRelationship(BaseModel):
|
|
10
|
+
to: str
|
|
11
|
+
from_: str = Field(..., alias="from")
|
|
12
|
+
relationship: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class EntitiesData(BaseModel):
|
|
16
|
+
items: list[Entity] = Field(default_factory=list)
|
|
17
|
+
relationships: list[EntityRelationship] = Field(default_factory=list)
|
|
@@ -153,7 +153,13 @@ class GoogleDriveIndexer(Indexer):
|
|
|
153
153
|
"""
|
|
154
154
|
try:
|
|
155
155
|
# A very minimal call: list 1 file from the drive.
|
|
156
|
-
client.list(
|
|
156
|
+
client.list(
|
|
157
|
+
supportsAllDrives=True,
|
|
158
|
+
includeItemsFromAllDrives=True,
|
|
159
|
+
spaces="drive",
|
|
160
|
+
pageSize=1,
|
|
161
|
+
fields="files(id)",
|
|
162
|
+
).execute()
|
|
157
163
|
except HttpError as e:
|
|
158
164
|
error_content = e.content.decode() if hasattr(e, "content") else ""
|
|
159
165
|
lower_error = error_content.lower()
|
|
@@ -183,6 +189,8 @@ class GoogleDriveIndexer(Indexer):
|
|
|
183
189
|
page_token = None
|
|
184
190
|
while True:
|
|
185
191
|
response = files_client.list(
|
|
192
|
+
supportsAllDrives=True,
|
|
193
|
+
includeItemsFromAllDrives=True,
|
|
186
194
|
spaces="drive",
|
|
187
195
|
q=query,
|
|
188
196
|
fields="nextPageToken, files(id, mimeType, fileExtension)",
|
|
@@ -251,6 +259,8 @@ class GoogleDriveIndexer(Indexer):
|
|
|
251
259
|
else:
|
|
252
260
|
# Non-recursive: check for at least one immediate non-folder child.
|
|
253
261
|
response = client.list(
|
|
262
|
+
supportsAllDrives=True,
|
|
263
|
+
includeItemsFromAllDrives=True,
|
|
254
264
|
spaces="drive",
|
|
255
265
|
fields="files(id)",
|
|
256
266
|
pageSize=1,
|
|
@@ -348,6 +358,8 @@ class GoogleDriveIndexer(Indexer):
|
|
|
348
358
|
files_response = []
|
|
349
359
|
while not done:
|
|
350
360
|
response: dict = files_client.list(
|
|
361
|
+
supportsAllDrives=True,
|
|
362
|
+
includeItemsFromAllDrives=True,
|
|
351
363
|
spaces="drive",
|
|
352
364
|
fields=fields_input,
|
|
353
365
|
corpora="user",
|
|
@@ -381,7 +393,9 @@ class GoogleDriveIndexer(Indexer):
|
|
|
381
393
|
return files_response
|
|
382
394
|
|
|
383
395
|
def get_root_info(self, files_client, object_id: str) -> dict:
|
|
384
|
-
return files_client.get(
|
|
396
|
+
return files_client.get(
|
|
397
|
+
supportsAllDrives=True, fileId=object_id, fields=",".join(self.fields)
|
|
398
|
+
).execute()
|
|
385
399
|
|
|
386
400
|
def get_files(
|
|
387
401
|
self,
|
|
@@ -172,7 +172,7 @@ class IbmWatsonxUploaderConfig(UploaderConfig):
|
|
|
172
172
|
namespace: str = Field(description="Namespace name")
|
|
173
173
|
table: str = Field(description="Table name")
|
|
174
174
|
max_retries: int = Field(
|
|
175
|
-
default=5, description="Maximum number of retries to upload data", ge=2, le=
|
|
175
|
+
default=5, description="Maximum number of retries to upload data", ge=2, le=500
|
|
176
176
|
)
|
|
177
177
|
record_id_key: str = Field(
|
|
178
178
|
default=RECORD_ID_LABEL,
|
|
@@ -9,8 +9,9 @@ from enum import Enum
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import TYPE_CHECKING, Any, AsyncGenerator, Literal, Optional
|
|
11
11
|
|
|
12
|
-
from pydantic import BaseModel, ConfigDict, Field, Secret, field_validator
|
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field, Secret, ValidationError, field_validator
|
|
13
13
|
|
|
14
|
+
from unstructured_ingest.data_types.entities import EntitiesData, Entity, EntityRelationship
|
|
14
15
|
from unstructured_ingest.data_types.file_data import FileData
|
|
15
16
|
from unstructured_ingest.error import DestinationConnectionError
|
|
16
17
|
from unstructured_ingest.interfaces import (
|
|
@@ -97,7 +98,6 @@ class Neo4jUploadStager(UploadStager):
|
|
|
97
98
|
**kwargs: Any,
|
|
98
99
|
) -> Path:
|
|
99
100
|
elements = get_json_data(elements_filepath)
|
|
100
|
-
|
|
101
101
|
nx_graph = self._create_lexical_graph(
|
|
102
102
|
elements, self._create_document_node(file_data=file_data)
|
|
103
103
|
)
|
|
@@ -109,28 +109,54 @@ class Neo4jUploadStager(UploadStager):
|
|
|
109
109
|
|
|
110
110
|
return output_filepath
|
|
111
111
|
|
|
112
|
-
def _add_entities(self,
|
|
113
|
-
entities = element.get("metadata", {}).get("entities", [])
|
|
114
|
-
if not entities:
|
|
115
|
-
return None
|
|
116
|
-
if not isinstance(entities, list):
|
|
117
|
-
return None
|
|
118
|
-
|
|
112
|
+
def _add_entities(self, entities: list[Entity], graph: "Graph", element_node: _Node) -> None:
|
|
119
113
|
for entity in entities:
|
|
120
|
-
if not isinstance(entity, dict):
|
|
121
|
-
continue
|
|
122
|
-
if "entity" not in entity or "type" not in entity:
|
|
123
|
-
continue
|
|
124
114
|
entity_node = _Node(
|
|
125
|
-
labels=[Label.ENTITY], properties={"id": entity
|
|
115
|
+
labels=[Label.ENTITY], properties={"id": entity.entity}, id_=entity.entity
|
|
126
116
|
)
|
|
127
117
|
graph.add_edge(
|
|
128
118
|
entity_node,
|
|
129
|
-
_Node(labels=[Label.ENTITY], properties={"id": entity
|
|
119
|
+
_Node(labels=[Label.ENTITY], properties={"id": entity.type}, id_=entity.type),
|
|
130
120
|
relationship=Relationship.ENTITY_TYPE,
|
|
131
121
|
)
|
|
132
122
|
graph.add_edge(element_node, entity_node, relationship=Relationship.HAS_ENTITY)
|
|
133
123
|
|
|
124
|
+
def _add_entity_relationships(
|
|
125
|
+
self, relationships: list[EntityRelationship], graph: "Graph"
|
|
126
|
+
) -> None:
|
|
127
|
+
for relationship in relationships:
|
|
128
|
+
from_node = _Node(
|
|
129
|
+
labels=[Label.ENTITY],
|
|
130
|
+
properties={"id": relationship.from_},
|
|
131
|
+
id_=relationship.from_,
|
|
132
|
+
)
|
|
133
|
+
to_node = _Node(
|
|
134
|
+
labels=[Label.ENTITY], properties={"id": relationship.to}, id_=relationship.to
|
|
135
|
+
)
|
|
136
|
+
graph.add_edge(from_node, to_node, relationship=relationship.relationship)
|
|
137
|
+
|
|
138
|
+
def _add_entity_data(self, element: dict, graph: "Graph", element_node: _Node) -> None:
|
|
139
|
+
entities = element.get("metadata", {}).get("entities", {})
|
|
140
|
+
if not entities:
|
|
141
|
+
return None
|
|
142
|
+
try:
|
|
143
|
+
if isinstance(entities, list):
|
|
144
|
+
self._add_entities(
|
|
145
|
+
[Entity.model_validate(e) for e in entities if isinstance(e, dict)],
|
|
146
|
+
graph,
|
|
147
|
+
element_node,
|
|
148
|
+
)
|
|
149
|
+
elif isinstance(entities, dict):
|
|
150
|
+
entity_data = EntitiesData.model_validate(entities)
|
|
151
|
+
self._add_entities(entity_data.items, graph, element_node)
|
|
152
|
+
self._add_entity_relationships(entity_data.relationships, graph)
|
|
153
|
+
except ValidationError:
|
|
154
|
+
logger.warning(
|
|
155
|
+
"Failed to add entities to the graph. "
|
|
156
|
+
"Please check the format of the entities in the input data."
|
|
157
|
+
)
|
|
158
|
+
return None
|
|
159
|
+
|
|
134
160
|
def _create_lexical_graph(self, elements: list[dict], document_node: _Node) -> "Graph":
|
|
135
161
|
import networkx as nx
|
|
136
162
|
|
|
@@ -149,7 +175,7 @@ class Neo4jUploadStager(UploadStager):
|
|
|
149
175
|
previous_node = element_node
|
|
150
176
|
graph.add_edge(element_node, document_node, relationship=Relationship.PART_OF_DOCUMENT)
|
|
151
177
|
|
|
152
|
-
self.
|
|
178
|
+
self._add_entity_data(element, graph, element_node)
|
|
153
179
|
|
|
154
180
|
if self._is_chunk(element):
|
|
155
181
|
for origin_element in format_and_truncate_orig_elements(element, include_text=True):
|
|
@@ -165,7 +191,7 @@ class Neo4jUploadStager(UploadStager):
|
|
|
165
191
|
document_node,
|
|
166
192
|
relationship=Relationship.PART_OF_DOCUMENT,
|
|
167
193
|
)
|
|
168
|
-
self.
|
|
194
|
+
self._add_entity_data(origin_element, graph, origin_element_node)
|
|
169
195
|
|
|
170
196
|
return graph
|
|
171
197
|
|
|
@@ -208,7 +234,9 @@ class _GraphData(BaseModel):
|
|
|
208
234
|
_Edge(
|
|
209
235
|
source=u,
|
|
210
236
|
destination=v,
|
|
211
|
-
relationship=Relationship(data_dict["relationship"])
|
|
237
|
+
relationship=Relationship(data_dict["relationship"])
|
|
238
|
+
if data_dict["relationship"] in Relationship
|
|
239
|
+
else data_dict["relationship"],
|
|
212
240
|
)
|
|
213
241
|
for u, v, data_dict in nx_graph.edges(data=True)
|
|
214
242
|
]
|
|
@@ -242,7 +270,7 @@ class _Edge(BaseModel):
|
|
|
242
270
|
|
|
243
271
|
source: _Node
|
|
244
272
|
destination: _Node
|
|
245
|
-
relationship: Relationship
|
|
273
|
+
relationship: Relationship | str
|
|
246
274
|
|
|
247
275
|
|
|
248
276
|
class Label(Enum):
|
|
@@ -380,7 +408,7 @@ class Neo4jUploader(Uploader):
|
|
|
380
408
|
)
|
|
381
409
|
logger.info(f"Finished merging {len(graph_data.nodes)} graph nodes.")
|
|
382
410
|
|
|
383
|
-
edges_by_relationship: defaultdict[tuple[Relationship, Label, Label], list[_Edge]] = (
|
|
411
|
+
edges_by_relationship: defaultdict[tuple[Relationship | str, Label, Label], list[_Edge]] = (
|
|
384
412
|
defaultdict(list)
|
|
385
413
|
)
|
|
386
414
|
for edge in graph_data.edges:
|
|
@@ -463,16 +491,19 @@ class Neo4jUploader(Uploader):
|
|
|
463
491
|
@staticmethod
|
|
464
492
|
def _create_edges_query(
|
|
465
493
|
edges: list[_Edge],
|
|
466
|
-
relationship: Relationship,
|
|
494
|
+
relationship: Relationship | str,
|
|
467
495
|
source_label: Label,
|
|
468
496
|
destination_label: Label,
|
|
469
497
|
) -> tuple[str, dict]:
|
|
470
498
|
logger.info(f"Preparing MERGE query for {len(edges)} {relationship} relationships.")
|
|
499
|
+
relationship = (
|
|
500
|
+
relationship.value if isinstance(relationship, Relationship) else relationship
|
|
501
|
+
)
|
|
471
502
|
query_string = f"""
|
|
472
503
|
UNWIND $edges AS edge
|
|
473
504
|
MATCH (u: `{source_label.value}` {{id: edge.source}})
|
|
474
505
|
MATCH (v: `{destination_label.value}` {{id: edge.destination}})
|
|
475
|
-
MERGE (u)-[:`{relationship
|
|
506
|
+
MERGE (u)-[:`{relationship}`]->(v)
|
|
476
507
|
"""
|
|
477
508
|
parameters = {
|
|
478
509
|
"edges": [
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
5
6
|
|
|
6
7
|
from pydantic import Field, Secret
|
|
@@ -18,11 +19,14 @@ from unstructured_ingest.interfaces import (
|
|
|
18
19
|
)
|
|
19
20
|
from unstructured_ingest.logger import logger
|
|
20
21
|
from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
|
|
22
|
+
from unstructured_ingest.utils import ndjson
|
|
21
23
|
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
22
24
|
from unstructured_ingest.utils.data_prep import (
|
|
23
25
|
flatten_dict,
|
|
24
26
|
generator_batching_wbytes,
|
|
25
27
|
get_enhanced_element_id,
|
|
28
|
+
get_json_data,
|
|
29
|
+
write_data,
|
|
26
30
|
)
|
|
27
31
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
28
32
|
|
|
@@ -162,6 +166,28 @@ class PineconeUploadStager(UploadStager):
|
|
|
162
166
|
"metadata": metadata,
|
|
163
167
|
}
|
|
164
168
|
|
|
169
|
+
def stream_update(self, input_file: Path, output_file: Path, file_data: FileData) -> None:
|
|
170
|
+
with input_file.open() as in_f:
|
|
171
|
+
reader = ndjson.reader(in_f)
|
|
172
|
+
with output_file.open("w") as out_f:
|
|
173
|
+
writer = ndjson.writer(out_f)
|
|
174
|
+
for element in reader:
|
|
175
|
+
if "embeddings" not in element:
|
|
176
|
+
continue
|
|
177
|
+
conformed_element = self.conform_dict(element_dict=element, file_data=file_data)
|
|
178
|
+
writer.write(row=conformed_element)
|
|
179
|
+
writer.f.flush()
|
|
180
|
+
|
|
181
|
+
def process_whole(self, input_file: Path, output_file: Path, file_data: FileData) -> None:
|
|
182
|
+
elements_contents = get_json_data(path=input_file)
|
|
183
|
+
|
|
184
|
+
conformed_elements = [
|
|
185
|
+
self.conform_dict(element_dict=element, file_data=file_data)
|
|
186
|
+
for element in elements_contents
|
|
187
|
+
if "embeddings" in element
|
|
188
|
+
]
|
|
189
|
+
write_data(path=output_file, data=conformed_elements)
|
|
190
|
+
|
|
165
191
|
|
|
166
192
|
@dataclass
|
|
167
193
|
class PineconeUploader(VectorDBUploader):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.0.7" # pragma: no cover
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/cmd.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/dest.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/importer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/base/src.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/cli/utils/click.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/data_types/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/data_types/file_data.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/azure_openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/bedrock.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/huggingface.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/mixedbreadai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/octoai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/openai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/togetherai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/vertexai.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/embed/voyageai.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/connector.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/indexer.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/process.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/processor.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/interfaces/uploader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/interfaces.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/otel.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/chunk.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/embed.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/index.py
RENAMED
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/pipeline/steps/stage.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/__init__.py
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.7 → unstructured_ingest-1.0.11}/unstructured_ingest/processes/chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|