unstructured-ingest 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/__init__.py +0 -0
- test/integration/__init__.py +0 -0
- test/integration/chunkers/__init__.py +0 -0
- test/integration/chunkers/test_chunkers.py +31 -0
- test/integration/connectors/__init__.py +0 -0
- test/integration/connectors/conftest.py +38 -0
- test/integration/connectors/databricks/__init__.py +0 -0
- test/integration/connectors/databricks/test_volumes_native.py +269 -0
- test/integration/connectors/discord/__init__.py +0 -0
- test/integration/connectors/discord/test_discord.py +90 -0
- test/integration/connectors/duckdb/__init__.py +0 -0
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +89 -0
- test/integration/connectors/duckdb/test_motherduck.py +95 -0
- test/integration/connectors/elasticsearch/__init__.py +0 -0
- test/integration/connectors/elasticsearch/conftest.py +34 -0
- test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
- test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
- test/integration/connectors/sql/__init__.py +0 -0
- test/integration/connectors/sql/test_postgres.py +195 -0
- test/integration/connectors/sql/test_singlestore.py +176 -0
- test/integration/connectors/sql/test_snowflake.py +238 -0
- test/integration/connectors/sql/test_sqlite.py +162 -0
- test/integration/connectors/test_astradb.py +217 -0
- test/integration/connectors/test_azure_ai_search.py +255 -0
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +113 -0
- test/integration/connectors/test_delta_table.py +185 -0
- test/integration/connectors/test_lancedb.py +247 -0
- test/integration/connectors/test_milvus.py +203 -0
- test/integration/connectors/test_mongodb.py +335 -0
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_notion.py +145 -0
- test/integration/connectors/test_onedrive.py +118 -0
- test/integration/connectors/test_pinecone.py +288 -0
- test/integration/connectors/test_qdrant.py +215 -0
- test/integration/connectors/test_redis.py +119 -0
- test/integration/connectors/test_s3.py +183 -0
- test/integration/connectors/test_vectara.py +270 -0
- test/integration/connectors/utils/__init__.py +0 -0
- test/integration/connectors/utils/constants.py +7 -0
- test/integration/connectors/utils/docker.py +151 -0
- test/integration/connectors/utils/docker_compose.py +59 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +75 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/validation/source.py +299 -0
- test/integration/connectors/utils/validation/utils.py +36 -0
- test/integration/connectors/weaviate/__init__.py +0 -0
- test/integration/connectors/weaviate/conftest.py +15 -0
- test/integration/connectors/weaviate/test_cloud.py +34 -0
- test/integration/connectors/weaviate/test_local.py +131 -0
- test/integration/embedders/__init__.py +0 -0
- test/integration/embedders/conftest.py +13 -0
- test/integration/embedders/test_azure_openai.py +59 -0
- test/integration/embedders/test_bedrock.py +103 -0
- test/integration/embedders/test_huggingface.py +26 -0
- test/integration/embedders/test_mixedbread.py +71 -0
- test/integration/embedders/test_octoai.py +77 -0
- test/integration/embedders/test_openai.py +76 -0
- test/integration/embedders/test_togetherai.py +71 -0
- test/integration/embedders/test_vertexai.py +65 -0
- test/integration/embedders/test_voyageai.py +65 -0
- test/integration/embedders/utils.py +68 -0
- test/integration/partitioners/__init__.py +0 -0
- test/integration/partitioners/test_partitioner.py +75 -0
- test/integration/utils.py +15 -0
- test/unit/__init__.py +0 -0
- test/unit/embed/__init__.py +0 -0
- test/unit/embed/test_mixedbreadai.py +42 -0
- test/unit/embed/test_octoai.py +27 -0
- test/unit/embed/test_openai.py +20 -0
- test/unit/embed/test_vertexai.py +25 -0
- test/unit/embed/test_voyageai.py +24 -0
- test/unit/test_error.py +27 -0
- test/unit/test_logger.py +78 -0
- test/unit/test_utils.py +184 -0
- test/unit/v2/__init__.py +0 -0
- test/unit/v2/chunkers/__init__.py +0 -0
- test/unit/v2/chunkers/test_chunkers.py +49 -0
- test/unit/v2/connectors/__init__.py +0 -0
- test/unit/v2/connectors/test_confluence.py +39 -0
- test/unit/v2/embedders/__init__.py +0 -0
- test/unit/v2/embedders/test_bedrock.py +36 -0
- test/unit/v2/embedders/test_huggingface.py +48 -0
- test/unit/v2/embedders/test_mixedbread.py +37 -0
- test/unit/v2/embedders/test_octoai.py +35 -0
- test/unit/v2/embedders/test_openai.py +35 -0
- test/unit/v2/embedders/test_togetherai.py +37 -0
- test/unit/v2/embedders/test_vertexai.py +37 -0
- test/unit/v2/embedders/test_voyageai.py +38 -0
- test/unit/v2/partitioners/__init__.py +0 -0
- test/unit/v2/partitioners/test_partitioner.py +63 -0
- test/unit/v2/test_interfaces.py +26 -0
- test/unit/v2/test_utils.py +82 -0
- test/unit/v2/utils/__init__.py +0 -0
- test/unit/v2/utils/data_generator.py +32 -0
- unstructured_ingest/__init__.py +1 -0
- unstructured_ingest/__version__.py +1 -0
- unstructured_ingest/cli/__init__.py +14 -0
- unstructured_ingest/cli/base/__init__.py +0 -0
- unstructured_ingest/cli/base/cmd.py +19 -0
- unstructured_ingest/cli/base/dest.py +87 -0
- unstructured_ingest/cli/base/src.py +57 -0
- unstructured_ingest/cli/cli.py +37 -0
- unstructured_ingest/cli/cmd_factory.py +12 -0
- unstructured_ingest/cli/cmds/__init__.py +145 -0
- unstructured_ingest/cli/cmds/airtable.py +69 -0
- unstructured_ingest/cli/cmds/astradb.py +99 -0
- unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
- unstructured_ingest/cli/cmds/biomed.py +52 -0
- unstructured_ingest/cli/cmds/chroma.py +104 -0
- unstructured_ingest/cli/cmds/clarifai.py +71 -0
- unstructured_ingest/cli/cmds/confluence.py +69 -0
- unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
- unstructured_ingest/cli/cmds/delta_table.py +94 -0
- unstructured_ingest/cli/cmds/discord.py +47 -0
- unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
- unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
- unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
- unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
- unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
- unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
- unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
- unstructured_ingest/cli/cmds/github.py +54 -0
- unstructured_ingest/cli/cmds/gitlab.py +54 -0
- unstructured_ingest/cli/cmds/google_drive.py +49 -0
- unstructured_ingest/cli/cmds/hubspot.py +70 -0
- unstructured_ingest/cli/cmds/jira.py +71 -0
- unstructured_ingest/cli/cmds/kafka.py +102 -0
- unstructured_ingest/cli/cmds/local.py +43 -0
- unstructured_ingest/cli/cmds/mongodb.py +72 -0
- unstructured_ingest/cli/cmds/notion.py +48 -0
- unstructured_ingest/cli/cmds/onedrive.py +66 -0
- unstructured_ingest/cli/cmds/opensearch.py +117 -0
- unstructured_ingest/cli/cmds/outlook.py +67 -0
- unstructured_ingest/cli/cmds/pinecone.py +71 -0
- unstructured_ingest/cli/cmds/qdrant.py +124 -0
- unstructured_ingest/cli/cmds/reddit.py +67 -0
- unstructured_ingest/cli/cmds/salesforce.py +58 -0
- unstructured_ingest/cli/cmds/sharepoint.py +66 -0
- unstructured_ingest/cli/cmds/slack.py +56 -0
- unstructured_ingest/cli/cmds/sql.py +66 -0
- unstructured_ingest/cli/cmds/vectara.py +66 -0
- unstructured_ingest/cli/cmds/weaviate.py +98 -0
- unstructured_ingest/cli/cmds/wikipedia.py +40 -0
- unstructured_ingest/cli/common.py +7 -0
- unstructured_ingest/cli/interfaces.py +663 -0
- unstructured_ingest/cli/utils.py +205 -0
- unstructured_ingest/connector/__init__.py +0 -0
- unstructured_ingest/connector/airtable.py +309 -0
- unstructured_ingest/connector/astradb.py +267 -0
- unstructured_ingest/connector/azure_ai_search.py +144 -0
- unstructured_ingest/connector/biomed.py +320 -0
- unstructured_ingest/connector/chroma.py +158 -0
- unstructured_ingest/connector/clarifai.py +122 -0
- unstructured_ingest/connector/confluence.py +285 -0
- unstructured_ingest/connector/databricks_volumes.py +137 -0
- unstructured_ingest/connector/delta_table.py +203 -0
- unstructured_ingest/connector/discord.py +180 -0
- unstructured_ingest/connector/elasticsearch.py +396 -0
- unstructured_ingest/connector/fsspec/__init__.py +0 -0
- unstructured_ingest/connector/fsspec/azure.py +78 -0
- unstructured_ingest/connector/fsspec/box.py +109 -0
- unstructured_ingest/connector/fsspec/dropbox.py +160 -0
- unstructured_ingest/connector/fsspec/fsspec.py +359 -0
- unstructured_ingest/connector/fsspec/gcs.py +82 -0
- unstructured_ingest/connector/fsspec/s3.py +62 -0
- unstructured_ingest/connector/fsspec/sftp.py +81 -0
- unstructured_ingest/connector/git.py +124 -0
- unstructured_ingest/connector/github.py +174 -0
- unstructured_ingest/connector/gitlab.py +142 -0
- unstructured_ingest/connector/google_drive.py +348 -0
- unstructured_ingest/connector/hubspot.py +278 -0
- unstructured_ingest/connector/jira.py +469 -0
- unstructured_ingest/connector/kafka.py +293 -0
- unstructured_ingest/connector/local.py +139 -0
- unstructured_ingest/connector/mongodb.py +284 -0
- unstructured_ingest/connector/notion/__init__.py +0 -0
- unstructured_ingest/connector/notion/client.py +248 -0
- unstructured_ingest/connector/notion/connector.py +469 -0
- unstructured_ingest/connector/notion/helpers.py +584 -0
- unstructured_ingest/connector/notion/interfaces.py +32 -0
- unstructured_ingest/connector/notion/types/__init__.py +0 -0
- unstructured_ingest/connector/notion/types/block.py +96 -0
- unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
- unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
- unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
- unstructured_ingest/connector/notion/types/database.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
- unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
- unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/connector/notion/types/date.py +26 -0
- unstructured_ingest/connector/notion/types/file.py +51 -0
- unstructured_ingest/connector/notion/types/page.py +45 -0
- unstructured_ingest/connector/notion/types/parent.py +66 -0
- unstructured_ingest/connector/notion/types/rich_text.py +189 -0
- unstructured_ingest/connector/notion/types/user.py +76 -0
- unstructured_ingest/connector/onedrive.py +232 -0
- unstructured_ingest/connector/opensearch.py +218 -0
- unstructured_ingest/connector/outlook.py +285 -0
- unstructured_ingest/connector/pinecone.py +140 -0
- unstructured_ingest/connector/qdrant.py +144 -0
- unstructured_ingest/connector/reddit.py +166 -0
- unstructured_ingest/connector/registry.py +109 -0
- unstructured_ingest/connector/salesforce.py +301 -0
- unstructured_ingest/connector/sharepoint.py +573 -0
- unstructured_ingest/connector/slack.py +224 -0
- unstructured_ingest/connector/sql.py +199 -0
- unstructured_ingest/connector/vectara.py +253 -0
- unstructured_ingest/connector/weaviate.py +190 -0
- unstructured_ingest/connector/wikipedia.py +208 -0
- unstructured_ingest/embed/__init__.py +0 -0
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/embed/bedrock.py +193 -0
- unstructured_ingest/embed/huggingface.py +52 -0
- unstructured_ingest/embed/interfaces.py +117 -0
- unstructured_ingest/embed/mixedbreadai.py +233 -0
- unstructured_ingest/embed/octoai.py +130 -0
- unstructured_ingest/embed/openai.py +116 -0
- unstructured_ingest/embed/togetherai.py +106 -0
- unstructured_ingest/embed/vertexai.py +126 -0
- unstructured_ingest/embed/voyageai.py +130 -0
- unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
- unstructured_ingest/enhanced_dataclass/core.py +99 -0
- unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
- unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
- unstructured_ingest/error.py +49 -0
- unstructured_ingest/ingest_backoff/__init__.py +3 -0
- unstructured_ingest/ingest_backoff/_common.py +102 -0
- unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
- unstructured_ingest/interfaces.py +852 -0
- unstructured_ingest/logger.py +130 -0
- unstructured_ingest/main.py +11 -0
- unstructured_ingest/pipeline/__init__.py +22 -0
- unstructured_ingest/pipeline/copy.py +19 -0
- unstructured_ingest/pipeline/doc_factory.py +12 -0
- unstructured_ingest/pipeline/interfaces.py +270 -0
- unstructured_ingest/pipeline/partition.py +60 -0
- unstructured_ingest/pipeline/permissions.py +12 -0
- unstructured_ingest/pipeline/pipeline.py +117 -0
- unstructured_ingest/pipeline/reformat/__init__.py +0 -0
- unstructured_ingest/pipeline/reformat/chunking.py +134 -0
- unstructured_ingest/pipeline/reformat/embedding.py +64 -0
- unstructured_ingest/pipeline/source.py +77 -0
- unstructured_ingest/pipeline/utils.py +6 -0
- unstructured_ingest/pipeline/write.py +18 -0
- unstructured_ingest/processor.py +93 -0
- unstructured_ingest/runner/__init__.py +104 -0
- unstructured_ingest/runner/airtable.py +35 -0
- unstructured_ingest/runner/astradb.py +34 -0
- unstructured_ingest/runner/base_runner.py +89 -0
- unstructured_ingest/runner/biomed.py +45 -0
- unstructured_ingest/runner/confluence.py +35 -0
- unstructured_ingest/runner/delta_table.py +34 -0
- unstructured_ingest/runner/discord.py +35 -0
- unstructured_ingest/runner/elasticsearch.py +40 -0
- unstructured_ingest/runner/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/fsspec/azure.py +30 -0
- unstructured_ingest/runner/fsspec/box.py +28 -0
- unstructured_ingest/runner/fsspec/dropbox.py +30 -0
- unstructured_ingest/runner/fsspec/fsspec.py +40 -0
- unstructured_ingest/runner/fsspec/gcs.py +28 -0
- unstructured_ingest/runner/fsspec/s3.py +28 -0
- unstructured_ingest/runner/fsspec/sftp.py +28 -0
- unstructured_ingest/runner/github.py +37 -0
- unstructured_ingest/runner/gitlab.py +37 -0
- unstructured_ingest/runner/google_drive.py +35 -0
- unstructured_ingest/runner/hubspot.py +35 -0
- unstructured_ingest/runner/jira.py +35 -0
- unstructured_ingest/runner/kafka.py +34 -0
- unstructured_ingest/runner/local.py +23 -0
- unstructured_ingest/runner/mongodb.py +34 -0
- unstructured_ingest/runner/notion.py +61 -0
- unstructured_ingest/runner/onedrive.py +35 -0
- unstructured_ingest/runner/opensearch.py +40 -0
- unstructured_ingest/runner/outlook.py +33 -0
- unstructured_ingest/runner/reddit.py +35 -0
- unstructured_ingest/runner/salesforce.py +33 -0
- unstructured_ingest/runner/sharepoint.py +35 -0
- unstructured_ingest/runner/slack.py +33 -0
- unstructured_ingest/runner/utils.py +47 -0
- unstructured_ingest/runner/wikipedia.py +35 -0
- unstructured_ingest/runner/writers/__init__.py +48 -0
- unstructured_ingest/runner/writers/astradb.py +22 -0
- unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
- unstructured_ingest/runner/writers/base_writer.py +26 -0
- unstructured_ingest/runner/writers/chroma.py +22 -0
- unstructured_ingest/runner/writers/clarifai.py +19 -0
- unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
- unstructured_ingest/runner/writers/delta_table.py +24 -0
- unstructured_ingest/runner/writers/elasticsearch.py +24 -0
- unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
- unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
- unstructured_ingest/runner/writers/fsspec/box.py +21 -0
- unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
- unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
- unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
- unstructured_ingest/runner/writers/kafka.py +21 -0
- unstructured_ingest/runner/writers/mongodb.py +21 -0
- unstructured_ingest/runner/writers/opensearch.py +26 -0
- unstructured_ingest/runner/writers/pinecone.py +21 -0
- unstructured_ingest/runner/writers/qdrant.py +19 -0
- unstructured_ingest/runner/writers/sql.py +22 -0
- unstructured_ingest/runner/writers/vectara.py +22 -0
- unstructured_ingest/runner/writers/weaviate.py +21 -0
- unstructured_ingest/utils/__init__.py +0 -0
- unstructured_ingest/utils/chunking.py +56 -0
- unstructured_ingest/utils/compression.py +118 -0
- unstructured_ingest/utils/data_prep.py +200 -0
- unstructured_ingest/utils/dep_check.py +78 -0
- unstructured_ingest/utils/google_filetype.py +9 -0
- unstructured_ingest/utils/string_and_date_utils.py +49 -0
- unstructured_ingest/utils/table.py +73 -0
- unstructured_ingest/v2/__init__.py +1 -0
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +4 -0
- unstructured_ingest/v2/cli/base/cmd.py +269 -0
- unstructured_ingest/v2/cli/base/dest.py +85 -0
- unstructured_ingest/v2/cli/base/importer.py +34 -0
- unstructured_ingest/v2/cli/base/src.py +85 -0
- unstructured_ingest/v2/cli/cli.py +24 -0
- unstructured_ingest/v2/cli/cmds.py +14 -0
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/cli/utils/click.py +237 -0
- unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
- unstructured_ingest/v2/constants.py +2 -0
- unstructured_ingest/v2/errors.py +18 -0
- unstructured_ingest/v2/interfaces/__init__.py +32 -0
- unstructured_ingest/v2/interfaces/connector.py +50 -0
- unstructured_ingest/v2/interfaces/downloader.py +89 -0
- unstructured_ingest/v2/interfaces/file_data.py +116 -0
- unstructured_ingest/v2/interfaces/indexer.py +30 -0
- unstructured_ingest/v2/interfaces/process.py +19 -0
- unstructured_ingest/v2/interfaces/processor.py +88 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
- unstructured_ingest/v2/interfaces/uploader.py +53 -0
- unstructured_ingest/v2/logger.py +126 -0
- unstructured_ingest/v2/main.py +11 -0
- unstructured_ingest/v2/otel.py +111 -0
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +211 -0
- unstructured_ingest/v2/pipeline/otel.py +32 -0
- unstructured_ingest/v2/pipeline/pipeline.py +384 -0
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
- unstructured_ingest/v2/pipeline/steps/download.py +207 -0
- unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
- unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
- unstructured_ingest/v2/pipeline/steps/index.py +86 -0
- unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
- unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
- unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
- unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
- unstructured_ingest/v2/processes/__init__.py +18 -0
- unstructured_ingest/v2/processes/chunker.py +124 -0
- unstructured_ingest/v2/processes/connector_registry.py +69 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
- unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
- unstructured_ingest/v2/processes/connectors/discord.py +158 -0
- unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
- unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
- unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
- unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
- unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
- unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
- unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
- unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
- unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
- unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
- unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
- unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
- unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
- unstructured_ingest/v2/processes/connectors/local.py +217 -0
- unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
- unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
- unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
- unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
- unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
- unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
- unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
- unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
- unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
- unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
- unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
- unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
- unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
- unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
- unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
- unstructured_ingest/v2/processes/connectors/slack.py +248 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
- unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
- unstructured_ingest/v2/processes/connectors/utils.py +29 -0
- unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
- unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
- unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
- unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
- unstructured_ingest/v2/processes/embedder.py +195 -0
- unstructured_ingest/v2/processes/filter.py +60 -0
- unstructured_ingest/v2/processes/partitioner.py +188 -0
- unstructured_ingest/v2/processes/uncompress.py +61 -0
- unstructured_ingest/v2/unstructured_api.py +128 -0
- unstructured_ingest/v2/utils.py +61 -0
- unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
- unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
- unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
- unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
- unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
- unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, Secret
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.error import DestinationConnectionError
|
|
8
|
+
from unstructured_ingest.utils.data_prep import (
|
|
9
|
+
flatten_dict,
|
|
10
|
+
generator_batching_wbytes,
|
|
11
|
+
)
|
|
12
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
+
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
14
|
+
from unstructured_ingest.v2.interfaces import (
|
|
15
|
+
AccessConfig,
|
|
16
|
+
ConnectionConfig,
|
|
17
|
+
FileData,
|
|
18
|
+
Uploader,
|
|
19
|
+
UploaderConfig,
|
|
20
|
+
UploadStager,
|
|
21
|
+
UploadStagerConfig,
|
|
22
|
+
)
|
|
23
|
+
from unstructured_ingest.v2.logger import logger
|
|
24
|
+
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
25
|
+
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from pinecone import Index as PineconeIndex
|
|
29
|
+
from pinecone import Pinecone
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Connector type label; used as the default of PineconeUploader.connector_type.
CONNECTOR_TYPE = "pinecone"

# Byte budget for a single upsert request (2MB); batches are sized against it.
MAX_PAYLOAD_SIZE = 2 * 1024**2
# Upper bound on the thread pool handed to the Pinecone index client.
MAX_POOL_THREADS = 100
# 40KB https://docs.pinecone.io/reference/quotas-and-limits#hard-limits
MAX_METADATA_BYTES = 40 * 1024
MAX_QUERY_RESULTS = 10_000
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class PineconeAccessConfig(AccessConfig):
    """Secret values required to authenticate against Pinecone."""

    # Exposed to users under the alias ``api_key``.
    pinecone_api_key: Optional[str] = Field(
        alias="api_key",
        description="API key for Pinecone.",
        default=None,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class PineconeConnectionConfig(ConnectionConfig):
    """Connection settings for one Pinecone index, plus client/index factories."""

    index_name: str = Field(description="Name of the index to connect to.")
    access_config: Secret[PineconeAccessConfig] = Field(
        validate_default=True,
        default=PineconeAccessConfig(),
    )

    @requires_dependencies(["pinecone"], extras="pinecone")
    def get_client(self, **index_kwargs) -> "Pinecone":
        """Create a Pinecone client from the configured API key.

        The client is tagged with the installed ``unstructured_ingest`` version.
        ``index_kwargs`` is accepted but not used by this method.
        """
        from pinecone import Pinecone

        from unstructured_ingest import __version__ as unstructured_version

        api_key = self.access_config.get_secret_value().pinecone_api_key
        return Pinecone(api_key=api_key, source_tag=f"unstructured_ingest=={unstructured_version}")

    def get_index(self, **index_kwargs) -> "PineconeIndex":
        """Return a handle on ``index_name``; ``index_kwargs`` are forwarded to ``Pinecone.Index``."""
        client = self.get_client()
        handle = client.Index(name=self.index_name, **index_kwargs)
        # describe_index is only called to enrich the debug log line.
        logger.debug(f"connected to index: {client.describe_index(self.index_name)}")
        return handle
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Keys copied into the Pinecone metadata payload by default
# (the default value of PineconeUploadStagerConfig.metadata_fields).
ALLOWED_FIELDS = (
    "element_id", "text", "parent_id", "category_depth",
    "emphasized_text_tags", "emphasized_text_contents",
    "coordinates", "last_modified", "page_number", "filename",
    "is_continuation", "link_urls", "link_texts", "text_as_html",
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class PineconeUploadStagerConfig(UploadStagerConfig):
    """Selects which element fields the stager forwards to Pinecone as metadata."""

    metadata_fields: list[str] = Field(
        description=(
            "which metadata from the source element to map to the payload metadata being sent to "
            "Pinecone."
        ),
        default=list(ALLOWED_FIELDS),
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class PineconeUploaderConfig(UploaderConfig):
    """Tuning knobs for the Pinecone uploader: batching, threading, namespace, record key."""

    batch_size: Optional[int] = Field(
        description="Optional number of records per batch. Will otherwise limit by size.",
        default=None,
    )
    pool_threads: Optional[int] = Field(
        description="Optional limit on number of threads to use for upload",
        default=1,
    )
    namespace: Optional[str] = Field(
        description="The namespace to write to. If not specified, the default namespace is used",
        default=None,
    )
    record_id_key: str = Field(
        description="searchable key to find entries for the same record on previous runs",
        default=RECORD_ID_LABEL,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@dataclass
class PineconeUploadStager(UploadStager):
    """Reshapes element dicts into Pinecone vector records (``id`` / ``values`` / ``metadata``)."""

    upload_stager_config: PineconeUploadStagerConfig = field(
        default_factory=PineconeUploadStagerConfig
    )

    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
        """Build the Pinecone record for a single element.

        Metadata is collected from the element itself, its ``metadata`` dict and the nested
        ``data_source`` / ``coordinates`` dicts, restricted to the configured
        ``metadata_fields``, then flattened. If the flattened metadata serializes to more
        than ``MAX_METADATA_BYTES`` it is dropped entirely. The record id label is always set.

        Note: ``embeddings`` and ``metadata`` are popped from ``element_dict`` in place.
        """
        vector = element_dict.pop("embeddings", None)
        element_metadata: dict[str, Any] = element_dict.pop("metadata", {})
        data_source = element_metadata.pop("data_source", {})
        coordinates = element_metadata.pop("coordinates", {})

        wanted_fields = self.upload_stager_config.metadata_fields
        selected: dict[str, Any] = {}
        # Sources are visited in this order, so later ones win on key collisions.
        for source in (element_dict, element_metadata, data_source, coordinates):
            for key, value in source.items():
                if key in wanted_fields:
                    selected[key] = value

        flat_metadata = flatten_dict(
            selected, separator="-", flatten_lists=True, remove_none=True
        )
        size_in_bytes = len(json.dumps(flat_metadata).encode())
        if size_in_bytes > MAX_METADATA_BYTES:
            logger.info(
                f"Metadata size is {size_in_bytes} bytes, which exceeds the limit of"
                f" {MAX_METADATA_BYTES} bytes per vector. Dropping the metadata."
            )
            flat_metadata = {}
        flat_metadata[RECORD_ID_LABEL] = file_data.identifier

        # To support more optimal deletes, a prefix is suggested for each record:
        # https://docs.pinecone.io/guides/data/manage-rag-documents#delete-all-records-for-a-parent-document
        element_key = get_enhanced_element_id(element_dict=element_dict, file_data=file_data)
        return {
            "id": f"{file_data.identifier}#{element_key}",
            "values": vector,
            "metadata": flat_metadata,
        }
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass
class PineconeUploader(Uploader):
    """Writes staged vector records to a Pinecone index.

    Each run first removes records previously written for the same source file (by metadata
    filter on pod indexes, by id prefix on serverless indexes) and then upserts the new
    records in size-limited batches.
    """

    upload_config: PineconeUploaderConfig
    connection_config: PineconeConnectionConfig
    connector_type: str = CONNECTOR_TYPE

    def precheck(self):
        """Verify the index can be reached.

        Raises:
            DestinationConnectionError: if obtaining the index fails for any reason.
        """
        try:
            self.connection_config.get_index()
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            # Chain the cause so the original traceback is preserved for callers.
            raise DestinationConnectionError(f"failed to validate connection: {e}") from e

    def pod_delete_by_record_id(self, file_data: FileData) -> None:
        """Delete all records of ``file_data`` from a pod index using a metadata filter."""
        logger.debug(
            f"deleting any content with metadata "
            f"{self.upload_config.record_id_key}={file_data.identifier} "
            f"from pinecone pod index"
        )
        index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS)
        # NOTE(review): the stager in this file stores the identifier under RECORD_ID_LABEL;
        # a non-default record_id_key would not match those records - confirm.
        delete_kwargs = {
            "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}}
        }
        if namespace := self.upload_config.namespace:
            delete_kwargs["namespace"] = namespace

        resp = index.delete(**delete_kwargs)
        logger.debug(
            f"deleted any content with metadata "
            f"{self.upload_config.record_id_key}={file_data.identifier} "
            f"from pinecone index: {resp}"
        )

    def serverless_delete_by_record_id(self, file_data: FileData) -> None:
        """Delete all records of ``file_data`` from a serverless index.

        Record ids are listed by the ``"<identifier>#"`` prefix and deleted page by page.
        A non-empty delete response is logged as an error; it does not abort the loop.
        """
        logger.debug(
            f"deleting any content with metadata "
            f"{self.upload_config.record_id_key}={file_data.identifier} "
            f"from pinecone serverless index"
        )
        index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS)
        namespace = self.upload_config.namespace
        list_kwargs = {"prefix": f"{file_data.identifier}#"}
        if namespace:
            list_kwargs["namespace"] = namespace

        deleted_ids = 0
        for ids in index.list(**list_kwargs):
            deleted_ids += len(ids)
            delete_kwargs = {"ids": ids}
            if namespace:
                delete_kwargs["namespace"] = namespace
            # Inspect the actual response of the delete call (previously the namespace
            # string was mistaken for the response, logging a bogus error per batch).
            delete_resp = index.delete(**delete_kwargs)
            # delete_resp should be an empty dict if there were no errors
            if delete_resp:
                logger.error(f"failed to delete batch of ids: {delete_resp}")
        logger.info(
            f"deleted {deleted_ids} records with metadata "
            f"{self.upload_config.record_id_key}={file_data.identifier} "
            f"from pinecone index"
        )

    @requires_dependencies(["pinecone"], extras="pinecone")
    def upsert_batches_async(self, elements_dict: list[dict]):
        """Upsert ``elements_dict`` in batches using the client's async requests.

        Batches are limited by payload bytes (and optionally by ``batch_size``). All upserts
        are submitted first, then awaited.

        Raises:
            DestinationConnectionError: if Pinecone reports an API error for any batch.
        """
        from pinecone.exceptions import PineconeApiException

        chunks = list(
            generator_batching_wbytes(
                iterable=elements_dict,
                batch_size_limit_bytes=MAX_PAYLOAD_SIZE - 100,
                max_batch_size=self.upload_config.batch_size,
            )
        )
        logger.info(f"split doc with {len(elements_dict)} elements into {len(chunks)} batches")

        # Never use more threads than there are batches, nor more than the global cap.
        max_pool_threads = min(len(chunks), MAX_POOL_THREADS)
        if self.upload_config.pool_threads:
            pool_threads = min(self.upload_config.pool_threads, max_pool_threads)
        else:
            pool_threads = max_pool_threads
        index = self.connection_config.get_index(pool_threads=pool_threads)
        with index:
            upsert_kwargs = [{"vectors": chunk, "async_req": True} for chunk in chunks]
            if namespace := self.upload_config.namespace:
                for kwargs in upsert_kwargs:
                    kwargs["namespace"] = namespace
            async_results = [index.upsert(**kwarg) for kwarg in upsert_kwargs]
            # Wait for and retrieve responses (this raises in case of error)
            try:
                results = [async_result.get() for async_result in async_results]
            except PineconeApiException as api_error:
                raise DestinationConnectionError(f"http error: {api_error}") from api_error
            logger.debug(f"results: {results}")

    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
        """Replace the records of ``file_data`` in the index with ``data``.

        Raises:
            ValueError: if the index description names neither a serverless nor a pod spec.
        """
        logger.info(
            f"writing a total of {len(data)} elements via"
            f" document batches to destination"
            f" index named {self.connection_config.index_name}"
        )
        # Determine if serverless or pod based index
        pinecone_client = self.connection_config.get_client()
        index_description = pinecone_client.describe_index(name=self.connection_config.index_name)
        # A missing spec falls through to the ValueError below instead of a TypeError.
        spec = index_description.get("spec") or {}
        if "serverless" in spec:
            self.serverless_delete_by_record_id(file_data=file_data)
        elif "pod" in spec:
            self.pod_delete_by_record_id(file_data=file_data)
        else:
            raise ValueError(f"unexpected spec type in index description: {index_description}")
        self.upsert_batches_async(elements_dict=data)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# Bundles the Pinecone connection, stager and uploader classes into one registry entry.
pinecone_destination_entry = DestinationRegistryEntry(
    connection_config=PineconeConnectionConfig,
    upload_stager_config=PineconeUploadStagerConfig,
    upload_stager=PineconeUploadStager,
    uploader_config=PineconeUploaderConfig,
    uploader=PineconeUploader,
)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unstructured_ingest.v2.processes.connector_registry import (
|
|
4
|
+
add_destination_entry,
|
|
5
|
+
)
|
|
6
|
+
|
|
7
|
+
from .cloud import CONNECTOR_TYPE as CLOUD_CONNECTOR_TYPE
|
|
8
|
+
from .cloud import qdrant_cloud_destination_entry
|
|
9
|
+
from .local import CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE
|
|
10
|
+
from .local import qdrant_local_destination_entry
|
|
11
|
+
from .server import CONNECTOR_TYPE as SERVER_CONNECTOR_TYPE
|
|
12
|
+
from .server import qdrant_server_destination_entry
|
|
13
|
+
|
|
14
|
+
# Register the three Qdrant destinations (cloud, server, local) on package import.
add_destination_entry(
    destination_type=CLOUD_CONNECTOR_TYPE,
    entry=qdrant_cloud_destination_entry,
)
add_destination_entry(
    destination_type=SERVER_CONNECTOR_TYPE,
    entry=qdrant_server_destination_entry,
)
add_destination_entry(
    destination_type=LOCAL_CONNECTOR_TYPE,
    entry=qdrant_local_destination_entry,
)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, Secret
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import (
|
|
7
|
+
QdrantAccessConfig,
|
|
8
|
+
QdrantConnectionConfig,
|
|
9
|
+
QdrantUploader,
|
|
10
|
+
QdrantUploaderConfig,
|
|
11
|
+
QdrantUploadStager,
|
|
12
|
+
QdrantUploadStagerConfig,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Connector type label for the Qdrant Cloud destination (default of CloudQdrantUploader.connector_type).
CONNECTOR_TYPE = "qdrant-cloud"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CloudQdrantAccessConfig(QdrantAccessConfig):
    """Secret values for Qdrant Cloud; the API key is required."""

    api_key: str = Field(
        description="Qdrant API key",
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CloudQdrantConnectionConfig(QdrantConnectionConfig):
    """Connection settings for Qdrant Cloud: a URL plus an API key."""

    url: str = Field(description="url of Qdrant Cloud", default=None)
    # No default here, so an access config must be supplied explicitly.
    access_config: Secret[CloudQdrantAccessConfig]

    def get_client_kwargs(self) -> dict:
        """Return the ``api_key`` and ``url`` keyword arguments for the Qdrant client."""
        secrets = self.access_config.get_secret_value()
        return {"api_key": secrets.api_key, "url": self.url}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CloudQdrantUploadStagerConfig(QdrantUploadStagerConfig):
    """Stager config for Qdrant Cloud; adds nothing to the shared Qdrant stager config."""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class CloudQdrantUploadStager(QdrantUploadStager):
    """Qdrant stager bound to the Qdrant Cloud stager config type."""

    # Narrows the config type; staging logic is inherited from QdrantUploadStager.
    upload_stager_config: CloudQdrantUploadStagerConfig
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class CloudQdrantUploaderConfig(QdrantUploaderConfig):
    """Uploader config for Qdrant Cloud; adds nothing to the shared Qdrant uploader config."""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class CloudQdrantUploader(QdrantUploader):
    """Qdrant uploader bound to the Qdrant Cloud connection and uploader config types."""

    # Upload logic is inherited from QdrantUploader; only the config types are narrowed.
    connection_config: CloudQdrantConnectionConfig
    upload_config: CloudQdrantUploaderConfig
    connector_type: str = CONNECTOR_TYPE
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Bundles the Qdrant Cloud connection, stager and uploader classes into one registry entry.
qdrant_cloud_destination_entry = DestinationRegistryEntry(
    connection_config=CloudQdrantConnectionConfig,
    upload_stager_config=CloudQdrantUploadStagerConfig,
    upload_stager=CloudQdrantUploadStager,
    uploader_config=CloudQdrantUploaderConfig,
    uploader=CloudQdrantUploader,
)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, Secret
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import (
|
|
7
|
+
QdrantAccessConfig,
|
|
8
|
+
QdrantConnectionConfig,
|
|
9
|
+
QdrantUploader,
|
|
10
|
+
QdrantUploaderConfig,
|
|
11
|
+
QdrantUploadStager,
|
|
12
|
+
QdrantUploadStagerConfig,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Connector type label for the local Qdrant destination (default of LocalQdrantUploader.connector_type).
CONNECTOR_TYPE = "qdrant-local"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LocalQdrantAccessConfig(QdrantAccessConfig):
    """Access config for local Qdrant; declares no fields of its own."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LocalQdrantConnectionConfig(QdrantConnectionConfig):
    """Connection settings for a local Qdrant instance identified by a persistence path."""

    path: str = Field(description="Persistence path for QdrantLocal.", default=None)
    access_config: Secret[LocalQdrantAccessConfig] = Field(
        validate_default=True,
        default_factory=LocalQdrantAccessConfig,
    )

    def get_client_kwargs(self) -> dict:
        """Return the ``path`` keyword argument for the Qdrant client."""
        client_kwargs = {"path": self.path}
        return client_kwargs
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class LocalQdrantUploadStagerConfig(QdrantUploadStagerConfig):
    """Stager config for local Qdrant; adds nothing to the shared Qdrant stager config."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class LocalQdrantUploadStager(QdrantUploadStager):
    """Qdrant stager bound to the local Qdrant stager config type."""

    # Narrows the config type; staging logic is inherited from QdrantUploadStager.
    upload_stager_config: LocalQdrantUploadStagerConfig
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LocalQdrantUploaderConfig(QdrantUploaderConfig):
    """Uploader config for local Qdrant; adds nothing to the shared Qdrant uploader config."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class LocalQdrantUploader(QdrantUploader):
    """Qdrant uploader bound to the local Qdrant connection and uploader config types."""

    # Upload logic is inherited from QdrantUploader; only the config types are narrowed.
    connection_config: LocalQdrantConnectionConfig
    upload_config: LocalQdrantUploaderConfig
    connector_type: str = CONNECTOR_TYPE
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Bundles the local Qdrant connection, stager and uploader classes into one registry entry.
qdrant_local_destination_entry = DestinationRegistryEntry(
    connection_config=LocalQdrantConnectionConfig,
    upload_stager_config=LocalQdrantUploadStagerConfig,
    upload_stager=LocalQdrantUploadStager,
    uploader_config=LocalQdrantUploaderConfig,
    uploader=LocalQdrantUploader,
)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import Field, Secret
|
|
9
|
+
|
|
10
|
+
from unstructured_ingest.error import DestinationConnectionError, WriteError
|
|
11
|
+
from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
|
|
12
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
|
+
from unstructured_ingest.v2.interfaces import (
|
|
14
|
+
AccessConfig,
|
|
15
|
+
ConnectionConfig,
|
|
16
|
+
FileData,
|
|
17
|
+
Uploader,
|
|
18
|
+
UploaderConfig,
|
|
19
|
+
UploadStager,
|
|
20
|
+
UploadStagerConfig,
|
|
21
|
+
)
|
|
22
|
+
from unstructured_ingest.v2.logger import logger
|
|
23
|
+
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from qdrant_client import AsyncQdrantClient, QdrantClient
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class QdrantAccessConfig(AccessConfig, ABC):
    """Base access config for the Qdrant connectors; declares no fields of its own."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class QdrantConnectionConfig(ConnectionConfig, ABC):
    """Base connection config for Qdrant.

    Subclasses provide the client constructor arguments through ``get_client_kwargs``;
    this class turns them into sync and async clients that are closed on context exit.
    """

    access_config: Secret[QdrantAccessConfig] = Field(
        description="Access Config",
        validate_default=True,
        default_factory=QdrantAccessConfig,
    )

    @abstractmethod
    def get_client_kwargs(self) -> dict:
        """Return the keyword arguments passed to the Qdrant client constructor."""

    @requires_dependencies(["qdrant_client"], extras="qdrant")
    @asynccontextmanager
    async def get_async_client(self) -> AsyncGenerator["AsyncQdrantClient", None]:
        """Yield an ``AsyncQdrantClient``; it is closed when the context exits."""
        from qdrant_client import AsyncQdrantClient

        async_client = AsyncQdrantClient(**self.get_client_kwargs())
        try:
            yield async_client
        finally:
            await async_client.close()

    @requires_dependencies(["qdrant_client"], extras="qdrant")
    @contextmanager
    def get_client(self) -> Generator["QdrantClient", None, None]:
        """Yield a ``QdrantClient``; it is closed when the context exits."""
        from qdrant_client import QdrantClient

        sync_client = QdrantClient(**self.get_client_kwargs())
        try:
            yield sync_client
        finally:
            sync_client.close()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class QdrantUploadStagerConfig(UploadStagerConfig):
    """Stager config for Qdrant; declares no fields of its own."""
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class QdrantUploadStager(UploadStager, ABC):
    """Reshapes element dicts into Qdrant point dicts (``id`` / ``vector`` / ``payload``)."""

    upload_stager_config: QdrantUploadStagerConfig = field(
        default_factory=lambda: QdrantUploadStagerConfig()
    )

    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
        """Prepares dictionary in the format that Qdrant requires.

        Works on a shallow copy, so top-level keys of ``element_dict`` are left in place.
        The payload carries the element text, a JSON dump of the remaining element
        (without embeddings and text) and the same remaining data flattened with ``-``.
        """
        data = element_dict.copy()
        # Order matters: the id is derived before anything is popped, and both pops
        # happen before the remainder is serialized and flattened.
        point_id = get_enhanced_element_id(element_dict=data, file_data=file_data)
        vector = data.pop("embeddings", {})
        text = data.pop("text", None)
        return {
            "id": point_id,
            "vector": vector,
            "payload": {
                "text": text,
                "element_serialized": json.dumps(data),
                **flatten_dict(
                    data,
                    separator="-",
                    flatten_lists=True,
                ),
            },
        }
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class QdrantUploaderConfig(UploaderConfig):
    """Uploader settings for Qdrant: target collection and batch size."""

    collection_name: str = Field(description="Name of the collection.")
    batch_size: int = Field(description="Number of records per batch.", default=50)
    # Flagged as deprecated; not read by the uploader code in this module.
    num_processes: Optional[int] = Field(
        deprecated=True,
        description="Optional limit on number of threads to use for upload.",
        default=1,
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass
class QdrantUploader(Uploader, ABC):
    """Uploads staged points to a Qdrant collection in concurrent async batches."""

    upload_config: QdrantUploaderConfig
    connection_config: QdrantConnectionConfig

    @DestinationConnectionError.wrap
    def precheck(self) -> None:
        """Check that the configured collection exists.

        Raises:
            DestinationConnectionError: if the collection is not among those listed.
        """
        with self.connection_config.get_client() as client:
            existing = [collection.name for collection in client.get_collections().collections]
            if self.upload_config.collection_name not in existing:
                raise DestinationConnectionError(
                    "collection '{}' not found: {}".format(
                        self.upload_config.collection_name, ", ".join(existing)
                    )
                )

    def is_async(self):
        """Report that this uploader runs through ``run_data_async``."""
        return True

    async def run_data_async(
        self,
        data: list[dict],
        file_data: FileData,
        **kwargs: Any,
    ) -> None:
        """Split ``data`` into batches and upsert all of them concurrently."""
        batch_size = self.upload_config.batch_size
        batches = list(batch_generator(data, batch_size=batch_size))
        logger.debug("Elements split into %i batches of size %i.", len(batches), batch_size)
        pending = [self._upsert_batch(batch) for batch in batches]
        await asyncio.gather(*pending)

    async def _upsert_batch(self, batch: list[dict]) -> None:
        """Upsert one batch of point dicts, using a fresh async client for the call.

        Raises:
            WriteError: if building the client or the upsert itself fails.
        """
        from qdrant_client import models

        collection = self.upload_config.collection_name
        points: list[models.PointStruct] = [models.PointStruct(**item) for item in batch]
        try:
            logger.debug("Upserting %i points to the '%s' collection.", len(points), collection)
            async with self.connection_config.get_async_client() as async_client:
                await async_client.upsert(collection, points=points, wait=True)
        except Exception as api_error:
            logger.error(
                "Failed to upsert points to the collection due to the following error %s", api_error
            )
            raise WriteError(f"Qdrant error: {api_error}") from api_error
        else:
            logger.debug("Successfully upsert points to the collection.")
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, Secret
|
|
4
|
+
|
|
5
|
+
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
6
|
+
from unstructured_ingest.v2.processes.connectors.qdrant.qdrant import (
|
|
7
|
+
QdrantAccessConfig,
|
|
8
|
+
QdrantConnectionConfig,
|
|
9
|
+
QdrantUploader,
|
|
10
|
+
QdrantUploaderConfig,
|
|
11
|
+
QdrantUploadStager,
|
|
12
|
+
QdrantUploadStagerConfig,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Connector type label for the Qdrant server destination (default of ServerQdrantUploader.connector_type).
CONNECTOR_TYPE = "qdrant-server"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ServerQdrantAccessConfig(QdrantAccessConfig):
    """Access config for a Qdrant server; declares no fields of its own."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ServerQdrantConnectionConfig(QdrantConnectionConfig):
    """Connection settings for a Qdrant server reached by URL."""

    url: str = Field(description="url of Qdrant server", default=None)
    access_config: Secret[ServerQdrantAccessConfig] = Field(
        validate_default=True,
        default_factory=ServerQdrantAccessConfig,
    )

    def get_client_kwargs(self) -> dict:
        """Return the ``url`` keyword argument for the Qdrant client."""
        client_kwargs = {"url": self.url}
        return client_kwargs
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ServerQdrantUploadStagerConfig(QdrantUploadStagerConfig):
    """Stager config for a Qdrant server; adds nothing to the shared Qdrant stager config."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ServerQdrantUploadStager(QdrantUploadStager):
    """Qdrant stager bound to the Qdrant server stager config type."""

    # Narrows the config type; staging logic is inherited from QdrantUploadStager.
    upload_stager_config: ServerQdrantUploadStagerConfig
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ServerQdrantUploaderConfig(QdrantUploaderConfig):
    """Uploader config for a Qdrant server; adds nothing to the shared Qdrant uploader config."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class ServerQdrantUploader(QdrantUploader):
    """Qdrant uploader bound to the Qdrant server connection and uploader config types."""

    # Upload logic is inherited from QdrantUploader; only the config types are narrowed.
    connection_config: ServerQdrantConnectionConfig
    upload_config: ServerQdrantUploaderConfig
    connector_type: str = CONNECTOR_TYPE
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Bundles the Qdrant server connection, stager and uploader classes into one registry entry.
qdrant_server_destination_entry = DestinationRegistryEntry(
    connection_config=ServerQdrantConnectionConfig,
    upload_stager_config=ServerQdrantUploadStagerConfig,
    upload_stager=ServerQdrantUploadStager,
    uploader_config=ServerQdrantUploaderConfig,
    uploader=ServerQdrantUploader,
)
|