unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- examples/airtable.py +44 -0
- examples/azure_cognitive_search.py +55 -0
- examples/chroma.py +54 -0
- examples/couchbase.py +55 -0
- examples/databricks_volumes_dest.py +55 -0
- examples/databricks_volumes_source.py +53 -0
- examples/delta_table.py +45 -0
- examples/discord_example.py +36 -0
- examples/elasticsearch.py +49 -0
- examples/google_drive.py +45 -0
- examples/kdbai.py +54 -0
- examples/local.py +36 -0
- examples/milvus.py +44 -0
- examples/mongodb.py +53 -0
- examples/opensearch.py +50 -0
- examples/pinecone.py +57 -0
- examples/s3.py +38 -0
- examples/salesforce.py +44 -0
- examples/sharepoint.py +47 -0
- examples/singlestore.py +49 -0
- examples/sql.py +90 -0
- examples/vectara.py +54 -0
- examples/weaviate.py +44 -0
- test/integration/chunkers/test_chunkers.py +1 -1
- test/integration/connectors/conftest.py +1 -1
- test/integration/connectors/databricks/test_volumes_native.py +3 -3
- test/integration/connectors/discord/test_discord.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +2 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
- test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
- test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
- test/integration/connectors/sql/test_postgres.py +2 -2
- test/integration/connectors/sql/test_singlestore.py +2 -2
- test/integration/connectors/sql/test_snowflake.py +2 -2
- test/integration/connectors/sql/test_sqlite.py +2 -2
- test/integration/connectors/sql/test_vastdb.py +1 -1
- test/integration/connectors/test_astradb.py +2 -2
- test/integration/connectors/test_azure_ai_search.py +2 -2
- test/integration/connectors/test_chroma.py +2 -2
- test/integration/connectors/test_confluence.py +1 -1
- test/integration/connectors/test_delta_table.py +2 -2
- test/integration/connectors/test_dropbox.py +2 -2
- test/integration/connectors/test_github.py +1 -1
- test/integration/connectors/test_google_drive.py +2 -2
- test/integration/connectors/test_jira.py +1 -1
- test/integration/connectors/test_lancedb.py +7 -7
- test/integration/connectors/test_milvus.py +2 -2
- test/integration/connectors/test_mongodb.py +2 -2
- test/integration/connectors/test_neo4j.py +7 -7
- test/integration/connectors/test_notion.py +2 -2
- test/integration/connectors/test_onedrive.py +2 -2
- test/integration/connectors/test_pinecone.py +3 -3
- test/integration/connectors/test_qdrant.py +6 -6
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +3 -3
- test/integration/connectors/test_sharepoint.py +1 -1
- test/integration/connectors/test_vectara.py +4 -4
- test/integration/connectors/test_zendesk.py +2 -2
- test/integration/connectors/utils/validation/destination.py +2 -2
- test/integration/connectors/utils/validation/source.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +1 -1
- test/integration/connectors/weaviate/test_local.py +2 -2
- test/integration/embedders/test_azure_openai.py +1 -1
- test/integration/embedders/test_bedrock.py +2 -2
- test/integration/embedders/test_huggingface.py +1 -1
- test/integration/embedders/test_mixedbread.py +1 -1
- test/integration/embedders/test_octoai.py +2 -2
- test/integration/embedders/test_openai.py +2 -2
- test/integration/embedders/test_togetherai.py +2 -2
- test/integration/embedders/test_vertexai.py +1 -1
- test/integration/embedders/test_voyageai.py +1 -1
- test/integration/partitioners/test_partitioner.py +2 -2
- test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
- test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
- test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
- test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
- test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
- test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
- test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
- test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
- test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
- test/unit/test_html.py +1 -1
- test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
- test/unit/test_utils.py +106 -97
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/__init__.py +0 -14
- unstructured_ingest/cli/base/__init__.py +4 -0
- unstructured_ingest/cli/base/cmd.py +259 -9
- unstructured_ingest/cli/base/dest.py +58 -61
- unstructured_ingest/cli/base/src.py +54 -36
- unstructured_ingest/cli/cli.py +4 -17
- unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
- unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
- unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
- unstructured_ingest/embed/bedrock.py +3 -3
- unstructured_ingest/embed/octoai.py +3 -3
- unstructured_ingest/embed/openai.py +3 -3
- unstructured_ingest/embed/togetherai.py +4 -4
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +4 -4
- unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
- unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
- unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
- unstructured_ingest/{v2/otel.py → otel.py} +1 -1
- unstructured_ingest/pipeline/__init__.py +0 -22
- unstructured_ingest/pipeline/interfaces.py +179 -238
- unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
- unstructured_ingest/pipeline/pipeline.py +388 -97
- unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
- unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
- unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
- unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
- unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
- unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
- unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
- unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
- unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
- unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
- unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
- unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
- unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
- unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
- unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
- unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
- unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
- unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
- unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
- unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
- unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
- unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
- unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
- unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
- unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
- unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
- unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
- unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
- unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
- unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
- unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
- unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
- unstructured_ingest/utils/compression.py +1 -48
- unstructured_ingest/utils/data_prep.py +9 -1
- unstructured_ingest/utils/html.py +3 -3
- unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
- unstructured_ingest/utils/string_and_date_utils.py +1 -1
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
- unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
- test/unit/v2/test_utils.py +0 -82
- unstructured_ingest/cli/cmd_factory.py +0 -12
- unstructured_ingest/cli/cmds/__init__.py +0 -145
- unstructured_ingest/cli/cmds/airtable.py +0 -69
- unstructured_ingest/cli/cmds/astradb.py +0 -99
- unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
- unstructured_ingest/cli/cmds/biomed.py +0 -52
- unstructured_ingest/cli/cmds/chroma.py +0 -104
- unstructured_ingest/cli/cmds/clarifai.py +0 -71
- unstructured_ingest/cli/cmds/confluence.py +0 -69
- unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
- unstructured_ingest/cli/cmds/delta_table.py +0 -94
- unstructured_ingest/cli/cmds/discord.py +0 -47
- unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
- unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
- unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
- unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
- unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
- unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
- unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
- unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
- unstructured_ingest/cli/cmds/github.py +0 -54
- unstructured_ingest/cli/cmds/gitlab.py +0 -54
- unstructured_ingest/cli/cmds/google_drive.py +0 -49
- unstructured_ingest/cli/cmds/hubspot.py +0 -70
- unstructured_ingest/cli/cmds/jira.py +0 -71
- unstructured_ingest/cli/cmds/kafka.py +0 -102
- unstructured_ingest/cli/cmds/local.py +0 -43
- unstructured_ingest/cli/cmds/mongodb.py +0 -72
- unstructured_ingest/cli/cmds/notion.py +0 -48
- unstructured_ingest/cli/cmds/onedrive.py +0 -66
- unstructured_ingest/cli/cmds/opensearch.py +0 -117
- unstructured_ingest/cli/cmds/outlook.py +0 -67
- unstructured_ingest/cli/cmds/pinecone.py +0 -71
- unstructured_ingest/cli/cmds/qdrant.py +0 -124
- unstructured_ingest/cli/cmds/reddit.py +0 -67
- unstructured_ingest/cli/cmds/salesforce.py +0 -58
- unstructured_ingest/cli/cmds/sharepoint.py +0 -66
- unstructured_ingest/cli/cmds/slack.py +0 -56
- unstructured_ingest/cli/cmds/sql.py +0 -66
- unstructured_ingest/cli/cmds/vectara.py +0 -66
- unstructured_ingest/cli/cmds/weaviate.py +0 -98
- unstructured_ingest/cli/cmds/wikipedia.py +0 -40
- unstructured_ingest/cli/common.py +0 -7
- unstructured_ingest/cli/interfaces.py +0 -663
- unstructured_ingest/cli/utils.py +0 -205
- unstructured_ingest/connector/airtable.py +0 -309
- unstructured_ingest/connector/astradb.py +0 -267
- unstructured_ingest/connector/azure_ai_search.py +0 -144
- unstructured_ingest/connector/biomed.py +0 -320
- unstructured_ingest/connector/chroma.py +0 -158
- unstructured_ingest/connector/clarifai.py +0 -122
- unstructured_ingest/connector/confluence.py +0 -285
- unstructured_ingest/connector/databricks_volumes.py +0 -137
- unstructured_ingest/connector/delta_table.py +0 -203
- unstructured_ingest/connector/discord.py +0 -180
- unstructured_ingest/connector/elasticsearch.py +0 -396
- unstructured_ingest/connector/fsspec/azure.py +0 -78
- unstructured_ingest/connector/fsspec/box.py +0 -109
- unstructured_ingest/connector/fsspec/dropbox.py +0 -160
- unstructured_ingest/connector/fsspec/fsspec.py +0 -359
- unstructured_ingest/connector/fsspec/gcs.py +0 -82
- unstructured_ingest/connector/fsspec/s3.py +0 -62
- unstructured_ingest/connector/fsspec/sftp.py +0 -81
- unstructured_ingest/connector/git.py +0 -124
- unstructured_ingest/connector/github.py +0 -174
- unstructured_ingest/connector/gitlab.py +0 -142
- unstructured_ingest/connector/google_drive.py +0 -348
- unstructured_ingest/connector/hubspot.py +0 -278
- unstructured_ingest/connector/jira.py +0 -469
- unstructured_ingest/connector/kafka.py +0 -293
- unstructured_ingest/connector/local.py +0 -139
- unstructured_ingest/connector/mongodb.py +0 -284
- unstructured_ingest/connector/notion/client.py +0 -248
- unstructured_ingest/connector/notion/connector.py +0 -469
- unstructured_ingest/connector/notion/helpers.py +0 -584
- unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
- unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
- unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
- unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
- unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
- unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
- unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
- unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
- unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
- unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
- unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
- unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
- unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
- unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
- unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
- unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
- unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
- unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
- unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
- unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
- unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
- unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
- unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
- unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
- unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
- unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
- unstructured_ingest/connector/notion/types/date.py +0 -26
- unstructured_ingest/connector/notion/types/file.py +0 -51
- unstructured_ingest/connector/notion/types/user.py +0 -76
- unstructured_ingest/connector/onedrive.py +0 -232
- unstructured_ingest/connector/opensearch.py +0 -218
- unstructured_ingest/connector/outlook.py +0 -285
- unstructured_ingest/connector/pinecone.py +0 -150
- unstructured_ingest/connector/qdrant.py +0 -144
- unstructured_ingest/connector/reddit.py +0 -166
- unstructured_ingest/connector/registry.py +0 -109
- unstructured_ingest/connector/salesforce.py +0 -301
- unstructured_ingest/connector/sharepoint.py +0 -573
- unstructured_ingest/connector/slack.py +0 -224
- unstructured_ingest/connector/sql.py +0 -199
- unstructured_ingest/connector/vectara.py +0 -253
- unstructured_ingest/connector/weaviate.py +0 -190
- unstructured_ingest/connector/wikipedia.py +0 -208
- unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
- unstructured_ingest/enhanced_dataclass/core.py +0 -99
- unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
- unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
- unstructured_ingest/interfaces.py +0 -852
- unstructured_ingest/pipeline/copy.py +0 -19
- unstructured_ingest/pipeline/doc_factory.py +0 -12
- unstructured_ingest/pipeline/partition.py +0 -60
- unstructured_ingest/pipeline/permissions.py +0 -12
- unstructured_ingest/pipeline/reformat/chunking.py +0 -134
- unstructured_ingest/pipeline/reformat/embedding.py +0 -64
- unstructured_ingest/pipeline/source.py +0 -77
- unstructured_ingest/pipeline/utils.py +0 -6
- unstructured_ingest/pipeline/write.py +0 -18
- unstructured_ingest/processor.py +0 -93
- unstructured_ingest/runner/__init__.py +0 -104
- unstructured_ingest/runner/airtable.py +0 -35
- unstructured_ingest/runner/astradb.py +0 -34
- unstructured_ingest/runner/base_runner.py +0 -89
- unstructured_ingest/runner/biomed.py +0 -45
- unstructured_ingest/runner/confluence.py +0 -35
- unstructured_ingest/runner/delta_table.py +0 -34
- unstructured_ingest/runner/discord.py +0 -35
- unstructured_ingest/runner/elasticsearch.py +0 -40
- unstructured_ingest/runner/fsspec/azure.py +0 -30
- unstructured_ingest/runner/fsspec/box.py +0 -28
- unstructured_ingest/runner/fsspec/dropbox.py +0 -30
- unstructured_ingest/runner/fsspec/fsspec.py +0 -40
- unstructured_ingest/runner/fsspec/gcs.py +0 -28
- unstructured_ingest/runner/fsspec/s3.py +0 -28
- unstructured_ingest/runner/fsspec/sftp.py +0 -28
- unstructured_ingest/runner/github.py +0 -37
- unstructured_ingest/runner/gitlab.py +0 -37
- unstructured_ingest/runner/google_drive.py +0 -35
- unstructured_ingest/runner/hubspot.py +0 -35
- unstructured_ingest/runner/jira.py +0 -35
- unstructured_ingest/runner/kafka.py +0 -34
- unstructured_ingest/runner/local.py +0 -23
- unstructured_ingest/runner/mongodb.py +0 -34
- unstructured_ingest/runner/notion.py +0 -61
- unstructured_ingest/runner/onedrive.py +0 -35
- unstructured_ingest/runner/opensearch.py +0 -40
- unstructured_ingest/runner/outlook.py +0 -33
- unstructured_ingest/runner/reddit.py +0 -35
- unstructured_ingest/runner/salesforce.py +0 -33
- unstructured_ingest/runner/sharepoint.py +0 -35
- unstructured_ingest/runner/slack.py +0 -33
- unstructured_ingest/runner/utils.py +0 -47
- unstructured_ingest/runner/wikipedia.py +0 -35
- unstructured_ingest/runner/writers/__init__.py +0 -48
- unstructured_ingest/runner/writers/astradb.py +0 -22
- unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
- unstructured_ingest/runner/writers/base_writer.py +0 -26
- unstructured_ingest/runner/writers/chroma.py +0 -22
- unstructured_ingest/runner/writers/clarifai.py +0 -19
- unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
- unstructured_ingest/runner/writers/delta_table.py +0 -24
- unstructured_ingest/runner/writers/elasticsearch.py +0 -24
- unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
- unstructured_ingest/runner/writers/fsspec/box.py +0 -21
- unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
- unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
- unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
- unstructured_ingest/runner/writers/kafka.py +0 -21
- unstructured_ingest/runner/writers/mongodb.py +0 -21
- unstructured_ingest/runner/writers/opensearch.py +0 -26
- unstructured_ingest/runner/writers/pinecone.py +0 -21
- unstructured_ingest/runner/writers/qdrant.py +0 -19
- unstructured_ingest/runner/writers/sql.py +0 -22
- unstructured_ingest/runner/writers/vectara.py +0 -22
- unstructured_ingest/runner/writers/weaviate.py +0 -21
- unstructured_ingest/utils/google_filetype.py +0 -9
- unstructured_ingest/v2/__init__.py +0 -1
- unstructured_ingest/v2/cli/__init__.py +0 -0
- unstructured_ingest/v2/cli/base/__init__.py +0 -4
- unstructured_ingest/v2/cli/base/cmd.py +0 -269
- unstructured_ingest/v2/cli/base/dest.py +0 -85
- unstructured_ingest/v2/cli/base/src.py +0 -85
- unstructured_ingest/v2/cli/cli.py +0 -24
- unstructured_ingest/v2/cli/utils/__init__.py +0 -0
- unstructured_ingest/v2/logger.py +0 -126
- unstructured_ingest/v2/main.py +0 -11
- unstructured_ingest/v2/pipeline/__init__.py +0 -0
- unstructured_ingest/v2/pipeline/interfaces.py +0 -211
- unstructured_ingest/v2/pipeline/pipeline.py +0 -408
- unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
- unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
- unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
- unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
- unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
- unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
- unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
- unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
- unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
- unstructured_ingest/v2/processes/utils/__init__.py +0 -0
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
- {test/unit/v2 → examples}/__init__.py +0 -0
- /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
- /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
- /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
- /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/__init__.py +0 -0
- /test/unit/{v2/utils → utils}/data_generator.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
- /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
- /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
- /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
- /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
- /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
- /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
- /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
- /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
- /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
- /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
- /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
- /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
- /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
- /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
- /unstructured_ingest/{v2 → utils}/constants.py +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
|
@@ -14,8 +14,9 @@ from pytest_mock import MockerFixture
|
|
|
14
14
|
|
|
15
15
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG, env_setup_path
|
|
16
16
|
from test.integration.utils import requires_env
|
|
17
|
-
from unstructured_ingest.
|
|
18
|
-
from unstructured_ingest.
|
|
17
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
18
|
+
from unstructured_ingest.logger import logger
|
|
19
|
+
from unstructured_ingest.processes.connectors.sql.databricks_delta_tables import (
|
|
19
20
|
CONNECTOR_TYPE,
|
|
20
21
|
DatabricksDeltaTablesAccessConfig,
|
|
21
22
|
DatabricksDeltaTablesConnectionConfig,
|
|
@@ -23,7 +24,6 @@ from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables imp
|
|
|
23
24
|
DatabricksDeltaTablesUploaderConfig,
|
|
24
25
|
DatabricksDeltaTablesUploadStager,
|
|
25
26
|
)
|
|
26
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
27
27
|
|
|
28
28
|
CATALOG = "utic-dev-tech-fixtures"
|
|
29
29
|
|
|
@@ -20,7 +20,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
SourceValidationConfigs,
|
|
21
21
|
source_connector_validation,
|
|
22
22
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
24
|
+
from unstructured_ingest.processes.connectors.sql.postgres import (
|
|
24
25
|
CONNECTOR_TYPE,
|
|
25
26
|
PostgresAccessConfig,
|
|
26
27
|
PostgresConnectionConfig,
|
|
@@ -31,7 +32,6 @@ from unstructured_ingest.v2.processes.connectors.sql.postgres import (
|
|
|
31
32
|
PostgresUploader,
|
|
32
33
|
PostgresUploadStager,
|
|
33
34
|
)
|
|
34
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
35
35
|
|
|
36
36
|
SEED_DATA_ROWS = 10
|
|
37
37
|
|
|
@@ -20,7 +20,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
SourceValidationConfigs,
|
|
21
21
|
source_connector_validation,
|
|
22
22
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
24
|
+
from unstructured_ingest.processes.connectors.sql.singlestore import (
|
|
24
25
|
CONNECTOR_TYPE,
|
|
25
26
|
SingleStoreAccessConfig,
|
|
26
27
|
SingleStoreConnectionConfig,
|
|
@@ -32,7 +33,6 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
|
|
|
32
33
|
SingleStoreUploaderConfig,
|
|
33
34
|
SingleStoreUploadStager,
|
|
34
35
|
)
|
|
35
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
36
36
|
|
|
37
37
|
SEED_DATA_ROWS = 10
|
|
38
38
|
|
|
@@ -22,7 +22,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
22
22
|
source_connector_validation,
|
|
23
23
|
)
|
|
24
24
|
from test.integration.utils import requires_env
|
|
25
|
-
from unstructured_ingest.
|
|
25
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
26
|
+
from unstructured_ingest.processes.connectors.sql.snowflake import (
|
|
26
27
|
CONNECTOR_TYPE,
|
|
27
28
|
SnowflakeAccessConfig,
|
|
28
29
|
SnowflakeConnectionConfig,
|
|
@@ -33,7 +34,6 @@ from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
|
|
|
33
34
|
SnowflakeUploader,
|
|
34
35
|
SnowflakeUploadStager,
|
|
35
36
|
)
|
|
36
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
37
37
|
|
|
38
38
|
SEED_DATA_ROWS = 20
|
|
39
39
|
|
|
@@ -20,7 +20,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
SourceValidationConfigs,
|
|
21
21
|
source_connector_validation,
|
|
22
22
|
)
|
|
23
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
24
|
+
from unstructured_ingest.processes.connectors.sql.sqlite import (
|
|
24
25
|
CONNECTOR_TYPE,
|
|
25
26
|
SQLiteConnectionConfig,
|
|
26
27
|
SQLiteDownloader,
|
|
@@ -30,7 +31,6 @@ from unstructured_ingest.v2.processes.connectors.sql.sqlite import (
|
|
|
30
31
|
SQLiteUploader,
|
|
31
32
|
SQLiteUploadStager,
|
|
32
33
|
)
|
|
33
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
34
34
|
|
|
35
35
|
SEED_DATA_ROWS = 10
|
|
36
36
|
|
|
@@ -8,7 +8,7 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
8
8
|
StagerValidationConfigs,
|
|
9
9
|
stager_validation,
|
|
10
10
|
)
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.processes.connectors.sql.vastdb import (
|
|
12
12
|
CONNECTOR_TYPE,
|
|
13
13
|
VastdbUploadStager,
|
|
14
14
|
VastdbUploadStagerConfig,
|
|
@@ -20,7 +20,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
source_connector_validation,
|
|
21
21
|
)
|
|
22
22
|
from test.integration.utils import requires_env
|
|
23
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
24
|
+
from unstructured_ingest.processes.connectors.astradb import (
|
|
24
25
|
CONNECTOR_TYPE,
|
|
25
26
|
AstraDBAccessConfig,
|
|
26
27
|
AstraDBConnectionConfig,
|
|
@@ -35,7 +36,6 @@ from unstructured_ingest.v2.processes.connectors.astradb import (
|
|
|
35
36
|
DestinationConnectionError,
|
|
36
37
|
SourceConnectionError,
|
|
37
38
|
)
|
|
38
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
39
39
|
|
|
40
40
|
EXISTENT_COLLECTION_NAME = "ingest_test_src"
|
|
41
41
|
NONEXISTENT_COLLECTION_NAME = "nonexistant"
|
|
@@ -29,7 +29,8 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
29
29
|
stager_validation,
|
|
30
30
|
)
|
|
31
31
|
from test.integration.utils import requires_env
|
|
32
|
-
from unstructured_ingest.
|
|
32
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
33
|
+
from unstructured_ingest.processes.connectors.azure_ai_search import (
|
|
33
34
|
CONNECTOR_TYPE,
|
|
34
35
|
RECORD_ID_LABEL,
|
|
35
36
|
AzureAISearchAccessConfig,
|
|
@@ -39,7 +40,6 @@ from unstructured_ingest.v2.processes.connectors.azure_ai_search import (
|
|
|
39
40
|
AzureAISearchUploadStager,
|
|
40
41
|
AzureAISearchUploadStagerConfig,
|
|
41
42
|
)
|
|
42
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
43
43
|
|
|
44
44
|
repo_path = Path(__file__).parent.resolve()
|
|
45
45
|
|
|
@@ -27,8 +27,8 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
27
27
|
StagerValidationConfigs,
|
|
28
28
|
stager_validation,
|
|
29
29
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
31
|
-
from unstructured_ingest.
|
|
30
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
31
|
+
from unstructured_ingest.processes.connectors.chroma import (
|
|
32
32
|
CONNECTOR_TYPE,
|
|
33
33
|
ChromaConnectionConfig,
|
|
34
34
|
ChromaUploader,
|
|
@@ -8,7 +8,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
8
8
|
source_connector_validation,
|
|
9
9
|
)
|
|
10
10
|
from test.integration.utils import requires_env
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.processes.connectors.confluence import (
|
|
12
12
|
CONNECTOR_TYPE,
|
|
13
13
|
ConfluenceAccessConfig,
|
|
14
14
|
ConfluenceConnectionConfig,
|
|
@@ -8,7 +8,8 @@ from fsspec import get_filesystem_class
|
|
|
8
8
|
|
|
9
9
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG
|
|
10
10
|
from test.integration.utils import requires_env
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
12
|
+
from unstructured_ingest.processes.connectors.delta_table import (
|
|
12
13
|
CONNECTOR_TYPE,
|
|
13
14
|
DeltaTableAccessConfig,
|
|
14
15
|
DeltaTableConnectionConfig,
|
|
@@ -17,7 +18,6 @@ from unstructured_ingest.v2.processes.connectors.delta_table import (
|
|
|
17
18
|
DeltaTableUploadStager,
|
|
18
19
|
DeltaTableUploadStagerConfig,
|
|
19
20
|
)
|
|
20
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
21
21
|
|
|
22
22
|
multiprocessing.set_start_method("spawn")
|
|
23
23
|
|
|
@@ -12,10 +12,10 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
12
12
|
source_connector_validation,
|
|
13
13
|
)
|
|
14
14
|
from test.integration.utils import requires_env
|
|
15
|
-
from unstructured_ingest.
|
|
15
|
+
from unstructured_ingest.processes.connectors.fsspec.dropbox import (
|
|
16
16
|
CONNECTOR_TYPE as DROPBOX_CONNECTOR_TYPE,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.processes.connectors.fsspec.dropbox import (
|
|
19
19
|
DropboxAccessConfig,
|
|
20
20
|
DropboxConnectionConfig,
|
|
21
21
|
DropboxDownloader,
|
|
@@ -8,7 +8,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
8
8
|
source_connector_validation,
|
|
9
9
|
)
|
|
10
10
|
from test.integration.utils import requires_env
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.processes.connectors.github import (
|
|
12
12
|
CONNECTOR_TYPE,
|
|
13
13
|
GithubAccessConfig,
|
|
14
14
|
GithubConnectionConfig,
|
|
@@ -18,8 +18,8 @@ from test.integration.utils import requires_env
|
|
|
18
18
|
from unstructured_ingest.error import (
|
|
19
19
|
SourceConnectionError,
|
|
20
20
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
22
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.interfaces import Downloader, Indexer
|
|
22
|
+
from unstructured_ingest.processes.connectors.google_drive import (
|
|
23
23
|
CONNECTOR_TYPE,
|
|
24
24
|
GoogleDriveAccessConfig,
|
|
25
25
|
GoogleDriveConnectionConfig,
|
|
@@ -8,7 +8,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
8
8
|
source_connector_validation,
|
|
9
9
|
)
|
|
10
10
|
from test.integration.utils import requires_env
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.processes.connectors.jira import (
|
|
12
12
|
CONNECTOR_TYPE,
|
|
13
13
|
JiraAccessConfig,
|
|
14
14
|
JiraConnectionConfig,
|
|
@@ -12,33 +12,33 @@ from lancedb import AsyncConnection
|
|
|
12
12
|
from upath import UPath
|
|
13
13
|
|
|
14
14
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
|
|
15
|
-
from unstructured_ingest.
|
|
16
|
-
from unstructured_ingest.
|
|
15
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
16
|
+
from unstructured_ingest.processes.connectors.lancedb.aws import (
|
|
17
17
|
LanceDBAwsAccessConfig,
|
|
18
18
|
LanceDBAwsConnectionConfig,
|
|
19
19
|
LanceDBAwsUploader,
|
|
20
20
|
)
|
|
21
|
-
from unstructured_ingest.
|
|
21
|
+
from unstructured_ingest.processes.connectors.lancedb.azure import (
|
|
22
22
|
LanceDBAzureAccessConfig,
|
|
23
23
|
LanceDBAzureConnectionConfig,
|
|
24
24
|
LanceDBAzureUploader,
|
|
25
25
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
26
|
+
from unstructured_ingest.processes.connectors.lancedb.gcp import (
|
|
27
27
|
LanceDBGCSAccessConfig,
|
|
28
28
|
LanceDBGCSConnectionConfig,
|
|
29
29
|
LanceDBGSPUploader,
|
|
30
30
|
)
|
|
31
|
-
from unstructured_ingest.
|
|
31
|
+
from unstructured_ingest.processes.connectors.lancedb.lancedb import (
|
|
32
32
|
CONNECTOR_TYPE,
|
|
33
33
|
LanceDBUploaderConfig,
|
|
34
34
|
LanceDBUploadStager,
|
|
35
35
|
)
|
|
36
|
-
from unstructured_ingest.
|
|
36
|
+
from unstructured_ingest.processes.connectors.lancedb.local import (
|
|
37
37
|
LanceDBLocalAccessConfig,
|
|
38
38
|
LanceDBLocalConnectionConfig,
|
|
39
39
|
LanceDBLocalUploader,
|
|
40
40
|
)
|
|
41
|
-
from unstructured_ingest.
|
|
41
|
+
from unstructured_ingest.utils.constants import RECORD_ID_LABEL
|
|
42
42
|
|
|
43
43
|
DATABASE_NAME = "database"
|
|
44
44
|
TABLE_NAME = "elements"
|
|
@@ -24,15 +24,15 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
24
24
|
StagerValidationConfigs,
|
|
25
25
|
stager_validation,
|
|
26
26
|
)
|
|
27
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
27
28
|
from unstructured_ingest.error import DestinationConnectionError
|
|
28
|
-
from unstructured_ingest.
|
|
29
|
+
from unstructured_ingest.processes.connectors.milvus import (
|
|
29
30
|
CONNECTOR_TYPE,
|
|
30
31
|
MilvusConnectionConfig,
|
|
31
32
|
MilvusUploader,
|
|
32
33
|
MilvusUploaderConfig,
|
|
33
34
|
MilvusUploadStager,
|
|
34
35
|
)
|
|
35
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
36
36
|
|
|
37
37
|
DB_NAME = "test_database"
|
|
38
38
|
EXISTENT_COLLECTION_NAME = "test_collection"
|
|
@@ -19,8 +19,9 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
19
19
|
source_connector_validation,
|
|
20
20
|
)
|
|
21
21
|
from test.integration.utils import requires_env
|
|
22
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
22
23
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
23
|
-
from unstructured_ingest.
|
|
24
|
+
from unstructured_ingest.processes.connectors.mongodb import (
|
|
24
25
|
CONNECTOR_TYPE,
|
|
25
26
|
MongoDBAccessConfig,
|
|
26
27
|
MongoDBConnectionConfig,
|
|
@@ -31,7 +32,6 @@ from unstructured_ingest.v2.processes.connectors.mongodb import (
|
|
|
31
32
|
MongoDBUploader,
|
|
32
33
|
MongoDBUploaderConfig,
|
|
33
34
|
)
|
|
34
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
35
35
|
|
|
36
36
|
SOURCE_COLLECTION = "sample-mongodb-data"
|
|
37
37
|
|
|
@@ -11,9 +11,13 @@ from pytest_check import check
|
|
|
11
11
|
|
|
12
12
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, GRAPH_DB_TAG
|
|
13
13
|
from test.integration.connectors.utils.docker import container_context
|
|
14
|
+
from unstructured_ingest.data_types.file_data import (
|
|
15
|
+
FileData,
|
|
16
|
+
FileDataSourceMetadata,
|
|
17
|
+
SourceIdentifiers,
|
|
18
|
+
)
|
|
14
19
|
from unstructured_ingest.error import DestinationConnectionError
|
|
15
|
-
from unstructured_ingest.
|
|
16
|
-
from unstructured_ingest.v2.processes.connectors.neo4j import (
|
|
20
|
+
from unstructured_ingest.processes.connectors.neo4j import (
|
|
17
21
|
CONNECTOR_TYPE,
|
|
18
22
|
Label,
|
|
19
23
|
Neo4jAccessConfig,
|
|
@@ -23,11 +27,7 @@ from unstructured_ingest.v2.processes.connectors.neo4j import (
|
|
|
23
27
|
Neo4jUploadStager,
|
|
24
28
|
Relationship,
|
|
25
29
|
)
|
|
26
|
-
from unstructured_ingest.
|
|
27
|
-
FileData,
|
|
28
|
-
FileDataSourceMetadata,
|
|
29
|
-
SourceIdentifiers,
|
|
30
|
-
)
|
|
30
|
+
from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
|
|
31
31
|
|
|
32
32
|
USERNAME = "neo4j"
|
|
33
33
|
PASSWORD = "password"
|
|
@@ -9,8 +9,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
9
9
|
run_all_validations,
|
|
10
10
|
update_fixtures,
|
|
11
11
|
)
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.interfaces import Downloader, Indexer
|
|
13
|
+
from unstructured_ingest.processes.connectors.notion.connector import (
|
|
14
14
|
CONNECTOR_TYPE,
|
|
15
15
|
NotionAccessConfig,
|
|
16
16
|
NotionConnectionConfig,
|
|
@@ -15,7 +15,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
15
15
|
source_connector_validation,
|
|
16
16
|
)
|
|
17
17
|
from test.integration.utils import requires_env
|
|
18
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
19
|
+
from unstructured_ingest.processes.connectors.onedrive import (
|
|
19
20
|
CONNECTOR_TYPE,
|
|
20
21
|
OnedriveAccessConfig,
|
|
21
22
|
OnedriveConnectionConfig,
|
|
@@ -26,7 +27,6 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
|
|
|
26
27
|
OnedriveUploader,
|
|
27
28
|
OnedriveUploaderConfig,
|
|
28
29
|
)
|
|
29
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
@pytest.fixture
|
|
@@ -18,9 +18,10 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
18
18
|
stager_validation,
|
|
19
19
|
)
|
|
20
20
|
from test.integration.utils import requires_env
|
|
21
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
21
22
|
from unstructured_ingest.error import DestinationConnectionError
|
|
22
|
-
from unstructured_ingest.
|
|
23
|
-
from unstructured_ingest.
|
|
23
|
+
from unstructured_ingest.logger import logger
|
|
24
|
+
from unstructured_ingest.processes.connectors.pinecone import (
|
|
24
25
|
CONNECTOR_TYPE,
|
|
25
26
|
MAX_QUERY_RESULTS,
|
|
26
27
|
PineconeAccessConfig,
|
|
@@ -30,7 +31,6 @@ from unstructured_ingest.v2.processes.connectors.pinecone import (
|
|
|
30
31
|
PineconeUploadStager,
|
|
31
32
|
PineconeUploadStagerConfig,
|
|
32
33
|
)
|
|
33
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
34
34
|
|
|
35
35
|
METADATA_BYTES_LIMIT = (
|
|
36
36
|
40960 # 40KB https://docs.pinecone.io/reference/quotas-and-limits#hard-limits
|
|
@@ -16,7 +16,8 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
16
16
|
stager_validation,
|
|
17
17
|
)
|
|
18
18
|
from test.integration.utils import requires_env
|
|
19
|
-
from unstructured_ingest.
|
|
19
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
20
|
+
from unstructured_ingest.processes.connectors.qdrant.cloud import (
|
|
20
21
|
CloudQdrantAccessConfig,
|
|
21
22
|
CloudQdrantConnectionConfig,
|
|
22
23
|
CloudQdrantUploader,
|
|
@@ -24,27 +25,26 @@ from unstructured_ingest.v2.processes.connectors.qdrant.cloud import (
|
|
|
24
25
|
CloudQdrantUploadStager,
|
|
25
26
|
CloudQdrantUploadStagerConfig,
|
|
26
27
|
)
|
|
27
|
-
from unstructured_ingest.
|
|
28
|
+
from unstructured_ingest.processes.connectors.qdrant.local import (
|
|
28
29
|
CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE,
|
|
29
30
|
)
|
|
30
|
-
from unstructured_ingest.
|
|
31
|
+
from unstructured_ingest.processes.connectors.qdrant.local import (
|
|
31
32
|
LocalQdrantConnectionConfig,
|
|
32
33
|
LocalQdrantUploader,
|
|
33
34
|
LocalQdrantUploaderConfig,
|
|
34
35
|
LocalQdrantUploadStager,
|
|
35
36
|
LocalQdrantUploadStagerConfig,
|
|
36
37
|
)
|
|
37
|
-
from unstructured_ingest.
|
|
38
|
+
from unstructured_ingest.processes.connectors.qdrant.server import (
|
|
38
39
|
CONNECTOR_TYPE as SERVER_CONNECTOR_TYPE,
|
|
39
40
|
)
|
|
40
|
-
from unstructured_ingest.
|
|
41
|
+
from unstructured_ingest.processes.connectors.qdrant.server import (
|
|
41
42
|
ServerQdrantConnectionConfig,
|
|
42
43
|
ServerQdrantUploader,
|
|
43
44
|
ServerQdrantUploaderConfig,
|
|
44
45
|
ServerQdrantUploadStager,
|
|
45
46
|
ServerQdrantUploadStagerConfig,
|
|
46
47
|
)
|
|
47
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
48
48
|
|
|
49
49
|
COLLECTION_NAME = f"test-coll-{uuid.uuid4().hex[:12]}"
|
|
50
50
|
VECTORS_CONFIG = {"size": 384, "distance": "Cosine"}
|
|
@@ -11,16 +11,16 @@ from redis.asyncio import Redis, from_url
|
|
|
11
11
|
|
|
12
12
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG
|
|
13
13
|
from test.integration.utils import requires_env
|
|
14
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
15
|
+
from unstructured_ingest.processes.connectors.redisdb import (
|
|
15
16
|
CONNECTOR_TYPE as REDIS_CONNECTOR_TYPE,
|
|
16
17
|
)
|
|
17
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.processes.connectors.redisdb import (
|
|
18
19
|
RedisAccessConfig,
|
|
19
20
|
RedisConnectionConfig,
|
|
20
21
|
RedisUploader,
|
|
21
22
|
RedisUploaderConfig,
|
|
22
23
|
)
|
|
23
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
async def delete_record(client: Redis, element_id: str, key_prefix: str) -> None:
|
|
@@ -17,8 +17,9 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
17
17
|
source_connector_validation,
|
|
18
18
|
)
|
|
19
19
|
from test.integration.utils import requires_env
|
|
20
|
-
from unstructured_ingest.
|
|
21
|
-
from unstructured_ingest.
|
|
20
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
21
|
+
from unstructured_ingest.errors_v2 import UserAuthError, UserError
|
|
22
|
+
from unstructured_ingest.processes.connectors.fsspec.s3 import (
|
|
22
23
|
CONNECTOR_TYPE,
|
|
23
24
|
S3AccessConfig,
|
|
24
25
|
S3ConnectionConfig,
|
|
@@ -29,7 +30,6 @@ from unstructured_ingest.v2.processes.connectors.fsspec.s3 import (
|
|
|
29
30
|
S3Uploader,
|
|
30
31
|
S3UploaderConfig,
|
|
31
32
|
)
|
|
32
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def validate_predownload_file_data(file_data: FileData):
|
|
@@ -8,7 +8,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
8
8
|
source_connector_validation,
|
|
9
9
|
)
|
|
10
10
|
from test.integration.utils import requires_env
|
|
11
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.processes.connectors.sharepoint import (
|
|
12
12
|
CONNECTOR_TYPE,
|
|
13
13
|
SharepointAccessConfig,
|
|
14
14
|
SharepointConnectionConfig,
|
|
@@ -11,11 +11,12 @@ import requests
|
|
|
11
11
|
|
|
12
12
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG
|
|
13
13
|
from test.integration.utils import requires_env
|
|
14
|
-
from unstructured_ingest.
|
|
15
|
-
from unstructured_ingest.
|
|
14
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
15
|
+
from unstructured_ingest.logger import logger
|
|
16
|
+
from unstructured_ingest.processes.connectors.vectara import (
|
|
16
17
|
CONNECTOR_TYPE as VECTARA_CONNECTOR_TYPE,
|
|
17
18
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
+
from unstructured_ingest.processes.connectors.vectara import (
|
|
19
20
|
VectaraAccessConfig,
|
|
20
21
|
VectaraConnectionConfig,
|
|
21
22
|
VectaraUploader,
|
|
@@ -23,7 +24,6 @@ from unstructured_ingest.v2.processes.connectors.vectara import (
|
|
|
23
24
|
VectaraUploadStager,
|
|
24
25
|
VectaraUploadStagerConfig,
|
|
25
26
|
)
|
|
26
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def validate_upload(document: dict, expected_data: dict):
|
|
@@ -9,8 +9,8 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
9
9
|
source_connector_validation,
|
|
10
10
|
)
|
|
11
11
|
from test.integration.utils import requires_env
|
|
12
|
-
from unstructured_ingest.
|
|
13
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.errors_v2 import UserAuthError
|
|
13
|
+
from unstructured_ingest.processes.connectors.zendesk.zendesk import (
|
|
14
14
|
CONNECTOR_TYPE,
|
|
15
15
|
ZendeskAccessConfig,
|
|
16
16
|
ZendeskConnectionConfig,
|
|
@@ -3,9 +3,9 @@ import shutil
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
from test.integration.connectors.utils.validation.utils import ValidationConfig
|
|
6
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
7
|
+
from unstructured_ingest.interfaces import UploadStager
|
|
6
8
|
from unstructured_ingest.utils.data_prep import get_data
|
|
7
|
-
from unstructured_ingest.v2.interfaces import UploadStager
|
|
8
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class StagerValidationConfigs(ValidationConfig):
|
|
@@ -8,8 +8,8 @@ from deepdiff import DeepDiff
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
|
|
10
10
|
from test.integration.connectors.utils.validation.utils import ValidationConfig
|
|
11
|
-
from unstructured_ingest.
|
|
12
|
-
from unstructured_ingest.
|
|
11
|
+
from unstructured_ingest.data_types.file_data import FileData
|
|
12
|
+
from unstructured_ingest.interfaces import Downloader, Indexer
|
|
13
13
|
|
|
14
14
|
NONSTANDARD_METADATA_FIELDS = {
|
|
15
15
|
"additional_metadata.@microsoft.graph.downloadUrl": [
|
|
@@ -2,7 +2,7 @@ import pytest
|
|
|
2
2
|
from pydantic import ValidationError
|
|
3
3
|
|
|
4
4
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
|
|
5
|
-
from unstructured_ingest.
|
|
5
|
+
from unstructured_ingest.processes.connectors.weaviate.cloud import (
|
|
6
6
|
CONNECTOR_TYPE,
|
|
7
7
|
CloudWeaviateAccessConfig,
|
|
8
8
|
CloudWeaviateConnectionConfig,
|
|
@@ -9,14 +9,14 @@ from weaviate.client import WeaviateClient
|
|
|
9
9
|
|
|
10
10
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
|
|
11
11
|
from test.integration.connectors.utils.docker import container_context
|
|
12
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
|
|
13
|
+
from unstructured_ingest.processes.connectors.weaviate.local import (
|
|
13
14
|
CONNECTOR_TYPE,
|
|
14
15
|
LocalWeaviateConnectionConfig,
|
|
15
16
|
LocalWeaviateUploader,
|
|
16
17
|
LocalWeaviateUploaderConfig,
|
|
17
18
|
LocalWeaviateUploadStager,
|
|
18
19
|
)
|
|
19
|
-
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
20
20
|
|
|
21
21
|
COLLECTION_NAME = "elements"
|
|
22
22
|
|
|
@@ -9,7 +9,7 @@ from unstructured_ingest.embed.azure_openai import (
|
|
|
9
9
|
AzureOpenAIEmbeddingConfig,
|
|
10
10
|
AzureOpenAIEmbeddingEncoder,
|
|
11
11
|
)
|
|
12
|
-
from unstructured_ingest.
|
|
12
|
+
from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
|
|
13
13
|
|
|
14
14
|
API_KEY = "AZURE_OPENAI_API_KEY"
|
|
15
15
|
ENDPOINT = "AZURE_OPENAI_ENDPOINT"
|
|
@@ -15,8 +15,8 @@ from unstructured_ingest.embed.bedrock import (
|
|
|
15
15
|
BedrockEmbeddingConfig,
|
|
16
16
|
BedrockEmbeddingEncoder,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.errors_v2 import UserAuthError, UserError
|
|
19
|
+
from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def get_aws_credentials() -> dict:
|
|
@@ -6,7 +6,7 @@ from unstructured_ingest.embed.huggingface import (
|
|
|
6
6
|
HuggingFaceEmbeddingConfig,
|
|
7
7
|
HuggingFaceEmbeddingEncoder,
|
|
8
8
|
)
|
|
9
|
-
from unstructured_ingest.
|
|
9
|
+
from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def test_huggingface_embedder(embedder_file: Path):
|
|
@@ -15,7 +15,7 @@ from unstructured_ingest.embed.mixedbreadai import (
|
|
|
15
15
|
MixedbreadAIEmbeddingConfig,
|
|
16
16
|
MixedbreadAIEmbeddingEncoder,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
|
|
19
19
|
|
|
20
20
|
API_KEY = "MXBAI_API_KEY"
|
|
21
21
|
|
|
@@ -15,8 +15,8 @@ from unstructured_ingest.embed.octoai import (
|
|
|
15
15
|
OctoAiEmbeddingConfig,
|
|
16
16
|
OctoAIEmbeddingEncoder,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.errors_v2 import UserAuthError
|
|
19
|
+
from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
|
|
20
20
|
|
|
21
21
|
API_KEY = "OCTOAI_API_KEY"
|
|
22
22
|
|
|
@@ -15,8 +15,8 @@ from unstructured_ingest.embed.openai import (
|
|
|
15
15
|
OpenAIEmbeddingConfig,
|
|
16
16
|
OpenAIEmbeddingEncoder,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.
|
|
19
|
-
from unstructured_ingest.
|
|
18
|
+
from unstructured_ingest.errors_v2 import UserAuthError
|
|
19
|
+
from unstructured_ingest.processes.embedder import Embedder, EmbedderConfig
|
|
20
20
|
|
|
21
21
|
API_KEY = "OPENAI_API_KEY"
|
|
22
22
|
|