unstructured-ingest 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest might be problematic.

Files changed (53)
  1. test/integration/connectors/{databricks_tests → databricks}/test_volumes_native.py +75 -19
  2. test/integration/connectors/sql/test_postgres.py +6 -2
  3. test/integration/connectors/sql/test_singlestore.py +6 -2
  4. test/integration/connectors/sql/test_snowflake.py +6 -2
  5. test/integration/connectors/sql/test_sqlite.py +6 -2
  6. test/integration/connectors/test_milvus.py +13 -0
  7. test/integration/connectors/test_onedrive.py +6 -0
  8. test/integration/connectors/test_redis.py +119 -0
  9. test/integration/connectors/test_vectara.py +270 -0
  10. test/integration/embedders/test_bedrock.py +28 -0
  11. test/integration/embedders/test_octoai.py +14 -0
  12. test/integration/embedders/test_openai.py +13 -0
  13. test/integration/embedders/test_togetherai.py +10 -0
  14. test/integration/partitioners/test_partitioner.py +2 -2
  15. test/unit/embed/test_octoai.py +8 -1
  16. unstructured_ingest/__version__.py +1 -1
  17. unstructured_ingest/embed/bedrock.py +39 -11
  18. unstructured_ingest/embed/interfaces.py +5 -0
  19. unstructured_ingest/embed/octoai.py +44 -3
  20. unstructured_ingest/embed/openai.py +37 -1
  21. unstructured_ingest/embed/togetherai.py +28 -1
  22. unstructured_ingest/embed/voyageai.py +33 -1
  23. unstructured_ingest/v2/errors.py +18 -0
  24. unstructured_ingest/v2/interfaces/file_data.py +11 -1
  25. unstructured_ingest/v2/processes/connectors/__init__.py +7 -0
  26. unstructured_ingest/v2/processes/connectors/astradb.py +2 -0
  27. unstructured_ingest/v2/processes/connectors/chroma.py +0 -1
  28. unstructured_ingest/v2/processes/connectors/couchbase.py +2 -0
  29. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -0
  30. unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +2 -2
  31. unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +2 -2
  32. unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +2 -2
  33. unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +2 -2
  34. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -1
  35. unstructured_ingest/v2/processes/connectors/kafka/cloud.py +5 -2
  36. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +14 -3
  37. unstructured_ingest/v2/processes/connectors/milvus.py +15 -6
  38. unstructured_ingest/v2/processes/connectors/mongodb.py +3 -4
  39. unstructured_ingest/v2/processes/connectors/neo4j.py +2 -0
  40. unstructured_ingest/v2/processes/connectors/onedrive.py +79 -25
  41. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +0 -1
  42. unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
  43. unstructured_ingest/v2/processes/connectors/sql/sql.py +5 -0
  44. unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
  45. unstructured_ingest/v2/unstructured_api.py +25 -2
  46. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/METADATA +20 -16
  47. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/RECORD +52 -48
  48. test/integration/connectors/test_kafka.py +0 -304
  49. /test/integration/connectors/{databricks_tests → databricks}/__init__.py +0 -0
  50. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/LICENSE.md +0 -0
  51. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/WHEEL +0 -0
  52. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/entry_points.txt +0 -0
  53. {unstructured_ingest-0.3.10.dist-info → unstructured_ingest-0.3.12.dist-info}/top_level.txt +0 -0

test/integration/connectors/databricks/test_volumes_native.py

@@ -1,10 +1,10 @@
 import json
 import os
-import tempfile
 import uuid
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
+from unittest import mock

 import pytest
 from databricks.sdk import WorkspaceClient
@@ -31,11 +31,15 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes_native impor


 @dataclass
-class EnvData:
+class BaseEnvData:
     host: str
+    catalog: str
+
+
+@dataclass
+class BasicAuthEnvData(BaseEnvData):
     client_id: str
     client_secret: str
-    catalog: str

     def get_connection_config(self) -> DatabricksNativeVolumesConnectionConfig:
         return DatabricksNativeVolumesConnectionConfig(
@@ -47,8 +51,21 @@ class EnvData:
     )


-def get_env_data() -> EnvData:
-    return EnvData(
+@dataclass
+class PATEnvData(BaseEnvData):
+    token: str
+
+    def get_connection_config(self) -> DatabricksNativeVolumesConnectionConfig:
+        return DatabricksNativeVolumesConnectionConfig(
+            host=self.host,
+            access_config=DatabricksNativeVolumesAccessConfig(
+                token=self.token,
+            ),
+        )
+
+
+def get_basic_auth_env_data() -> BasicAuthEnvData:
+    return BasicAuthEnvData(
         host=os.environ["DATABRICKS_HOST"],
         client_id=os.environ["DATABRICKS_CLIENT_ID"],
         client_secret=os.environ["DATABRICKS_CLIENT_SECRET"],
@@ -56,23 +73,30 @@ def get_env_data() -> EnvData:
     )


+def get_pat_env_data() -> PATEnvData:
+    return PATEnvData(
+        host=os.environ["DATABRICKS_HOST"],
+        catalog=os.environ["DATABRICKS_CATALOG"],
+        token=os.environ["DATABRICKS_PAT"],
+    )
+
+
 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
 @requires_env(
     "DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
 )
-async def test_volumes_native_source():
-    env_data = get_env_data()
-    indexer_config = DatabricksNativeVolumesIndexerConfig(
-        recursive=True,
-        volume="test-platform",
-        volume_path="databricks-volumes-test-input",
-        catalog=env_data.catalog,
-    )
-    connection_config = env_data.get_connection_config()
-    with tempfile.TemporaryDirectory() as tempdir:
-        tempdir_path = Path(tempdir)
-        download_config = DatabricksNativeVolumesDownloaderConfig(download_dir=tempdir_path)
+async def test_volumes_native_source(tmp_path: Path):
+    env_data = get_basic_auth_env_data()
+    with mock.patch.dict(os.environ, clear=True):
+        indexer_config = DatabricksNativeVolumesIndexerConfig(
+            recursive=True,
+            volume="test-platform",
+            volume_path="databricks-volumes-test-input",
+            catalog=env_data.catalog,
+        )
+        connection_config = env_data.get_connection_config()
+        download_config = DatabricksNativeVolumesDownloaderConfig(download_dir=tmp_path)
         indexer = DatabricksNativeVolumesIndexer(
             connection_config=connection_config, index_config=indexer_config
         )
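
The reworked source test builds all connector configuration inside mock.patch.dict(os.environ, clear=True), which empties the process environment for the duration of the block and restores it afterwards, so nothing is picked up implicitly from ambient Databricks variables. A minimal sketch of that stdlib pattern, with a placeholder variable name:

    import os
    from unittest import mock

    os.environ["EXAMPLE_SECRET"] = "from-the-outer-environment"  # placeholder value

    with mock.patch.dict(os.environ, clear=True):
        # Inside the block the environment is empty, so any code that silently
        # reads os.environ fails fast instead of reusing ambient credentials.
        assert "EXAMPLE_SECRET" not in os.environ

    # patch.dict restores the original environment once the block exits.
    assert os.environ["EXAMPLE_SECRET"] == "from-the-outer-environment"
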
@@ -89,12 +113,44 @@ async def test_volumes_native_source():
     )


+@pytest.mark.asyncio
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@requires_env("DATABRICKS_HOST", "DATABRICKS_PAT", "DATABRICKS_CATALOG")
+async def test_volumes_native_source_pat(tmp_path: Path):
+    env_data = get_pat_env_data()
+    with mock.patch.dict(os.environ, clear=True):
+        indexer_config = DatabricksNativeVolumesIndexerConfig(
+            recursive=True,
+            volume="test-platform",
+            volume_path="databricks-volumes-test-input",
+            catalog=env_data.catalog,
+        )
+        connection_config = env_data.get_connection_config()
+        download_config = DatabricksNativeVolumesDownloaderConfig(download_dir=tmp_path)
+        indexer = DatabricksNativeVolumesIndexer(
+            connection_config=connection_config, index_config=indexer_config
+        )
+        downloader = DatabricksNativeVolumesDownloader(
+            connection_config=connection_config, download_config=download_config
+        )
+        await source_connector_validation(
+            indexer=indexer,
+            downloader=downloader,
+            configs=SourceValidationConfigs(
+                test_id="databricks_volumes_native_pat",
+                expected_num_files=1,
+            ),
+        )
+
+
 def _get_volume_path(catalog: str, volume: str, volume_path: str):
     return f"/Volumes/{catalog}/default/{volume}/{volume_path}"


 @contextmanager
-def databricks_destination_context(env_data: EnvData, volume: str, volume_path) -> WorkspaceClient:
+def databricks_destination_context(
+    env_data: BasicAuthEnvData, volume: str, volume_path
+) -> WorkspaceClient:
     client = WorkspaceClient(
         host=env_data.host, client_id=env_data.client_id, client_secret=env_data.client_secret
     )
@@ -137,7 +193,7 @@ def validate_upload(client: WorkspaceClient, catalog: str, volume: str, volume_p
     "DATABRICKS_HOST", "DATABRICKS_CLIENT_ID", "DATABRICKS_CLIENT_SECRET", "DATABRICKS_CATALOG"
 )
 async def test_volumes_native_destination(upload_file: Path):
-    env_data = get_env_data()
+    env_data = get_basic_auth_env_data()
     volume_path = f"databricks-volumes-test-output-{uuid.uuid4()}"
     file_data = FileData(
         source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
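
With EnvData split into BasicAuthEnvData and PATEnvData, the same connection-config type now covers both OAuth client credentials and personal access tokens. A hedged sketch of constructing each variant directly; the host and credential values are placeholders, and the client-credential keyword names are inferred from BasicAuthEnvData's fields rather than shown in this diff:

    from unstructured_ingest.v2.processes.connectors.databricks.volumes_native import (
        DatabricksNativeVolumesAccessConfig,
        DatabricksNativeVolumesConnectionConfig,
    )

    # PAT-based config, mirroring PATEnvData.get_connection_config() above.
    pat_config = DatabricksNativeVolumesConnectionConfig(
        host="https://example.cloud.databricks.com",  # placeholder workspace host
        access_config=DatabricksNativeVolumesAccessConfig(token="example-pat"),
    )

    # Client-credential config; field names assumed to match BasicAuthEnvData.
    oauth_config = DatabricksNativeVolumesConnectionConfig(
        host="https://example.cloud.databricks.com",
        access_config=DatabricksNativeVolumesAccessConfig(
            client_id="example-client-id",
            client_secret="example-client-secret",
        ),
    )
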

test/integration/connectors/sql/test_postgres.py

@@ -15,7 +15,7 @@ from test.integration.connectors.utils.validation.source import (
     SourceValidationConfigs,
     source_connector_validation,
 )
-from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.sql.postgres import (
     CONNECTOR_TYPE,
     PostgresAccessConfig,
@@ -119,7 +119,11 @@ def validate_destination(
 async def test_postgres_destination(upload_file: Path, temp_dir: Path):
     # the postgres destination connector doesn't leverage the file data but is required as an input,
     # mocking it with arbitrary values to meet the base requirements:
-    mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
+    mock_file_data = FileData(
+        identifier="mock file data",
+        connector_type=CONNECTOR_TYPE,
+        source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
+    )
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "postgres" / "destination"
     ):
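
The same change recurs in the singlestore, snowflake, and sqlite tests below: the mock FileData now carries explicit SourceIdentifiers rather than only an identifier and connector type, in line with the file_data.py interface change listed above. A minimal sketch of the pattern on its own, with placeholder values:

    from pathlib import Path

    from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
    from unstructured_ingest.v2.processes.connectors.sql.postgres import CONNECTOR_TYPE

    upload_file = Path("example-elements.json")  # placeholder elements file

    mock_file_data = FileData(
        identifier="mock file data",
        connector_type=CONNECTOR_TYPE,
        source_identifiers=SourceIdentifiers(
            filename=upload_file.name,
            fullpath=upload_file.name,
        ),
    )
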

test/integration/connectors/sql/test_singlestore.py

@@ -15,7 +15,7 @@ from test.integration.connectors.utils.validation.source import (
     SourceValidationConfigs,
     source_connector_validation,
 )
-from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
     CONNECTOR_TYPE,
     SingleStoreAccessConfig,
@@ -103,7 +103,11 @@ def validate_destination(
 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
 async def test_singlestore_destination(upload_file: Path, temp_dir: Path):
-    mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
+    mock_file_data = FileData(
+        identifier="mock file data",
+        connector_type=CONNECTOR_TYPE,
+        source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
+    )
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "singlestore" / "destination"
     ):

test/integration/connectors/sql/test_snowflake.py

@@ -17,7 +17,7 @@ from test.integration.connectors.utils.validation.source import (
     source_connector_validation,
 )
 from test.integration.utils import requires_env
-from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
     CONNECTOR_TYPE,
     SnowflakeAccessConfig,
@@ -170,7 +170,11 @@ async def test_snowflake_destination(
 ):
     # the postgres destination connector doesn't leverage the file data but is required as an input,
     # mocking it with arbitrary values to meet the base requirements:
-    mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
+    mock_file_data = FileData(
+        identifier="mock file data",
+        connector_type=CONNECTOR_TYPE,
+        source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
+    )
     init_db_destination()
     stager = SnowflakeUploadStager()
     staged_path = stager.run(

test/integration/connectors/sql/test_sqlite.py

@@ -15,7 +15,7 @@ from test.integration.connectors.utils.validation.source import (
     SourceValidationConfigs,
     source_connector_validation,
 )
-from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.sql.sqlite import (
     CONNECTOR_TYPE,
     SQLiteConnectionConfig,
@@ -116,7 +116,11 @@ async def test_sqlite_destination(
 ):
     # the sqlite destination connector doesn't leverage the file data but is required as an input,
     # mocking it with arbitrary values to meet the base requirements:
-    mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
+    mock_file_data = FileData(
+        identifier="mock file data",
+        connector_type=CONNECTOR_TYPE,
+        source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
+    )
     stager = SQLiteUploadStager()
     staged_path = stager.run(
         elements_filepath=upload_file,

test/integration/connectors/test_milvus.py

@@ -174,6 +174,19 @@ def test_precheck_fails_on_nonexistent_collection(collection: str):
         uploader.precheck()


+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+def test_precheck_fails_on_nonexisting_db(collection: str):
+    uploader = MilvusUploader(
+        connection_config=MilvusConnectionConfig(uri=DB_URI),
+        upload_config=MilvusUploaderConfig(db_name="nonexisting_db", collection_name=collection),
+    )
+    with pytest.raises(
+        DestinationConnectionError,
+        match="database not found",
+    ):
+        uploader.precheck()
+
+
 @pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
 def test_milvus_stager(
     request: TopRequest,
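
The added test checks that precheck() now fails fast when the configured Milvus database does not exist. A hedged sketch of the same precheck from application code; the URI and names are placeholders, and the import path and error type are assumptions based on the names used in the test:

    from unstructured_ingest.v2.processes.connectors.milvus import (
        MilvusConnectionConfig,
        MilvusUploader,
        MilvusUploaderConfig,
    )

    uploader = MilvusUploader(
        connection_config=MilvusConnectionConfig(uri="http://localhost:19530"),  # placeholder URI
        upload_config=MilvusUploaderConfig(db_name="my_db", collection_name="my_collection"),
    )

    try:
        # Verifies connectivity and that the database and collection exist
        # before any documents are written.
        uploader.precheck()
    except Exception as e:  # the test expects DestinationConnectionError("database not found")
        print(f"Milvus destination precheck failed: {e}")
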

test/integration/connectors/test_onedrive.py

@@ -20,6 +20,9 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (


 @pytest.fixture
+@pytest.mark.xfail(
+    reason="Issues with test setup on the provider side."
+)  # TODO: remove line when issues are addressed
 def onedrive_test_folder() -> str:
     """
     Pytest fixture that creates a test folder in OneDrive and deletes it after test run.
@@ -66,6 +69,9 @@ def get_connection_config():

 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
 @requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
+@pytest.mark.xfail(
+    reason="Issues with test setup on the provider side."
+)  # TODO: remove line when issues are addressed
 def test_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
     """
     Integration test for the OneDrive destination connector.
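
Both OneDrive changes add pytest.mark.xfail with a provider-side reason. The pytest docs note that marks applied to fixtures have no effect, so the copy on the onedrive_test_folder fixture likely changes nothing; the conventional placement is on the test function itself. A small sketch with a generic test name as an illustration:

    import pytest

    @pytest.mark.xfail(reason="Issues with test setup on the provider side.")
    def test_example_against_flaky_provider():
        # An xfail test still runs; a failure is reported as "xfailed" rather
        # than breaking the suite, and an unexpected pass is reported as "xpassed".
        ...
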

test/integration/connectors/test_redis.py

@@ -0,0 +1,119 @@
+import asyncio
+import json
+import os
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import pytest
+from redis import exceptions as redis_exceptions
+from redis.asyncio import Redis, from_url
+
+from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.utils import requires_env
+from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
+from unstructured_ingest.v2.processes.connectors.redisdb import (
+    CONNECTOR_TYPE as REDIS_CONNECTOR_TYPE,
+)
+from unstructured_ingest.v2.processes.connectors.redisdb import (
+    RedisAccessConfig,
+    RedisConnectionConfig,
+    RedisUploader,
+    RedisUploaderConfig,
+)
+
+
+async def delete_record(client: Redis, element_id: str) -> None:
+    await client.delete(element_id)
+
+
+async def validate_upload(client: Redis, first_element: dict):
+    element_id = first_element["element_id"]
+    expected_text = first_element["text"]
+    expected_embeddings = first_element["embeddings"]
+    async with client.pipeline(transaction=True) as pipe:
+        try:
+            response = await pipe.json().get(element_id, "$").execute()
+            response = response[0][0]
+        except redis_exceptions.ResponseError:
+            response = await pipe.get(element_id).execute()
+            response = json.loads(response[0])
+
+    embedding_similarity = np.linalg.norm(
+        np.array(response["embeddings"]) - np.array(expected_embeddings)
+    )
+
+    assert response is not None
+    assert response["element_id"] == element_id
+    assert response["text"] == expected_text
+    assert embedding_similarity < 1e-10
+
+
+async def redis_destination_test(
+    upload_file: Path,
+    tmp_path: Path,
+    connection_kwargs: dict,
+    uri: Optional[str] = None,
+    password: Optional[str] = None,
+):
+    uploader = RedisUploader(
+        connection_config=RedisConnectionConfig(
+            **connection_kwargs, access_config=RedisAccessConfig(uri=uri, password=password)
+        ),
+        upload_config=RedisUploaderConfig(batch_size=10),
+    )
+
+    file_data = FileData(
+        source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
+        connector_type=REDIS_CONNECTOR_TYPE,
+        identifier="mock-file-data",
+    )
+    with upload_file.open() as upload_fp:
+        elements = json.load(upload_fp)
+        first_element = elements[0]
+
+    try:
+        if uploader.is_async():
+            await uploader.run_data_async(data=elements, file_data=file_data)
+
+        if uri:
+            async with from_url(uri) as client:
+                await validate_upload(client=client, first_element=first_element)
+        else:
+            async with Redis(**connection_kwargs, password=password) as client:
+                await validate_upload(client=client, first_element=first_element)
+    except Exception as e:
+        raise e
+    finally:
+        if uri:
+            async with from_url(uri) as client:
+                tasks = [delete_record(client, element["element_id"]) for element in elements]
+                await asyncio.gather(*tasks)
+        else:
+            async with Redis(**connection_kwargs, password=password) as client:
+                tasks = [delete_record(client, element["element_id"]) for element in elements]
+                await asyncio.gather(*tasks)
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(REDIS_CONNECTOR_TYPE, DESTINATION_TAG)
+@requires_env("AZURE_REDIS_INGEST_TEST_PASSWORD")
+async def test_redis_destination_azure_with_password(upload_file: Path, tmp_path: Path):
+    connection_kwargs = {
+        "host": "utic-dashboard-dev.redis.cache.windows.net",
+        "port": 6380,
+        "db": 0,
+        "ssl": True,
+    }
+    redis_pw = os.environ["AZURE_REDIS_INGEST_TEST_PASSWORD"]
+    await redis_destination_test(upload_file, tmp_path, connection_kwargs, password=redis_pw)
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(REDIS_CONNECTOR_TYPE, DESTINATION_TAG, "redis")
+@requires_env("AZURE_REDIS_INGEST_TEST_PASSWORD")
+async def test_redis_destination_azure_with_uri(upload_file: Path, tmp_path: Path):
+    connection_kwargs = {}
+    redis_pw = os.environ["AZURE_REDIS_INGEST_TEST_PASSWORD"]
+    uri = f"rediss://:{redis_pw}@utic-dashboard-dev.redis.cache.windows.net:6380/0"
+    await redis_destination_test(upload_file, tmp_path, connection_kwargs, uri=uri)
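
The new redisdb destination connector (shipped in unstructured_ingest/v2/processes/connectors/redisdb.py above) is exercised here against an Azure-hosted Redis. A hedged sketch of writing a few elements to a local, unauthenticated Redis instead; the host, element values, and identifiers are placeholders, while the class names and methods mirror the test above:

    import asyncio

    from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
    from unstructured_ingest.v2.processes.connectors.redisdb import (
        CONNECTOR_TYPE,
        RedisAccessConfig,
        RedisConnectionConfig,
        RedisUploader,
        RedisUploaderConfig,
    )

    # Placeholder: a local Redis with no password and a single fake element.
    uploader = RedisUploader(
        connection_config=RedisConnectionConfig(
            host="localhost",
            port=6379,
            db=0,
            ssl=False,
            access_config=RedisAccessConfig(uri=None, password=None),
        ),
        upload_config=RedisUploaderConfig(batch_size=10),
    )

    file_data = FileData(
        identifier="local-smoke-test",
        connector_type=CONNECTOR_TYPE,
        source_identifiers=SourceIdentifiers(fullpath="example.json", filename="example.json"),
    )

    elements = [{"element_id": "example-1", "text": "hello", "embeddings": [0.0, 1.0]}]

    async def main():
        # The uploader writes each element under its element_id key.
        if uploader.is_async():
            await uploader.run_data_async(data=elements, file_data=file_data)

    asyncio.run(main())
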

test/integration/connectors/test_vectara.py

@@ -0,0 +1,270 @@
+import json
+import os
+import time
+from pathlib import Path
+from typing import Generator
+from uuid import uuid4
+
+import pytest
+import requests
+
+from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.utils import requires_env
+from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
+from unstructured_ingest.v2.logger import logger
+from unstructured_ingest.v2.processes.connectors.vectara import (
+    CONNECTOR_TYPE as VECTARA_CONNECTOR_TYPE,
+)
+from unstructured_ingest.v2.processes.connectors.vectara import (
+    VectaraAccessConfig,
+    VectaraConnectionConfig,
+    VectaraUploader,
+    VectaraUploaderConfig,
+    VectaraUploadStager,
+    VectaraUploadStagerConfig,
+)
+
+
+def validate_upload(response: dict, expected_data: dict):
+    element_id = expected_data["element_id"]
+    expected_text = expected_data["text"]
+    filename = expected_data["metadata"]["filename"]
+    filetype = expected_data["metadata"]["filetype"]
+    page_number = expected_data["metadata"]["page_number"]
+
+    response = response["search_results"][0]
+
+    assert response is not None
+    assert response["text"] == expected_text
+    assert response["part_metadata"]["element_id"] == element_id
+    assert response["part_metadata"]["filename"] == filename
+    assert response["part_metadata"]["filetype"] == filetype
+    assert response["part_metadata"]["page_number"] == page_number
+
+
+@requires_env("VECTARA_OAUTH_CLIENT_ID", "VECTARA_OAUTH_SECRET", "VECTARA_CUSTOMER_ID")
+def _get_jwt_token():
+    """Connect to the server and get a JWT token."""
+    customer_id = os.environ["VECTARA_CUSTOMER_ID"]
+    token_endpoint = (
+        f"https://vectara-prod-{customer_id}.auth.us-west-2.amazoncognito.com/oauth2/token"
+    )
+    headers = {
+        "Content-Type": "application/x-www-form-urlencoded",
+    }
+    data = {
+        "grant_type": "client_credentials",
+        "client_id": os.environ["VECTARA_OAUTH_CLIENT_ID"],
+        "client_secret": os.environ["VECTARA_OAUTH_SECRET"],
+    }
+
+    response = requests.post(token_endpoint, headers=headers, data=data)
+    response.raise_for_status()
+    response_json = response.json()
+
+    return response_json.get("access_token")
+
+
+def query_data(corpus_key: str, element_id: str) -> dict:
+
+    url = f"https://api.vectara.io/v2/corpora/{corpus_key}/query"
+
+    # the query below requires the corpus to have filter attributes for element_id
+
+    data = json.dumps(
+        {
+            "query": "string",
+            "search": {
+                "metadata_filter": f"part.element_id = '{element_id}'",
+                "lexical_interpolation": 1,
+                "limit": 10,
+            },
+        }
+    )
+
+    jwt_token = _get_jwt_token()
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+        "Authorization": f"Bearer {jwt_token}",
+        "X-source": "unstructured",
+    }
+
+    response = requests.post(url, headers=headers, data=data)
+    response.raise_for_status()
+    response_json = response.json()
+
+    return response_json
+
+
+def create_corpora(corpus_key: str, corpus_name: str) -> None:
+    url = "https://api.vectara.io/v2/corpora"
+    data = json.dumps({"key": corpus_key, "name": corpus_name, "description": "integration test"})
+    jwt_token = _get_jwt_token()
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+        "Authorization": f"Bearer {jwt_token}",
+        "X-source": "unstructured",
+    }
+
+    response = requests.post(url, headers=headers, data=data)
+    response.raise_for_status()
+
+
+def replace_filter_attributes(corpus_key: str) -> None:
+    url = f"https://api.vectara.io/v2/corpora/{corpus_key}/replace_filter_attributes"
+    data = json.dumps(
+        {
+            "filter_attributes": [
+                {"name": "element_id", "level": "part", "indexed": True, "type": "text"}
+            ]
+        }
+    )
+    jwt_token = _get_jwt_token()
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+        "Authorization": f"Bearer {jwt_token}",
+        "X-source": "unstructured",
+    }
+
+    response = requests.post(url, headers=headers, data=data)
+    response.raise_for_status()
+
+
+def delete_corpora(corpus_key: str) -> None:
+    url = f"https://api.vectara.io/v2/corpora/{corpus_key}"
+
+    jwt_token = _get_jwt_token()
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+        "Authorization": f"Bearer {jwt_token}",
+        "X-source": "unstructured",
+    }
+
+    response = requests.delete(url, headers=headers)
+    response.raise_for_status()
+
+
+def list_corpora() -> list:
+    url = "https://api.vectara.io/v2/corpora?limit=100"
+    jwt_token = _get_jwt_token()
+    headers = {
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+        "Authorization": f"Bearer {jwt_token}",
+        "X-source": "unstructured",
+    }
+    response = requests.get(url, headers=headers)
+    response.raise_for_status()
+    response_json = response.json()
+    if response_json.get("corpora"):
+        return [item["key"] for item in response_json.get("corpora")]
+    else:
+        return []
+
+
+def wait_for_ready(corpus_key: str, timeout=60, interval=2) -> None:
+    def is_ready_status():
+        corpora_list = list_corpora()
+        return corpus_key in corpora_list
+
+    start = time.time()
+    is_ready = is_ready_status()
+    while not is_ready and time.time() - start < timeout:
+        time.sleep(interval)
+        is_ready = is_ready_status()
+    if not is_ready:
+        raise TimeoutError("time out waiting for corpus to be ready")
+
+
+def wait_for_delete(corpus_key: str, timeout=60, interval=2) -> None:
+    start = time.time()
+    while time.time() - start < timeout:
+        corpora_list = list_corpora()
+        if corpus_key not in corpora_list:
+            return
+        time.sleep(interval)
+
+    raise TimeoutError("time out waiting for corpus to delete")
+
+
+@pytest.fixture
+def corpora_util() -> Generator[str, None, None]:
+    random_id = str(uuid4()).split("-")[0]
+    corpus_key = f"ingest-test-{random_id}"
+    corpus_name = "ingest-test"
+    logger.info(f"Creating corpus with key: {corpus_key}")
+    try:
+        create_corpora(corpus_key, corpus_name)
+        replace_filter_attributes(corpus_key)
+        wait_for_ready(corpus_key=corpus_key)
+        yield corpus_key
+    except Exception as e:
+        logger.error(f"failed to create corpus {corpus_key}: {e}")
+    finally:
+        logger.info(f"deleting corpus: {corpus_key}")
+        delete_corpora(corpus_key)
+        wait_for_delete(corpus_key=corpus_key)
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(VECTARA_CONNECTOR_TYPE, DESTINATION_TAG, "vectara")
+@requires_env("VECTARA_OAUTH_CLIENT_ID", "VECTARA_OAUTH_SECRET", "VECTARA_CUSTOMER_ID")
+async def test_vectara_destination(
+    upload_file: Path, tmp_path: Path, corpora_util: str, retries=30, interval=10
+):
+    corpus_key = corpora_util
+    connection_kwargs = {
+        "customer_id": os.environ["VECTARA_CUSTOMER_ID"],
+        "corpus_key": corpus_key,
+    }
+
+    oauth_client_id = os.environ["VECTARA_OAUTH_CLIENT_ID"]
+    oauth_secret = os.environ["VECTARA_OAUTH_SECRET"]
+
+    file_data = FileData(
+        source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
+        connector_type=VECTARA_CONNECTOR_TYPE,
+        identifier="mock-file-data",
+    )
+
+    stager_config = VectaraUploadStagerConfig(batch_size=10)
+    stager = VectaraUploadStager(upload_stager_config=stager_config)
+    new_upload_file = stager.run(
+        elements_filepath=upload_file,
+        output_dir=tmp_path,
+        output_filename=upload_file.name,
+        file_data=file_data,
+    )
+
+    uploader = VectaraUploader(
+        connection_config=VectaraConnectionConfig(
+            **connection_kwargs,
+            access_config=VectaraAccessConfig(
+                oauth_client_id=oauth_client_id, oauth_secret=oauth_secret
+            ),
+        ),
+        upload_config=VectaraUploaderConfig(),
+    )
+
+    with new_upload_file.open() as new_upload_fp:
+        elements_stager = json.load(new_upload_fp)
+
+    if uploader.is_async():
+        await uploader.run_data_async(data=elements_stager, file_data=file_data)
+
+    with upload_file.open() as upload_fp:
+        elements = json.load(upload_fp)
+        first_element = elements[0]
+
+    for i in range(retries):
+        response = query_data(corpus_key, first_element["element_id"])
+        if not response["search_results"]:
+            time.sleep(interval)
+        else:
+            break
+
+    validate_upload(response=response, expected_data=first_element)
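
The tail of the Vectara test polls the corpus until the indexed element becomes queryable, since indexing is eventually consistent. A small hedged sketch of that wait-until pattern as a standalone helper; the helper name, timings, and the commented wiring are illustrative, not part of the package:

    import time
    from typing import Callable

    def wait_until(predicate: Callable[[], bool], retries: int = 30, interval: float = 10.0) -> bool:
        """Call `predicate` until it returns True or the retry budget is spent."""
        for _ in range(retries):
            if predicate():
                return True
            time.sleep(interval)
        return False

    # Hypothetical wiring against the helpers defined in the test module:
    # found = wait_until(lambda: bool(query_data(corpus_key, element_id)["search_results"]))
    # assert found, "element never became searchable"
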