unstructured-ingest 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest has been flagged as potentially problematic.

Files changed (87)
  1. test/integration/chunkers/test_chunkers.py +0 -11
  2. test/integration/connectors/conftest.py +11 -1
  3. test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
  4. test/integration/connectors/duckdb/conftest.py +14 -0
  5. test/integration/connectors/duckdb/test_duckdb.py +51 -44
  6. test/integration/connectors/duckdb/test_motherduck.py +37 -48
  7. test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
  8. test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
  9. test/integration/connectors/sql/test_postgres.py +103 -92
  10. test/integration/connectors/sql/test_singlestore.py +112 -100
  11. test/integration/connectors/sql/test_snowflake.py +142 -117
  12. test/integration/connectors/sql/test_sqlite.py +87 -76
  13. test/integration/connectors/test_astradb.py +62 -1
  14. test/integration/connectors/test_azure_ai_search.py +25 -3
  15. test/integration/connectors/test_chroma.py +120 -0
  16. test/integration/connectors/test_confluence.py +4 -4
  17. test/integration/connectors/test_delta_table.py +1 -0
  18. test/integration/connectors/test_kafka.py +6 -6
  19. test/integration/connectors/test_milvus.py +21 -0
  20. test/integration/connectors/test_mongodb.py +7 -4
  21. test/integration/connectors/test_neo4j.py +236 -0
  22. test/integration/connectors/test_pinecone.py +25 -1
  23. test/integration/connectors/test_qdrant.py +25 -2
  24. test/integration/connectors/test_s3.py +9 -6
  25. test/integration/connectors/utils/docker.py +6 -0
  26. test/integration/connectors/utils/validation/__init__.py +0 -0
  27. test/integration/connectors/utils/validation/destination.py +88 -0
  28. test/integration/connectors/utils/validation/equality.py +75 -0
  29. test/integration/connectors/utils/{validation.py → validation/source.py} +42 -98
  30. test/integration/connectors/utils/validation/utils.py +36 -0
  31. unstructured_ingest/__version__.py +1 -1
  32. unstructured_ingest/utils/chunking.py +11 -0
  33. unstructured_ingest/utils/data_prep.py +36 -0
  34. unstructured_ingest/v2/interfaces/__init__.py +3 -1
  35. unstructured_ingest/v2/interfaces/file_data.py +58 -14
  36. unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
  37. unstructured_ingest/v2/interfaces/uploader.py +11 -2
  38. unstructured_ingest/v2/pipeline/steps/chunk.py +2 -1
  39. unstructured_ingest/v2/pipeline/steps/download.py +5 -4
  40. unstructured_ingest/v2/pipeline/steps/embed.py +2 -1
  41. unstructured_ingest/v2/pipeline/steps/filter.py +2 -2
  42. unstructured_ingest/v2/pipeline/steps/index.py +4 -4
  43. unstructured_ingest/v2/pipeline/steps/partition.py +3 -2
  44. unstructured_ingest/v2/pipeline/steps/stage.py +5 -3
  45. unstructured_ingest/v2/pipeline/steps/uncompress.py +2 -2
  46. unstructured_ingest/v2/pipeline/steps/upload.py +3 -3
  47. unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
  48. unstructured_ingest/v2/processes/connectors/astradb.py +43 -63
  49. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
  50. unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
  51. unstructured_ingest/v2/processes/connectors/couchbase.py +92 -93
  52. unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
  53. unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
  54. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
  55. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
  56. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +46 -75
  57. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +12 -35
  58. unstructured_ingest/v2/processes/connectors/fsspec/box.py +12 -35
  59. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +15 -42
  60. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +33 -29
  61. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +12 -34
  62. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -37
  63. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -33
  64. unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
  65. unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
  66. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
  67. unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
  68. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
  69. unstructured_ingest/v2/processes/connectors/local.py +13 -2
  70. unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
  71. unstructured_ingest/v2/processes/connectors/mongodb.py +99 -108
  72. unstructured_ingest/v2/processes/connectors/neo4j.py +383 -0
  73. unstructured_ingest/v2/processes/connectors/onedrive.py +1 -1
  74. unstructured_ingest/v2/processes/connectors/pinecone.py +3 -33
  75. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
  76. unstructured_ingest/v2/processes/connectors/sql/postgres.py +5 -5
  77. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -5
  78. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
  79. unstructured_ingest/v2/processes/connectors/sql/sql.py +72 -66
  80. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -5
  81. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
  82. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/METADATA +20 -15
  83. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/RECORD +87 -79
  84. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/LICENSE.md +0 -0
  85. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/WHEEL +0 -0
  86. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/entry_points.txt +0 -0
  87. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/top_level.txt +0 -0
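
The change that recurs across most connector tests below is the split of test/integration/connectors/utils/validation.py into a validation/ package (entries 26–30): ValidationConfigs becomes SourceValidationConfigs in validation/source.py, and a new validation/destination.py adds StagerValidationConfigs and stager_validation. A minimal sketch of how the tests call these helpers, assembled from the hunks below; the test_id value and the wrapper functions here are illustrative, not part of the package:

from pathlib import Path

from test.integration.connectors.utils.validation.destination import (
    StagerValidationConfigs,
    stager_validation,
)
from test.integration.connectors.utils.validation.source import (
    SourceValidationConfigs,
    source_connector_validation,
)


async def validate_source(indexer, downloader) -> None:
    # Source tests: ValidationConfigs was renamed to SourceValidationConfigs.
    await source_connector_validation(
        indexer=indexer,
        downloader=downloader,
        configs=SourceValidationConfigs(test_id="example", expected_num_files=5),
    )


def validate_stager(stager, upload_file: Path, tmp_path: Path) -> None:
    # Destination tests gain a shared stager check, parametrized over JSON
    # and NDJSON fixtures in the hunks below.
    stager_validation(
        configs=StagerValidationConfigs(test_id="example", expected_count=22),
        input_file=upload_file,
        stager=stager,
        tmp_dir=tmp_path,
    )
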
test/integration/connectors/test_chroma.py (new file)
@@ -0,0 +1,120 @@
+import json
+from pathlib import Path
+
+import chromadb
+import pytest
+from _pytest.fixtures import TopRequest
+
+from test.integration.connectors.utils.constants import (
+    DESTINATION_TAG,
+)
+from test.integration.connectors.utils.docker import HealthCheck, container_context
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
+from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
+from unstructured_ingest.v2.processes.connectors.chroma import (
+    CONNECTOR_TYPE,
+    ChromaConnectionConfig,
+    ChromaUploader,
+    ChromaUploaderConfig,
+    ChromaUploadStager,
+    ChromaUploadStagerConfig,
+)
+
+
+@pytest.fixture
+def chroma_instance():
+    with container_context(
+        image="chromadb/chroma:latest",
+        ports={8000: 8000},
+        name="chroma_int_test",
+        healthcheck=HealthCheck(
+            interval=5,
+            timeout=10,
+            retries=3,
+            test="timeout 10s bash -c ':> /dev/tcp/127.0.0.1/8000' || exit 1",
+        ),
+    ) as ctx:
+        yield ctx
+
+
+def validate_collection(collection_name: str, num_embeddings: int):
+    print(f"Checking contents of Chroma collection: {collection_name}")
+
+    chroma_client = chromadb.HttpClient(
+        host="localhost",
+        port="8000",
+        tenant="default_tenant",
+        database="default_database",
+    )
+
+    collection = chroma_client.get_or_create_collection(name=collection_name)
+
+    number_of_embeddings = collection.count()
+    expected_embeddings = num_embeddings
+    print(
+        f"# of embeddings in collection vs expected: {number_of_embeddings}/{expected_embeddings}"
+    )
+
+    assert number_of_embeddings == expected_embeddings, (
+        f"Number of rows in generated table ({number_of_embeddings}) "
+        f"doesn't match expected value: {expected_embeddings}"
+    )
+
+
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+def test_chroma_destination(
+    upload_file: Path,
+    chroma_instance,
+    tmp_path: Path,
+):
+    collection_name = "test_collection"
+    file_data = FileData(
+        source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
+        connector_type=CONNECTOR_TYPE,
+        identifier="mock file data",
+    )
+    stager = ChromaUploadStager(upload_stager_config=ChromaUploadStagerConfig())
+
+    uploader = ChromaUploader(
+        connection_config=ChromaConnectionConfig(
+            host="localhost",
+            port=8000,
+            tenant="default_tenant",
+            database="default_database",
+        ),
+        upload_config=ChromaUploaderConfig(collection_name=collection_name),
+    )
+    staged_filepath = stager.run(
+        elements_filepath=upload_file,
+        file_data=file_data,
+        output_dir=tmp_path,
+        output_filename=upload_file.name,
+    )
+    uploader.precheck()
+    uploader.run(path=staged_filepath, file_data=file_data)
+
+    # Run validation
+    with staged_filepath.open() as f:
+        staged_elements = json.load(f)
+    expected_count = len(staged_elements)
+    validate_collection(collection_name=collection_name, num_embeddings=expected_count)
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "stager")
+def test_chroma_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = ChromaUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )

test/integration/connectors/test_confluence.py
@@ -5,8 +5,8 @@ import pytest
 from test.integration.connectors.utils.constants import (
     SOURCE_TAG,
 )
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from test.integration.utils import requires_env
@@ -60,7 +60,7 @@ async def test_confluence_source(temp_dir):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
+        configs=SourceValidationConfigs(
            test_id="confluence",
            expected_num_files=11,
            validate_downloaded_files=True,
@@ -107,7 +107,7 @@ async def test_confluence_source_large(temp_dir):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
+        configs=SourceValidationConfigs(
             test_id="confluence_large", expected_num_files=250, validate_file_data=False
         ),
     )

test/integration/connectors/test_delta_table.py
@@ -114,6 +114,7 @@ async def test_delta_table_destination_s3(upload_file: Path, temp_dir: Path):
     )
 
     try:
+        uploader.precheck()
         if uploader.is_async():
             await uploader.run_async(path=new_upload_file, file_data=file_data)
         else:

test/integration/connectors/test_kafka.py
@@ -14,8 +14,8 @@ from test.integration.connectors.utils.constants import (
     env_setup_path,
 )
 from test.integration.connectors.utils.docker_compose import docker_compose_context
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from test.integration.utils import requires_env
@@ -121,8 +121,8 @@ async def test_kafka_source_local(kafka_seed_topic: str):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
-            test_id="kafka", expected_num_files=5, validate_downloaded_files=True
+        configs=SourceValidationConfigs(
+            test_id="kafka-local", expected_num_files=5, validate_downloaded_files=True
         ),
     )
 
@@ -203,8 +203,8 @@ async def test_kafka_source_cloud(kafka_seed_topic_cloud: int):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
-            test_id="kafka",
+        configs=SourceValidationConfigs(
+            test_id="kafka-cloud",
             exclude_fields_extend=["connector_type"],
             expected_num_files=expected_messages,
             validate_downloaded_files=True,

test/integration/connectors/test_milvus.py
@@ -4,6 +4,7 @@ from pathlib import Path
 
 import docker
 import pytest
+from _pytest.fixtures import TopRequest
 from pymilvus import (
     CollectionSchema,
     DataType,
@@ -15,6 +16,10 @@ from pymilvus.milvus_client import IndexParams
 from test.integration.connectors.utils.constants import DESTINATION_TAG, env_setup_path
 from test.integration.connectors.utils.docker import healthcheck_wait
 from test.integration.connectors.utils.docker_compose import docker_compose_context
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
 from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.milvus import (
@@ -167,3 +172,19 @@ def test_precheck_fails_on_nonexistent_collection(collection: str):
         match=f"Collection '{NONEXISTENT_COLLECTION_NAME}' does not exist",
     ):
         uploader.precheck()
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_milvus_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = MilvusUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )

test/integration/connectors/test_mongodb.py
@@ -14,8 +14,8 @@ from pymongo.mongo_client import MongoClient
 from pymongo.operations import SearchIndexModel
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from test.integration.utils import requires_env
@@ -196,8 +196,11 @@ async def test_mongodb_source(temp_dir: Path):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
-            test_id=CONNECTOR_TYPE, expected_num_files=4, validate_downloaded_files=True
+        configs=SourceValidationConfigs(
+            test_id=CONNECTOR_TYPE,
+            expected_num_files=4,
+            validate_downloaded_files=True,
+            expected_number_indexed_file_data=1,
         ),
     )
 
test/integration/connectors/test_neo4j.py (new file)
@@ -0,0 +1,236 @@
+import json
+import time
+import uuid
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+from neo4j import AsyncGraphDatabase, Driver, GraphDatabase
+from neo4j.exceptions import ServiceUnavailable
+from pytest_check import check
+
+from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.docker import container_context
+from unstructured_ingest.error import DestinationConnectionError
+from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
+from unstructured_ingest.v2.interfaces.file_data import (
+    FileData,
+    FileDataSourceMetadata,
+    SourceIdentifiers,
+)
+from unstructured_ingest.v2.processes.connectors.neo4j import (
+    CONNECTOR_TYPE,
+    Label,
+    Neo4jAccessConfig,
+    Neo4jConnectionConfig,
+    Neo4jUploader,
+    Neo4jUploaderConfig,
+    Neo4jUploadStager,
+    Relationship,
+)
+
+USERNAME = "neo4j"
+PASSWORD = "password"
+URI = "neo4j://localhost:7687"
+DATABASE = "neo4j"
+
+EXPECTED_DOCUMENT_COUNT = 1
+
+
+# NOTE: Precheck tests are read-only so we utilize the same container for all tests.
+# If new tests require clean neo4j container, this fixture's scope should be adjusted.
+@pytest.fixture(autouse=True, scope="module")
+def _neo4j_server():
+    with container_context(
+        image="neo4j:latest", environment={"NEO4J_AUTH": "neo4j/password"}, ports={"7687": "7687"}
+    ):
+        driver = GraphDatabase.driver(uri=URI, auth=(USERNAME, PASSWORD))
+        wait_for_connection(driver)
+        driver.close()
+        yield
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(DESTINATION_TAG, CONNECTOR_TYPE)
+async def test_neo4j_destination(upload_file: Path, tmp_path: Path):
+    stager = Neo4jUploadStager()
+    uploader = Neo4jUploader(
+        connection_config=Neo4jConnectionConfig(
+            access_config=Neo4jAccessConfig(password=PASSWORD),  # type: ignore
+            username=USERNAME,
+            uri=URI,
+            database=DATABASE,
+        ),
+        upload_config=Neo4jUploaderConfig(),
+    )
+    file_data = FileData(
+        identifier="mock-file-data",
+        connector_type="neo4j",
+        source_identifiers=SourceIdentifiers(
+            filename=upload_file.name,
+            fullpath=upload_file.name,
+        ),
+        metadata=FileDataSourceMetadata(
+            date_created=str(datetime(2022, 1, 1).timestamp()),
+            date_modified=str(datetime(2022, 1, 2).timestamp()),
+        ),
+    )
+    staged_filepath = stager.run(
+        upload_file,
+        file_data=file_data,
+        output_dir=tmp_path,
+        output_filename=upload_file.name,
+    )
+
+    await uploader.run_async(staged_filepath, file_data)
+    await validate_uploaded_graph(upload_file)
+
+    modified_upload_file = tmp_path / f"modified-{upload_file.name}"
+    with open(upload_file) as file:
+        elements = json.load(file)
+    for element in elements:
+        element["element_id"] = str(uuid.uuid4())
+
+    with open(modified_upload_file, "w") as file:
+        json.dump(elements, file, indent=4)
+
+    staged_filepath = stager.run(
+        modified_upload_file,
+        file_data=file_data,
+        output_dir=tmp_path,
+        output_filename=modified_upload_file.name,
+    )
+    await uploader.run_async(staged_filepath, file_data)
+    await validate_uploaded_graph(modified_upload_file)
+
+
+@pytest.mark.tags(DESTINATION_TAG, CONNECTOR_TYPE)
+class TestPrecheck:
+    @pytest.fixture
+    def configured_uploader(self) -> Neo4jUploader:
+        return Neo4jUploader(
+            connection_config=Neo4jConnectionConfig(
+                access_config=Neo4jAccessConfig(password=PASSWORD),  # type: ignore
+                username=USERNAME,
+                uri=URI,
+                database=DATABASE,
+            ),
+            upload_config=Neo4jUploaderConfig(),
+        )
+
+    def test_succeeds(self, configured_uploader: Neo4jUploader):
+        configured_uploader.precheck()
+
+    def test_fails_on_invalid_password(self, configured_uploader: Neo4jUploader):
+        configured_uploader.connection_config.access_config.get_secret_value().password = (
+            "invalid-password"
+        )
+        with pytest.raises(
+            DestinationConnectionError,
+            match="{code: Neo.ClientError.Security.Unauthorized}",
+        ):
+            configured_uploader.precheck()
+
+    def test_fails_on_invalid_username(self, configured_uploader: Neo4jUploader):
+        configured_uploader.connection_config.username = "invalid-username"
+        with pytest.raises(
+            DestinationConnectionError, match="{code: Neo.ClientError.Security.Unauthorized}"
+        ):
+            configured_uploader.precheck()
+
+    @pytest.mark.parametrize(
+        ("uri", "expected_error_msg"),
+        [
+            ("neo4j://localhst:7687", "Cannot resolve address"),
+            ("neo4j://localhost:7777", "Unable to retrieve routing information"),
+        ],
+    )
+    def test_fails_on_invalid_uri(
+        self, configured_uploader: Neo4jUploader, uri: str, expected_error_msg: str
+    ):
+        configured_uploader.connection_config.uri = uri
+        with pytest.raises(DestinationConnectionError, match=expected_error_msg):
+            configured_uploader.precheck()
+
+    def test_fails_on_invalid_database(self, configured_uploader: Neo4jUploader):
+        configured_uploader.connection_config.database = "invalid-database"
+        with pytest.raises(
+            DestinationConnectionError, match="{code: Neo.ClientError.Database.DatabaseNotFound}"
+        ):
+            configured_uploader.precheck()
+
+
+def wait_for_connection(driver: Driver, retries: int = 10, delay_seconds: int = 2):
+    attempts = 0
+    while attempts < retries:
+        try:
+            driver.verify_connectivity()
+            return
+        except ServiceUnavailable:
+            time.sleep(delay_seconds)
+            attempts += 1
+
+    pytest.fail("Failed to connect with Neo4j server.")
+
+
+async def validate_uploaded_graph(upload_file: Path):
+    with open(upload_file) as file:
+        elements = json.load(file)
+
+    for element in elements:
+        if "orig_elements" in element["metadata"]:
+            element["metadata"]["orig_elements"] = elements_from_base64_gzipped_json(
+                element["metadata"]["orig_elements"]
+            )
+        else:
+            element["metadata"]["orig_elements"] = []
+
+    expected_chunks_count = len(elements)
+    expected_element_count = len(
+        {
+            origin_element["element_id"]
+            for chunk in elements
+            for origin_element in chunk["metadata"]["orig_elements"]
+        }
+    )
+    expected_nodes_count = expected_chunks_count + expected_element_count + EXPECTED_DOCUMENT_COUNT
+
+    driver = AsyncGraphDatabase.driver(uri=URI, auth=(USERNAME, PASSWORD))
+    try:
+        nodes_count = len((await driver.execute_query("MATCH (n) RETURN n"))[0])
+        chunk_nodes_count = len(
+            (await driver.execute_query(f"MATCH (n: {Label.CHUNK}) RETURN n"))[0]
+        )
+        document_nodes_count = len(
+            (await driver.execute_query(f"MATCH (n: {Label.DOCUMENT}) RETURN n"))[0]
+        )
+        element_nodes_count = len(
+            (await driver.execute_query(f"MATCH (n: {Label.UNSTRUCTURED_ELEMENT}) RETURN n"))[0]
+        )
+        with check:
+            assert nodes_count == expected_nodes_count
+        with check:
+            assert document_nodes_count == EXPECTED_DOCUMENT_COUNT
+        with check:
+            assert chunk_nodes_count == expected_chunks_count
+        with check:
+            assert element_nodes_count == expected_element_count
+
+        records, _, _ = await driver.execute_query(
+            f"MATCH ()-[r:{Relationship.PART_OF_DOCUMENT}]->(:{Label.DOCUMENT}) RETURN r"
+        )
+        part_of_document_count = len(records)
+
+        records, _, _ = await driver.execute_query(
+            f"MATCH (:{Label.CHUNK})-[r:{Relationship.NEXT_CHUNK}]->(:{Label.CHUNK}) RETURN r"
+        )
+        next_chunk_count = len(records)
+
+        if not check.any_failures():
+            with check:
+                assert part_of_document_count == expected_chunks_count + expected_element_count
+            with check:
+                assert next_chunk_count == expected_chunks_count - 1
+
+    finally:
+        await driver.close()
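
The validate_uploaded_graph helper above derives its expected node count directly from the staged elements: one node per chunk, one per distinct original element recovered from metadata["orig_elements"], plus a single document node. A worked example of that arithmetic with hypothetical counts (the 10/22 figures are illustrative, not from the test fixtures):

# Hypothetical: 10 chunks built from 22 distinct original elements.
expected_chunks_count = 10
expected_element_count = 22
EXPECTED_DOCUMENT_COUNT = 1

expected_nodes_count = (
    expected_chunks_count + expected_element_count + EXPECTED_DOCUMENT_COUNT
)  # 33 nodes in total

# Edge counts follow the same shape: every chunk and element node is
# PART_OF_DOCUMENT (10 + 22 = 32 edges), and chunks form a NEXT_CHUNK
# chain with one fewer edge than there are chunks (10 - 1 = 9 edges).
assert expected_nodes_count == 33
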
test/integration/connectors/test_pinecone.py
@@ -8,12 +8,17 @@ from typing import Generator
 from uuid import uuid4
 
 import pytest
+from _pytest.fixtures import TopRequest
 from pinecone import Pinecone, ServerlessSpec
 from pinecone.core.openapi.shared.exceptions import NotFoundException
 
 from test.integration.connectors.utils.constants import (
     DESTINATION_TAG,
 )
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
 from test.integration.utils import requires_env
 from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
@@ -251,7 +256,10 @@ def test_large_metadata(pinecone_index: str, tmp_path: Path, upload_file: Path):
         identifier="mock-file-data",
     )
     staged_file = stager.run(
-        file_data, large_metadata_upload_file, tmp_path, large_metadata_upload_file.name
+        elements_filepath=large_metadata_upload_file,
+        file_data=file_data,
+        output_dir=tmp_path,
+        output_filename=large_metadata_upload_file.name,
     )
     try:
         uploader.run(staged_file, file_data)
@@ -262,3 +270,19 @@ def test_large_metadata(pinecone_index: str, tmp_path: Path, upload_file: Path):
             raise pytest.fail("Upload request failed due to metadata exceeding limits.")
 
     validate_pinecone_index(pinecone_index, 1, interval=5)
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_pinecone_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = PineconeUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )

test/integration/connectors/test_qdrant.py
@@ -6,10 +6,15 @@ from pathlib import Path
 from typing import AsyncGenerator
 
 import pytest
+from _pytest.fixtures import TopRequest
 from qdrant_client import AsyncQdrantClient
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG
 from test.integration.connectors.utils.docker import container_context
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
 from test.integration.utils import requires_env
 from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.qdrant.cloud import (
@@ -138,7 +143,7 @@ async def test_qdrant_destination_server(upload_file: Path, tmp_path: Path, dock
         output_dir=tmp_path,
         output_filename=upload_file.name,
     )
-
+    uploader.precheck()
     if uploader.is_async():
         await uploader.run_async(path=staged_upload_file, file_data=file_data)
     else:
@@ -183,10 +188,28 @@ async def test_qdrant_destination_cloud(upload_file: Path, tmp_path: Path):
         output_dir=tmp_path,
         output_filename=upload_file.name,
     )
-
+    uploader.precheck()
     if uploader.is_async():
         await uploader.run_async(path=staged_upload_file, file_data=file_data)
     else:
         uploader.run(path=staged_upload_file, file_data=file_data)
     async with qdrant_client(connection_kwargs) as client:
         await validate_upload(client=client, upload_file=upload_file)
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_qdrant_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = LocalQdrantUploadStager(
+        upload_stager_config=LocalQdrantUploadStagerConfig(),
+    )
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=LOCAL_CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )

test/integration/connectors/test_s3.py
@@ -11,8 +11,8 @@ from test.integration.connectors.utils.constants import (
     env_setup_path,
 )
 from test.integration.connectors.utils.docker_compose import docker_compose_context
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from test.integration.utils import requires_env
@@ -62,7 +62,7 @@ async def test_s3_source(anon_connection_config: S3ConnectionConfig):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
+        configs=SourceValidationConfigs(
             test_id="s3",
             predownload_file_data_check=validate_predownload_file_data,
             postdownload_file_data_check=validate_postdownload_file_data,
@@ -85,7 +85,7 @@ async def test_s3_source_special_char(anon_connection_config: S3ConnectionConfig
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
+        configs=SourceValidationConfigs(
             test_id="s3-specialchar",
             predownload_file_data_check=validate_predownload_file_data,
             postdownload_file_data_check=validate_postdownload_file_data,
@@ -121,7 +121,7 @@ async def test_s3_minio_source(anon_connection_config: S3ConnectionConfig):
     await source_connector_validation(
         indexer=indexer,
         downloader=downloader,
-        configs=ValidationConfigs(
+        configs=SourceValidationConfigs(
             test_id="s3-minio",
             predownload_file_data_check=validate_predownload_file_data,
             postdownload_file_data_check=validate_postdownload_file_data,
@@ -165,11 +165,14 @@ async def test_s3_destination(upload_file: Path):
         identifier="mock file data",
     )
     try:
+        uploader.precheck()
         if uploader.is_async():
             await uploader.run_async(path=upload_file, file_data=file_data)
         else:
             uploader.run(path=upload_file, file_data=file_data)
-        uploaded_files = s3fs.ls(path=destination_path)
+        uploaded_files = [
+            Path(file) for file in s3fs.ls(path=destination_path) if Path(file).name != "_empty"
+        ]
         assert len(uploaded_files) == 1
     finally:
         s3fs.rm(path=destination_path, recursive=True)
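
The delta table, qdrant, and s3 hunks above all add the same step: calling uploader.precheck() before the upload, so connection and permission failures surface before any data is written. A minimal sketch of the pattern the destination tests converge on; uploader, upload_file, and file_data are placeholders for whichever connector is under test:

async def run_destination(uploader, upload_file, file_data) -> None:
    uploader.precheck()  # fail fast on bad credentials or config
    if uploader.is_async():
        await uploader.run_async(path=upload_file, file_data=file_data)
    else:
        uploader.run(path=upload_file, file_data=file_data)
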
test/integration/connectors/utils/docker.py
@@ -44,6 +44,7 @@ def get_container(
     docker_client: docker.DockerClient,
     image: str,
     ports: dict,
+    name: Optional[str] = "connector_test",
     environment: Optional[dict] = None,
     volumes: Optional[dict] = None,
     healthcheck: Optional[HealthCheck] = None,
@@ -59,6 +60,8 @@
         run_kwargs["volumes"] = volumes
     if healthcheck:
         run_kwargs["healthcheck"] = healthcheck.model_dump()
+    if name:
+        run_kwargs["name"] = name
     container: Container = docker_client.containers.run(**run_kwargs)
     return container
 
@@ -112,6 +115,7 @@
     healthcheck: Optional[HealthCheck] = None,
     healthcheck_retries: int = 30,
     docker_client: Optional[docker.DockerClient] = None,
+    name: Optional[str] = "connector_test",
 ):
     docker_client = docker_client or docker.from_env()
     print(f"pulling image {image}")
@@ -125,6 +129,7 @@
         environment=environment,
         volumes=volumes,
         healthcheck=healthcheck,
+        name=name,
     )
     if healthcheck_data := get_healthcheck(container):
         # Mirror whatever healthcheck config set on container
@@ -143,3 +148,4 @@
     finally:
         if container:
             container.kill()
+            container.remove()
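
With these changes, container_context names its containers (defaulting to "connector_test") and removes them after kill, so stale containers no longer collide on test reruns. A usage sketch mirroring the chroma fixture earlier in this diff; the image, port, and healthcheck values are taken from that test:

from test.integration.connectors.utils.docker import HealthCheck, container_context

with container_context(
    image="chromadb/chroma:latest",
    ports={8000: 8000},
    name="chroma_int_test",  # new parameter: the container is named, then removed on exit
    healthcheck=HealthCheck(
        interval=5,
        timeout=10,
        retries=3,
        test="timeout 10s bash -c ':> /dev/tcp/127.0.0.1/8000' || exit 1",
    ),
) as ctx:
    ...  # run assertions against the live container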