unstructured-ingest 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest has been flagged as possibly problematic.
- test/integration/connectors/test_lancedb.py +46 -9
- test/integration/connectors/test_pinecone.py +60 -9
- test/integration/embedders/test_azure_openai.py +59 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +4 -1
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +1 -5
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +12 -2
- unstructured_ingest/v2/processes/connectors/pinecone.py +31 -21
- unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +0 -3
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +0 -10
- unstructured_ingest/v2/processes/embedder.py +30 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/METADATA +16 -16
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/RECORD +18 -16
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/top_level.txt +0 -0
test/integration/connectors/test_lancedb.py

```diff
@@ -12,6 +12,7 @@ from lancedb import AsyncConnection
 from upath import UPath
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
     LanceDBAwsAccessConfig,
@@ -43,7 +44,6 @@ DATABASE_NAME = "database"
 TABLE_NAME = "elements"
 DIMENSION = 384
 NUMBER_EXPECTED_ROWS = 22
-NUMBER_EXPECTED_COLUMNS = 10
 S3_BUCKET = "s3://utic-ingest-test-fixtures/"
 GS_BUCKET = "gs://utic-test-ingest-fixtures-output/"
 AZURE_BUCKET = "az://utic-ingest-test-fixtures-output/"
@@ -54,9 +54,9 @@ REQUIRED_ENV_VARS = {
     "local": (),
 }
 
-
 SCHEMA = pa.schema(
     [
+        pa.field(RECORD_ID_LABEL, pa.string()),
         pa.field("vector", pa.list_(pa.float16(), DIMENSION)),
         pa.field("text", pa.string(), nullable=True),
         pa.field("type", pa.string(), nullable=True),
@@ -69,6 +69,7 @@ SCHEMA = pa.schema(
         pa.field("metadata-page_number", pa.int32(), nullable=True),
     ]
 )
+NUMBER_EXPECTED_COLUMNS = len(SCHEMA.names)
 
 
 @pytest_asyncio.fixture
@@ -116,7 +117,7 @@ async def test_lancedb_destination(
     file_data = FileData(
         source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
         connector_type=CONNECTOR_TYPE,
-        identifier="mock
+        identifier="mock-file-data",
     )
     stager = LanceDBUploadStager()
     uploader = _get_uploader(uri)
@@ -129,17 +130,52 @@ async def test_lancedb_destination(
 
     await uploader.run_async(path=staged_file_path, file_data=file_data)
 
-
-
+    # Test upload to empty table
+    with await connection.open_table(TABLE_NAME) as table:
+        table_df: pd.DataFrame = await table.to_pandas()
 
     assert len(table_df) == NUMBER_EXPECTED_ROWS
     assert len(table_df.columns) == NUMBER_EXPECTED_COLUMNS
 
+    assert table_df[RECORD_ID_LABEL][0] == file_data.identifier
     assert table_df["element_id"][0] == "2470d8dc42215b3d68413b55bf00fed2"
     assert table_df["type"][0] == "CompositeElement"
     assert table_df["metadata-filename"][0] == "DA-1p-with-duplicate-pages.pdf.json"
     assert table_df["metadata-text_as_html"][0] is None
 
+    # Test upload of the second file, rows should be appended
+    file_data.identifier = "mock-file-data-2"
+    staged_second_file_path = stager.run(
+        elements_filepath=upload_file,
+        file_data=file_data,
+        output_dir=tmp_path,
+        output_filename=f"{upload_file.stem}-2{upload_file.suffix}",
+    )
+    await uploader.run_async(path=staged_second_file_path, file_data=file_data)
+    with await connection.open_table(TABLE_NAME) as table:
+        appended_table_df: pd.DataFrame = await table.to_pandas()
+    assert len(appended_table_df) == 2 * NUMBER_EXPECTED_ROWS
+
+    # Test re-upload of the first file, rows should be overwritten, not appended
+    await uploader.run_async(path=staged_file_path, file_data=file_data)
+    with await connection.open_table(TABLE_NAME) as table:
+        overwritten_table_df: pd.DataFrame = await table.to_pandas()
+    assert len(overwritten_table_df) == 2 * NUMBER_EXPECTED_ROWS
+
+
+class TestPrecheck:
+    @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+    @pytest.mark.parametrize("connection_with_uri", ["local", "s3", "gcs", "az"], indirect=True)
+    def test_succeeds(
+        self,
+        upload_file: Path,
+        connection_with_uri: tuple[AsyncConnection, str],
+        tmp_path: Path,
+    ) -> None:
+        _, uri = connection_with_uri
+        uploader = _get_uploader(uri)
+        uploader.precheck()
+
 
 def _get_uri(target: Literal["local", "s3", "gcs", "az"], local_base_path: Path) -> str:
     if target == "local":
@@ -158,11 +194,12 @@ def _get_uploader(
     uri: str,
 ) -> Union[LanceDBAzureUploader, LanceDBAzureUploader, LanceDBAwsUploader, LanceDBGSPUploader]:
     target = uri.split("://", maxsplit=1)[0] if uri.startswith(("s3", "az", "gs")) else "local"
+    upload_config = LanceDBUploaderConfig(table_name=TABLE_NAME)
     if target == "az":
        azure_connection_string = os.getenv("AZURE_DEST_CONNECTION_STR")
        access_config_kwargs = _parse_azure_connection_string(azure_connection_string)
        return LanceDBAzureUploader(
-            upload_config=
+            upload_config=upload_config,
            connection_config=LanceDBAzureConnectionConfig(
                access_config=LanceDBAzureAccessConfig(**access_config_kwargs),
                uri=uri,
@@ -171,7 +208,7 @@ def _get_uploader(
 
    elif target == "s3":
        return LanceDBAwsUploader(
-            upload_config=
+            upload_config=upload_config,
            connection_config=LanceDBAwsConnectionConfig(
                access_config=LanceDBAwsAccessConfig(
                    aws_access_key_id=os.getenv("S3_INGEST_TEST_ACCESS_KEY"),
@@ -182,7 +219,7 @@ def _get_uploader(
        )
    elif target == "gs":
        return LanceDBGSPUploader(
-            upload_config=
+            upload_config=upload_config,
            connection_config=LanceDBGCSConnectionConfig(
                access_config=LanceDBGCSAccessConfig(
                    google_service_account_key=os.getenv("GCP_INGEST_SERVICE_KEY")
@@ -192,7 +229,7 @@ def _get_uploader(
        )
    else:
        return LanceDBLocalUploader(
-            upload_config=
+            upload_config=upload_config,
            connection_config=LanceDBLocalConnectionConfig(
                access_config=LanceDBLocalAccessConfig(),
                uri=uri,
```
test/integration/connectors/test_pinecone.py

```diff
@@ -1,4 +1,5 @@
 import json
+import math
 import os
 import re
 import time
@@ -19,6 +20,7 @@ from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connectors.pinecone import (
     CONNECTOR_TYPE,
+    MAX_QUERY_RESULTS,
     PineconeAccessConfig,
     PineconeConnectionConfig,
     PineconeUploader,
@@ -118,7 +120,10 @@ def validate_pinecone_index(
             f"retry attempt {i}: expected {expected_num_of_vectors} != vector count {vector_count}"
         )
         time.sleep(interval)
-    assert vector_count == expected_num_of_vectors
+    assert vector_count == expected_num_of_vectors, (
+        f"vector count from index ({vector_count}) doesn't "
+        f"match expected number: {expected_num_of_vectors}"
+    )
 
 
 @requires_env(API_KEY)
@@ -147,10 +152,7 @@ async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp_dir: Path):
     uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config)
     uploader.precheck()
 
-
-        await uploader.run_async(path=new_upload_file, file_data=file_data)
-    else:
-        uploader.run(path=new_upload_file, file_data=file_data)
+    uploader.run(path=new_upload_file, file_data=file_data)
     with new_upload_file.open() as f:
         staged_content = json.load(f)
     expected_num_of_vectors = len(staged_content)
@@ -160,10 +162,59 @@ async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp_dir: Path):
     )
 
     # Rerun uploader and make sure no duplicates exist
-
-
-
-
+    uploader.run(path=new_upload_file, file_data=file_data)
+    logger.info("validating second upload")
+    validate_pinecone_index(
+        index_name=pinecone_index, expected_num_of_vectors=expected_num_of_vectors
+    )
+
+
+@requires_env(API_KEY)
+@pytest.mark.asyncio
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.skip(reason="TODO: get this to work")
+async def test_pinecone_destination_large_index(
+    pinecone_index: str, upload_file: Path, temp_dir: Path
+):
+    new_file = temp_dir / "large_file.json"
+    with upload_file.open() as f:
+        upload_content = json.load(f)
+
+    min_entries = math.ceil((MAX_QUERY_RESULTS * 2) / len(upload_content))
+    new_content = (upload_content * min_entries)[: (2 * MAX_QUERY_RESULTS)]
+    print(f"Creating large index content with {len(new_content)} records")
+    with new_file.open("w") as f:
+        json.dump(new_content, f)
+
+    expected_num_of_vectors = len(new_content)
+    file_data = FileData(
+        source_identifiers=SourceIdentifiers(fullpath=new_file.name, filename=new_file.name),
+        connector_type=CONNECTOR_TYPE,
+        identifier="pinecone_mock_id",
+    )
+    connection_config = PineconeConnectionConfig(
+        index_name=pinecone_index,
+        access_config=PineconeAccessConfig(api_key=get_api_key()),
+    )
+    stager_config = PineconeUploadStagerConfig()
+    stager = PineconeUploadStager(upload_stager_config=stager_config)
+    new_upload_file = stager.run(
+        elements_filepath=new_file,
+        output_dir=temp_dir,
+        output_filename=new_file.name,
+        file_data=file_data,
+    )
+
+    upload_config = PineconeUploaderConfig()
+    uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config)
+    uploader.precheck()
+
+    uploader.run(path=new_upload_file, file_data=file_data)
+    validate_pinecone_index(
+        index_name=pinecone_index, expected_num_of_vectors=expected_num_of_vectors
+    )
+    # Rerun uploader and make sure no duplicates exist
+    uploader.run(path=new_upload_file, file_data=file_data)
     logger.info("validating second upload")
     validate_pinecone_index(
         index_name=pinecone_index, expected_num_of_vectors=expected_num_of_vectors
```
test/integration/embedders/test_azure_openai.py (new file)

```diff
@@ -0,0 +1,59 @@
+import json
+import os
+from dataclasses import dataclass
+from pathlib import Path
+
+from test.integration.embedders.utils import validate_embedding_output, validate_raw_embedder
+from test.integration.utils import requires_env
+from unstructured_ingest.embed.azure_openai import (
+    AzureOpenAIEmbeddingConfig,
+    AzureOpenAIEmbeddingEncoder,
+)
+from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
+
+API_KEY = "AZURE_OPENAI_API_KEY"
+ENDPOINT = "AZURE_OPENAI_ENDPOINT"
+
+
+@dataclass(frozen=True)
+class AzureData:
+    api_key: str
+    endpoint: str
+
+
+def get_azure_data() -> AzureData:
+    api_key = os.getenv(API_KEY, None)
+    assert api_key
+    endpoint = os.getenv(ENDPOINT, None)
+    assert endpoint
+    return AzureData(api_key, endpoint)
+
+
+@requires_env(API_KEY, ENDPOINT)
+def test_azure_openai_embedder(embedder_file: Path):
+    azure_data = get_azure_data()
+    embedder_config = EmbedderConfig(
+        embedding_provider="azure-openai",
+        embedding_api_key=azure_data.api_key,
+        embedding_azure_endpoint=azure_data.endpoint,
+    )
+    embedder = Embedder(config=embedder_config)
+    results = embedder.run(elements_filepath=embedder_file)
+    assert results
+    with embedder_file.open("r") as f:
+        original_elements = json.load(f)
+    validate_embedding_output(original_elements=original_elements, output_elements=results)
+
+
+@requires_env(API_KEY, ENDPOINT)
+def test_raw_azure_openai_embedder(embedder_file: Path):
+    azure_data = get_azure_data()
+    embedder = AzureOpenAIEmbeddingEncoder(
+        config=AzureOpenAIEmbeddingConfig(
+            api_key=azure_data.api_key,
+            azure_endpoint=azure_data.endpoint,
+        )
+    )
+    validate_raw_embedder(
+        embedder=embedder, embedder_file=embedder_file, expected_dimensions=(1536,)
+    )
```
unstructured_ingest/__version__.py

```diff
@@ -1 +1 @@
-__version__ = "0.3.3"  # pragma: no cover
+__version__ = "0.3.5"  # pragma: no cover
```
unstructured_ingest/embed/azure_openai.py (new file)

```diff
@@ -0,0 +1,31 @@
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from pydantic import Field
+
+from unstructured_ingest.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
+from unstructured_ingest.utils.dep_check import requires_dependencies
+
+if TYPE_CHECKING:
+    from openai import AzureOpenAI
+
+
+class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
+    api_version: str = Field(description="Azure API version", default="2024-06-01")
+    azure_endpoint: str
+    embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
+
+    @requires_dependencies(["openai"], extras="openai")
+    def get_client(self) -> "AzureOpenAI":
+        from openai import AzureOpenAI
+
+        return AzureOpenAI(
+            api_key=self.api_key.get_secret_value(),
+            api_version=self.api_version,
+            azure_endpoint=self.azure_endpoint,
+        )
+
+
+@dataclass
+class AzureOpenAIEmbeddingEncoder(OpenAIEmbeddingEncoder):
+    config: AzureOpenAIEmbeddingConfig
```
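The new module gets Azure support almost for free by subclassing the existing OpenAI encoder and swapping in an `AzureOpenAI` client. A minimal usage sketch, mirroring the integration test above (the environment variable names match the test; everything else is a placeholder):

```python
import os

from unstructured_ingest.embed.azure_openai import (
    AzureOpenAIEmbeddingConfig,
    AzureOpenAIEmbeddingEncoder,
)

# api_version defaults to "2024-06-01" and the model name to
# "text-embedding-ada-002", so only the key and endpoint are required.
encoder = AzureOpenAIEmbeddingEncoder(
    config=AzureOpenAIEmbeddingConfig(
        api_key=os.environ["AZURE_OPENAI_API_KEY"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    )
)
```

Since `AzureOpenAIEmbeddingConfig` extends `OpenAIEmbeddingConfig`, batching and element handling are inherited unchanged; only `get_client()` differs.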
unstructured_ingest/v2/processes/connectors/couchbase.py

```diff
@@ -219,6 +219,9 @@ class CouchbaseIndexer(Indexer):
 
 
 class CouchbaseDownloaderConfig(DownloaderConfig):
+    collection_id: str = Field(
+        default="id", description="The unique key of the id field in the collection"
+    )
     fields: list[str] = field(default_factory=list)
 
 
@@ -250,7 +253,7 @@ class CouchbaseDownloader(Downloader):
     def generate_download_response(
         self, result: dict, bucket: str, file_data: FileData
     ) -> DownloadResponse:
-        record_id = result[
+        record_id = result[self.download_config.collection_id]
         filename_id = self.get_identifier(bucket=bucket, record_id=record_id)
         filename = f"{filename_id}.txt"
         download_path = self.download_dir / Path(filename)
```
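The downloader previously assumed the record id lived under a fixed key in each query result; it now reads the key name from configuration, defaulting to `id`. A short sketch, where the field name `doc_key` is a hypothetical example:

```python
from unstructured_ingest.v2.processes.connectors.couchbase import CouchbaseDownloaderConfig

# Default: ids are read from result["id"], matching the old behavior.
download_config = CouchbaseDownloaderConfig()

# Hypothetical collection whose unique id field is named "doc_key".
download_config = CouchbaseDownloaderConfig(collection_id="doc_key")
```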
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py

```diff
@@ -142,8 +142,6 @@ class ElasticsearchIndexer(Indexer):
     def precheck(self) -> None:
         try:
             with self.connection_config.get_client() as client:
-                if not client.ping():
-                    raise SourceConnectionError("cluster not detected")
                 indices = client.indices.get_alias(index="*")
                 if self.index_config.index_name not in indices:
                     raise SourceConnectionError(
@@ -393,11 +391,9 @@ class ElasticsearchUploader(Uploader):
     def precheck(self) -> None:
         try:
             with self.connection_config.get_client() as client:
-                if not client.ping():
-                    raise DestinationConnectionError("cluster not detected")
                 indices = client.indices.get_alias(index="*")
                 if self.upload_config.index_name not in indices:
-                    raise
+                    raise DestinationConnectionError(
                         "index {} not found: {}".format(
                             self.upload_config.index_name, ", ".join(indices.keys())
                         )
```
unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py

```diff
@@ -15,6 +15,7 @@ from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.logger import logger
 from unstructured_ingest.utils.data_prep import flatten_dict
 from unstructured_ingest.utils.dep_check import requires_dependencies
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces.connector import ConnectionConfig
 from unstructured_ingest.v2.interfaces.file_data import FileData
 from unstructured_ingest.v2.interfaces.upload_stager import UploadStager, UploadStagerConfig
@@ -84,7 +85,7 @@ class LanceDBUploadStager(UploadStager):
 
         df = pd.DataFrame(
             [
-                self._conform_element_contents(element_contents)
+                self._conform_element_contents(element_contents, file_data)
                 for element_contents in elements_contents
             ]
         )
@@ -94,9 +95,10 @@ class LanceDBUploadStager(UploadStager):
 
         return output_path
 
-    def _conform_element_contents(self, element: dict) -> dict:
+    def _conform_element_contents(self, element: dict, file_data: FileData) -> dict:
         return {
             "vector": element.pop("embeddings", None),
+            RECORD_ID_LABEL: file_data.identifier,
             **flatten_dict(element, separator="-"),
         }
 
@@ -134,6 +136,14 @@ class LanceDBUploader(Uploader):
         async with self.get_table() as table:
             schema = await table.schema()
             df = self._fit_to_schema(df, schema)
+            if RECORD_ID_LABEL not in schema.names:
+                logger.warning(
+                    f"Designated table doesn't contain {RECORD_ID_LABEL} column of type"
+                    " string which is required to support overwriting updates on subsequent"
+                    " uploads of the same record. New rows will be appended instead."
+                )
+            else:
+                await table.delete(f'{RECORD_ID_LABEL} = "{file_data.identifier}"')
             await table.add(data=df)
 
     def _fit_to_schema(self, df: pd.DataFrame, schema) -> pd.DataFrame:
```
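Combined, the stager change stamps every row with the uploading record's id, and the uploader deletes rows carrying that id before appending, so re-uploading the same file replaces its rows instead of duplicating them (which is what the updated test asserts). A condensed sketch of the delete-then-add pattern, assuming an already-open LanceDB async table:

```python
from unstructured_ingest.v2.constants import RECORD_ID_LABEL


async def upsert_rows(table, df, record_id: str) -> None:
    """Sketch: replace any previously uploaded rows for this record, then append."""
    schema = await table.schema()
    if RECORD_ID_LABEL in schema.names:
        # delete() takes a SQL-style predicate, as in the diff above.
        await table.delete(f'{RECORD_ID_LABEL} = "{record_id}"')
    # Without the record-id column, rows can only be appended.
    await table.add(data=df)
```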
unstructured_ingest/v2/processes/connectors/pinecone.py

```diff
@@ -31,6 +31,7 @@ CONNECTOR_TYPE = "pinecone"
 MAX_PAYLOAD_SIZE = 2 * 1024 * 1024  # 2MB
 MAX_POOL_THREADS = 100
 MAX_METADATA_BYTES = 40960  # 40KB https://docs.pinecone.io/reference/quotas-and-limits#hard-limits
+MAX_QUERY_RESULTS = 10000
 
 
 class PineconeAccessConfig(AccessConfig):
@@ -84,7 +85,7 @@ ALLOWED_FIELDS = (
 
 class PineconeUploadStagerConfig(UploadStagerConfig):
     metadata_fields: list[str] = Field(
-        default=
+        default=list(ALLOWED_FIELDS),
         description=(
             "which metadata from the source element to map to the payload metadata being sent to "
             "Pinecone."
@@ -137,7 +138,6 @@ class PineconeUploadStager(UploadStager):
             flatten_lists=True,
             remove_none=True,
         )
-        metadata[RECORD_ID_LABEL] = file_data.identifier
         metadata_size_bytes = len(json.dumps(metadata).encode())
         if metadata_size_bytes > MAX_METADATA_BYTES:
             logger.info(
@@ -146,6 +146,8 @@ class PineconeUploadStager(UploadStager):
             )
             metadata = {}
 
+        metadata[RECORD_ID_LABEL] = file_data.identifier
+
         return {
             "id": str(uuid.uuid4()),
             "values": embeddings,
@@ -213,6 +215,18 @@ class PineconeUploader(Uploader):
                 f"from pinecone index: {resp}"
             )
 
+    def delete_by_query(self, index: "PineconeIndex", query_params: dict) -> None:
+        while True:
+            query_results = index.query(**query_params)
+            matches = query_results.get("matches", [])
+            if not matches:
+                break
+            ids = [match["id"] for match in matches]
+            delete_params = {"ids": ids}
+            if namespace := self.upload_config.namespace:
+                delete_params["namespace"] = namespace
+            index.delete(**delete_params)
+
     def serverless_delete_by_record_id(self, file_data: FileData) -> None:
         logger.debug(
             f"deleting any content with metadata "
@@ -221,29 +235,25 @@ class PineconeUploader(Uploader):
         )
         index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS)
         index_stats = index.describe_index_stats()
+        dimension = index_stats["dimension"]
         total_vectors = index_stats["total_vector_count"]
         if total_vectors == 0:
             return
-
-
-
-
-
-
-
-            query_params["namespace"] = namespace
-        while True:
-            query_results = index.query(**query_params)
-            matches = query_results.get("matches", [])
-            if not matches:
-                break
-            ids = [match["id"] for match in matches]
-            delete_params = {"ids": ids}
+        while total_vectors > 0:
+            top_k = min(total_vectors, MAX_QUERY_RESULTS)
+            query_params = {
+                "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}},
+                "vector": [0] * dimension,
+                "top_k": top_k,
+            }
             if namespace := self.upload_config.namespace:
-
-
-
-
+                query_params["namespace"] = namespace
+            self.delete_by_query(index=index, query_params=query_params)
+            index_stats = index.describe_index_stats()
+            total_vectors = index_stats["total_vector_count"]
+
+        logger.info(
+            f"deleted {total_vectors} records with metadata "
             f"{self.upload_config.record_id_key}={file_data.identifier} "
             f"from pinecone index"
         )
```
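Because a single Pinecone query returns at most a bounded number of matches, deleting an arbitrarily large record now happens in rounds: query with a dummy zero vector plus a metadata filter on the record id, delete the returned ids, and repeat until nothing matches. A standalone sketch of that loop (the index handle and argument values are assumed):

```python
MAX_QUERY_RESULTS = 10000  # mirrors the new module-level constant


def delete_record_vectors(index, record_id_key: str, record_id: str, dimension: int) -> None:
    """Sketch of the paginated delete performed by serverless_delete_by_record_id."""
    while True:
        results = index.query(
            vector=[0] * dimension,  # dummy query vector; only the filter matters
            filter={record_id_key: {"$eq": record_id}},
            top_k=MAX_QUERY_RESULTS,
        )
        matches = results.get("matches", [])
        if not matches:
            break
        index.delete(ids=[match["id"] for match in matches])
```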
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py

```diff
@@ -10,8 +10,6 @@ from .embedded import CONNECTOR_TYPE as EMBEDDED_WEAVIATE_CONNECTOR_TYPE
 from .embedded import weaviate_embedded_destination_entry
 from .local import CONNECTOR_TYPE as LOCAL_WEAVIATE_CONNECTOR_TYPE
 from .local import weaviate_local_destination_entry
-from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE
-from .weaviate import weaviate_destination_entry
 
 add_destination_entry(
     destination_type=LOCAL_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_local_destination_entry
@@ -22,4 +20,3 @@ add_destination_entry(
 add_destination_entry(
     destination_type=EMBEDDED_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_embedded_destination_entry
 )
-add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_destination_entry)
```
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py

```diff
@@ -22,7 +22,6 @@ from unstructured_ingest.v2.interfaces import (
     UploadStagerConfig,
 )
 from unstructured_ingest.v2.logger import logger
-from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
 
 if TYPE_CHECKING:
     from weaviate.classes.init import Timeout
@@ -288,12 +287,3 @@ class WeaviateUploader(Uploader, ABC):
                 vector=vector,
             )
         self.check_for_errors(client=weaviate_client)
-
-
-weaviate_destination_entry = DestinationRegistryEntry(
-    connection_config=WeaviateConnectionConfig,
-    uploader=WeaviateUploader,
-    uploader_config=WeaviateUploaderConfig,
-    upload_stager=WeaviateUploadStager,
-    upload_stager_config=WeaviateUploadStagerConfig,
-)
```
unstructured_ingest/v2/processes/embedder.py

```diff
@@ -16,6 +16,7 @@ class EmbedderConfig(BaseModel):
     embedding_provider: Optional[
         Literal[
             "openai",
+            "azure-openai",
             "huggingface",
             "aws-bedrock",
             "vertexai",
@@ -43,6 +44,14 @@ class EmbedderConfig(BaseModel):
     embedding_aws_region: Optional[str] = Field(
         default="us-west-2", description="AWS region used for AWS-based embedders, such as bedrock"
     )
+    embedding_azure_endpoint: Optional[str] = Field(
+        default=None,
+        description="Your Azure endpoint, including the resource, "
+        "e.g. `https://example-resource.azure.openai.com/`",
+    )
+    embedding_azure_api_version: Optional[str] = Field(
+        description="Azure API version", default=None
+    )
 
     def get_huggingface_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
         from unstructured_ingest.embed.huggingface import (
@@ -59,6 +68,25 @@ class EmbedderConfig(BaseModel):
 
         return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig.model_validate(embedding_kwargs))
 
+    def get_azure_openai_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
+        from unstructured_ingest.embed.azure_openai import (
+            AzureOpenAIEmbeddingConfig,
+            AzureOpenAIEmbeddingEncoder,
+        )
+
+        config_kwargs = {
+            "api_key": self.embedding_api_key,
+            "azure_endpoint": self.embedding_azure_endpoint,
+        }
+        if api_version := self.embedding_azure_api_version:
+            config_kwargs["api_version"] = api_version
+        if model_name := self.embedding_model_name:
+            config_kwargs["model_name"] = model_name
+
+        return AzureOpenAIEmbeddingEncoder(
+            config=AzureOpenAIEmbeddingConfig.model_validate(config_kwargs)
+        )
+
     def get_octoai_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
         from unstructured_ingest.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder
 
@@ -146,6 +174,8 @@ class EmbedderConfig(BaseModel):
             return self.get_mixedbread_embedder(embedding_kwargs=kwargs)
         if self.embedding_provider == "togetherai":
             return self.get_togetherai_embedder(embedding_kwargs=kwargs)
+        if self.embedding_provider == "azure-openai":
+            return self.get_azure_openai_embedder(embedding_kwargs=kwargs)
 
         raise ValueError(f"{self.embedding_provider} not a recognized encoder")
 
```
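With the provider wired into `EmbedderConfig`, callers select Azure the same way as any other encoder, which is exactly what the new integration test does. A minimal sketch (the environment variables match the test; the file path is a placeholder):

```python
import os
from pathlib import Path

from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig

config = EmbedderConfig(
    embedding_provider="azure-openai",
    embedding_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    embedding_azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    # embedding_azure_api_version and embedding_model_name are optional overrides
)
embedder = Embedder(config=config)
elements = embedder.run(elements_filepath=Path("elements.json"))  # placeholder path
```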
{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unstructured-ingest
-Version: 0.3.3
+Version: 0.3.5
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: dataclasses-json
-Requires-Dist: pydantic>=2.7
 Requires-Dist: pandas
+Requires-Dist: dataclasses-json
 Requires-Dist: tqdm
-Requires-Dist: python-dateutil
-Requires-Dist: click
 Requires-Dist: opentelemetry-sdk
+Requires-Dist: pydantic>=2.7
+Requires-Dist: click
+Requires-Dist: python-dateutil
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
@@ -51,8 +51,8 @@ Requires-Dist: chromadb; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: atlassian-python-api; extra == "confluence"
 Requires-Dist: requests; extra == "confluence"
+Requires-Dist: atlassian-python-api; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: csv
@@ -60,8 +60,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
 Provides-Extra: databricks-volumes
 Requires-Dist: databricks-sdk; extra == "databricks-volumes"
 Provides-Extra: delta-table
-Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: deltalake; extra == "delta-table"
+Requires-Dist: boto3; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord-py; extra == "discord"
 Provides-Extra: doc
@@ -78,8 +78,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: embed-octoai
-Requires-Dist: openai; extra == "embed-octoai"
 Requires-Dist: tiktoken; extra == "embed-octoai"
+Requires-Dist: openai; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -88,8 +88,8 @@ Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
 Provides-Extra: gcs
 Requires-Dist: bs4; extra == "gcs"
-Requires-Dist: gcsfs; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
+Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: requests; extra == "github"
@@ -117,26 +117,26 @@ Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
 Provides-Extra: notion
+Requires-Dist: notion-client; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
-Requires-Dist: notion-client; extra == "notion"
 Requires-Dist: httpx; extra == "notion"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
 Provides-Extra: onedrive
-Requires-Dist: msal; extra == "onedrive"
-Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
 Requires-Dist: bs4; extra == "onedrive"
+Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: msal; extra == "onedrive"
 Provides-Extra: openai
-Requires-Dist: openai; extra == "openai"
 Requires-Dist: tiktoken; extra == "openai"
+Requires-Dist: openai; extra == "openai"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
 Provides-Extra: outlook
-Requires-Dist: msal; extra == "outlook"
 Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
+Requires-Dist: msal; extra == "outlook"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
 Provides-Extra: pinecone
@@ -158,16 +158,16 @@ Requires-Dist: unstructured[rst]; extra == "rst"
 Provides-Extra: rtf
 Requires-Dist: unstructured[rtf]; extra == "rtf"
 Provides-Extra: s3
-Requires-Dist: fsspec; extra == "s3"
 Requires-Dist: s3fs; extra == "s3"
+Requires-Dist: fsspec; extra == "s3"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
 Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
 Provides-Extra: sharepoint
-Requires-Dist: msal; extra == "sharepoint"
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+Requires-Dist: msal; extra == "sharepoint"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: slack
```
{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/RECORD

```diff
@@ -10,11 +10,11 @@ test/integration/connectors/test_azure_ai_search.py,sha256=dae4GifRiKue5YpsxworD
 test/integration/connectors/test_confluence.py,sha256=xcPmZ_vi_pkCt-tUPn10P49FH9i_9YUbrAPO6fYk5rU,3521
 test/integration/connectors/test_delta_table.py,sha256=GSzWIkbEUzOrRPt2F1uO0dabcp7kTFDj75BhhI2y-WU,6856
 test/integration/connectors/test_kafka.py,sha256=j7jsNWZumNBv9v-5Bpx8geUUXpxxad5EuA4CMRsl4R8,7104
-test/integration/connectors/test_lancedb.py,sha256=
+test/integration/connectors/test_lancedb.py,sha256=U2HfIrf6iJ7lYMn-vz0j-LesVyDY-jc9QrQhlJVhG9Q,9183
 test/integration/connectors/test_milvus.py,sha256=p4UujDr_tsRaQDmhDmDZp38t8oSFm7hrTqiq6NNuhGo,5933
 test/integration/connectors/test_mongodb.py,sha256=YeS_DUnVYN02F76j87W8RhXGHnJMzQYb3n-L1-oWGXI,12254
 test/integration/connectors/test_onedrive.py,sha256=KIkBwKh1hnv203VCL2UABnDkS_bP4NxOFm1AL8EPGLA,3554
-test/integration/connectors/test_pinecone.py,sha256=
+test/integration/connectors/test_pinecone.py,sha256=i-v5WkAI9M6SUZI7ch9qdILlRHopAdptpkSY12-BaTk,9483
 test/integration/connectors/test_qdrant.py,sha256=ASvO-BNyhv8m8or28KljrJy27Da0uaTNeoR5w_QsvFg,5121
 test/integration/connectors/test_s3.py,sha256=YHEYMqWTKTfR7wlL4VoxtgMs1YiYKyhLIBdG-anaQGo,6896
 test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -39,6 +39,7 @@ test/integration/connectors/weaviate/test_cloud.py,sha256=07VxNRxWWcgTstFfpoZ1Fl
 test/integration/connectors/weaviate/test_local.py,sha256=SK6iEwQUKiCd0X99BEk8GlQoLaCcJcFPt09NN526Ct0,4508
 test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
+test/integration/embedders/test_azure_openai.py,sha256=6tFpKFBFRXD49imhhRzsvy3MPtuZ4L1PtnKyMVBRAqc,1808
 test/integration/embedders/test_bedrock.py,sha256=0oBRNS_DtFDGQ22Z1T3t6VOJ31PrItgvnJpqcLe9Fg4,1903
 test/integration/embedders/test_huggingface.py,sha256=0mMTOO-Nh7KB70AGs_7LLQIxMYrnSPqyihriUeqACbM,1007
 test/integration/embedders/test_mixedbread.py,sha256=RrLv8SByMNXsgrlh94RbaT-VyxZ4-DILO-OPpmOwvSI,1441
@@ -82,7 +83,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=
+unstructured_ingest/__version__.py,sha256=70Yw9e-njzEFR9kr-pzp5J1EslWrJuu4TCVbxa-fdmM,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -250,6 +251,7 @@ unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha2
 unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
 unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
 unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
 unstructured_ingest/embed/bedrock.py,sha256=-PRdZsF44vwi6G4G75gdO31AJKfZWClOXkJQAk7rEO8,3096
 unstructured_ingest/embed/huggingface.py,sha256=2cBiQhOhfWHX3hS-eKjocysOkUaRlyRfUj9Kxjrp6cE,1934
 unstructured_ingest/embed/interfaces.py,sha256=au4Xp8ciDvo4bidlUbazFW2aC7NZW5-UDLKXBFVzAX4,2025
@@ -389,7 +391,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=zlgXgwReX9TBOdfTpS9hETah4
 unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
 unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
 unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
-unstructured_ingest/v2/processes/embedder.py,sha256=
+unstructured_ingest/v2/processes/embedder.py,sha256=xCBpaL07WnVUOUW8SHktaf1vwBGZxl3Nf8-99509ClQ,7721
 unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
 unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
 unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
@@ -399,7 +401,7 @@ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=QTUQ-cv_iZi9eaXRRH
 unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=-6IijSWGqj-85vD0c4l5wdMHp-LF371jO8j53PPRB4I,12002
 unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
 unstructured_ingest/v2/processes/connectors/confluence.py,sha256=qQApDcmPBGg4tHXwSOj4JPkAbrO9GQ4NRlaETjhp25U,7003
-unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=
+unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=LbUJLt6fqaNYSmy9vUiovG-UOALMcvh8OD-gZAaf-f4,12333
 unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=1yS7ivEyiucwd_kv6LL5HQdGabT43yeG6XCdwiz89hc,8019
 unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=yBgCeLy9iCVI8bBDcHHuHB0H3BO05e9E1OccbHwvKAo,9724
 unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=EEwXK1Anlu-eXl5qxmdDIqPYW7eMSez6WGlTPG2vSn8,13121
@@ -409,7 +411,7 @@ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=3sV0Yv2vYMLyxszKCqA
 unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=XLuprTCY0D9tAh_qn81MjJrDN9YaNqMlKe7BJl3eTZc,14998
 unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=heZMtOIrCySi552ldIk8iH0pSRXZ0W2LeD-CcNOwCFQ,15979
 unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
-unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=
+unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=-J6QPJv_jmjln8cTUsfEEAyd_hi_fmD-uwB6C84rA4w,11930
 unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
 unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
 unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
@@ -421,7 +423,7 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P
 unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
 unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=Wk7s2_u5G0BOV5slvGc8IlUf7ivznY9PrgPqe6nlJKM,2897
 unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
-unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=
+unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=sI58uypWr1mpSl4bxr46nIfypGZ4aqryCT83qqCVnSM,18921
 unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
 unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
 unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
@@ -441,7 +443,7 @@ unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur
 unstructured_ingest/v2/processes/connectors/lancedb/azure.py,sha256=Ms5vQVRIpTF1Q2qBl_bET9wbgaf4diPaH-iR8kJlr4E,1461
 unstructured_ingest/v2/processes/connectors/lancedb/cloud.py,sha256=BFy0gW2OZ_qaZJM97m-tNsFaJPi9zOKrrd2y4thcNP0,1341
 unstructured_ingest/v2/processes/connectors/lancedb/gcp.py,sha256=p5BPaFtS3y3Yh8PIr3tUqsAXrUYu4QYYAWQNh5W2ucE,1361
-unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=
+unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=7FODnesYu8cFx1PeQJZxXij-8Dei4Kk3Bs0oxoUGBtI,5745
 unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gAWwJUUrmlsRzYMFIBeZgu_QT3mhw5L0I,1272
 unstructured_ingest/v2/processes/connectors/qdrant/__init__.py,sha256=xM19uYzAuGizVoZIM_hnVZ5AcBN69aOBGpqZcpWPtuE,760
 unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-KiVBDBDBYYx5A9CUoikP5NCErRmfik,1624
@@ -454,14 +456,14 @@ unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=YrmhAL1RQ1
 unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=jl524VudwmFK63emCT7DmZan_EWJAMiGir5_zoO9FuY,5697
 unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=LFzGeAUagLknK07DsXg2oSG7ZAgR6VqT9wfI_tYlHUg,14782
 unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=9605K36nQ5-gBxzt1daYKYotON1SE85RETusqCJrbdk,5230
-unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=
+unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
 unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
 unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
 unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
-unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=
-unstructured_ingest-0.3.
-unstructured_ingest-0.3.
-unstructured_ingest-0.3.
-unstructured_ingest-0.3.
-unstructured_ingest-0.3.
-unstructured_ingest-0.3.
+unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=dBDC_M8GVKupl7i9UMRCZyRIUv6gTkq8bJE_SILydAc,11291
+unstructured_ingest-0.3.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.3.5.dist-info/METADATA,sha256=6lMRXK_RZho8cMblH299fqDfZix6a9843VGiPvhnDV8,7393
+unstructured_ingest-0.3.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+unstructured_ingest-0.3.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.3.5.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.3.5.dist-info/RECORD,,
```
{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/LICENSE.md
RENAMED (file without changes)

{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/WHEEL
RENAMED (file without changes)

{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/entry_points.txt
RENAMED (file without changes)

{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.5.dist-info}/top_level.txt
RENAMED (file without changes)