PyPI - unstructured-ingest - Versions diffs - 0.4.7__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

unstructured-ingest 0.4.7 (py3-none-any.whl) → 0.5.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (29)

test/integration/connectors/sql/test_vastdb.py ADDED Viewed

@@ -0,0 +1,34 @@
+from pathlib import Path
+import pytest
+from _pytest.fixtures import TopRequest
+from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
+from unstructured_ingest.v2.processes.connectors.sql.vastdb import (
+    CONNECTOR_TYPE,
+    VastdbUploadStager,
+    VastdbUploadStagerConfig,
+)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, SQL_TAG)
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_vast_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = VastdbUploadStager(
+        upload_stager_config=VastdbUploadStagerConfig(rename_columns_map={"page_number": "page"})
+    )
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )

test/integration/connectors/test_google_drive.py ADDED Viewed

@@ -0,0 +1,116 @@
+import os
+import pytest
+from test.integration.connectors.utils.constants import (
+    SOURCE_TAG,
+    UNCATEGORIZED_TAG,
+)
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
+    get_all_file_data,
+    run_all_validations,
+    update_fixtures,
+)
+from test.integration.utils import requires_env
+from unstructured_ingest.v2.interfaces import Downloader, Indexer
+from unstructured_ingest.v2.processes.connectors.google_drive import (
+    CONNECTOR_TYPE,
+    GoogleDriveAccessConfig,
+    GoogleDriveConnectionConfig,
+    GoogleDriveDownloader,
+    GoogleDriveDownloaderConfig,
+    GoogleDriveIndexer,
+    GoogleDriveIndexerConfig,
+)
+@requires_env("GOOGLE_DRIVE_SERVICE_KEY")
+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE)
+def test_google_drive_source(temp_dir):
+    # Retrieve environment variables
+    service_account_key = os.environ["GOOGLE_DRIVE_SERVICE_KEY"]
+    # Create connection and indexer configurations
+    access_config = GoogleDriveAccessConfig(service_account_key=service_account_key)
+    connection_config = GoogleDriveConnectionConfig(
+        drive_id="1XidSOO76VpZ4m0i3gJN2m1X0Obol3UAi",
+        access_config=access_config,
+    )
+    index_config = GoogleDriveIndexerConfig(recursive=True)
+    download_config = GoogleDriveDownloaderConfig(download_dir=temp_dir)
+    # Instantiate indexer and downloader
+    indexer = GoogleDriveIndexer(
+        connection_config=connection_config,
+        index_config=index_config,
+    )
+    downloader = GoogleDriveDownloader(
+        connection_config=connection_config,
+        download_config=download_config,
+    )
+    # Run the source connector validation
+    source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="google_drive_source",
+            expected_num_files=1,
+            validate_downloaded_files=True,
+        ),
+    )
+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
+def source_connector_validation(
+    indexer: Indexer,
+    downloader: Downloader,
+    configs: SourceValidationConfigs,
+    overwrite_fixtures: bool = os.getenv("OVERWRITE_FIXTURES", "False").lower() == "true",
+) -> None:
+    # Run common validations on the process of running a source connector, supporting dynamic
+    # validators that get passed in along with comparisons on the saved expected values.
+    # If overwrite_fixtures is st to True, will ignore all validators but instead overwrite the
+    # expected values with what gets generated by this test.
+    all_predownload_file_data = []
+    all_postdownload_file_data = []
+    indexer.precheck()
+    download_dir = downloader.download_config.download_dir
+    test_output_dir = configs.test_output_dir()
+    for file_data in indexer.run():
+        assert file_data
+        predownload_file_data = file_data.model_copy(deep=True)
+        all_predownload_file_data.append(predownload_file_data)
+        resp = downloader.run(file_data=file_data)
+        if isinstance(resp, list):
+            for r in resp:
+                postdownload_file_data = r["file_data"].model_copy(deep=True)
+                all_postdownload_file_data.append(postdownload_file_data)
+        else:
+            postdownload_file_data = resp["file_data"].model_copy(deep=True)
+            all_postdownload_file_data.append(postdownload_file_data)
+    if not overwrite_fixtures:
+        print("Running validation")
+        run_all_validations(
+            configs=configs,
+            predownload_file_data=all_predownload_file_data,
+            postdownload_file_data=all_postdownload_file_data,
+            download_dir=download_dir,
+            test_output_dir=test_output_dir,
+        )
+    else:
+        print("Running fixtures update")
+        update_fixtures(
+            output_dir=test_output_dir,
+            download_dir=download_dir,
+            all_file_data=get_all_file_data(
+                all_predownload_file_data=all_predownload_file_data,
+                all_postdownload_file_data=all_postdownload_file_data,
+            ),
+            save_downloads=configs.validate_downloaded_files,
+            save_filedata=configs.validate_file_data,
+        )

test/integration/connectors/test_onedrive.py CHANGED Viewed

@@ -5,13 +5,25 @@ from pathlib import Path
 import pytest
 from office365.graph_client import GraphClient
-from test.integration.connectors.utils.constants import BLOB_STORAGE_TAG, DESTINATION_TAG
+from test.integration.connectors.utils.constants import (
+    BLOB_STORAGE_TAG,
+    DESTINATION_TAG,
+    SOURCE_TAG,
+)
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
+    source_connector_validation,
+)
 from test.integration.utils import requires_env
 from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.onedrive import (
     CONNECTOR_TYPE,
     OnedriveAccessConfig,
     OnedriveConnectionConfig,
+    OnedriveDownloader,
+    OnedriveDownloaderConfig,
+    OnedriveIndexer,
+    OnedriveIndexerConfig,
     OnedriveUploader,
     OnedriveUploaderConfig,
 )
@@ -62,9 +74,46 @@ def get_connection_config():
     return connection_config
+@pytest.mark.asyncio
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
+@requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
+async def test_onedrive_source(temp_dir):
+    connection_config = get_connection_config()
+    index_config = OnedriveIndexerConfig(recursive=True, path="eml")
+    download_config = OnedriveDownloaderConfig(download_dir=temp_dir)
+    # Instantiate indexer and downloader
+    indexer = OnedriveIndexer(
+        connection_config=connection_config,
+        index_config=index_config,
+    )
+    downloader = OnedriveDownloader(
+        connection_config=connection_config,
+        download_config=download_config,
+    )
+    # Run the source connector validation
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="onedrive",
+            expected_num_files=1,
+            validate_downloaded_files=True,
+            exclude_fields_extend=[
+                "metadata.date_created",
+                "metadata.date_modified",
+                "additional_metadata.LastModified",
+                "additional_metadata.@microsoft.graph.downloadUrl",
+            ],
+        ),
+    )
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
 @requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
-def test_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
+def xtest_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
     """
     Integration test for the OneDrive destination connector.

test/integration/connectors/test_sharepoint.py ADDED Viewed

@@ -0,0 +1,71 @@
+import os
+import pytest
+from test.integration.connectors.utils.constants import BLOB_STORAGE_TAG, SOURCE_TAG
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
+    source_connector_validation,
+)
+from test.integration.utils import requires_env
+from unstructured_ingest.v2.processes.connectors.sharepoint import (
+    CONNECTOR_TYPE,
+    SharepointAccessConfig,
+    SharepointConnectionConfig,
+    SharepointDownloader,
+    SharepointDownloaderConfig,
+    SharepointIndexer,
+    SharepointIndexerConfig,
+)
+@pytest.mark.asyncio
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
+@requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
+async def test_sharepoint_source(temp_dir):
+    # Retrieve environment variables
+    site = "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source"
+    client_id = os.environ["SHAREPOINT_CLIENT_ID"]
+    client_cred = os.environ["SHAREPOINT_CRED"]
+    user_pname = os.environ["MS_USER_PNAME"]
+    tenant = os.environ["MS_TENANT_ID"]
+    # Create connection and indexer configurations
+    access_config = SharepointAccessConfig(client_cred=client_cred)
+    connection_config = SharepointConnectionConfig(
+        client_id=client_id,
+        site=site,
+        tenant=tenant,
+        user_pname=user_pname,
+        access_config=access_config,
+    )
+    index_config = SharepointIndexerConfig(recursive=True)
+    download_config = SharepointDownloaderConfig(download_dir=temp_dir)
+    # Instantiate indexer and downloader
+    indexer = SharepointIndexer(
+        connection_config=connection_config,
+        index_config=index_config,
+    )
+    downloader = SharepointDownloader(
+        connection_config=connection_config,
+        download_config=download_config,
+    )
+    # Run the source connector validation
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="sharepoint",
+            expected_num_files=4,
+            validate_downloaded_files=True,
+            exclude_fields_extend=[
+                "metadata.date_created",
+                "metadata.date_modified",
+                "additional_metadata.LastModified",
+                "additional_metadata.@microsoft.graph.downloadUrl",
+            ],
+        ),
+    )

test/integration/connectors/utils/validation/source.py CHANGED Viewed

@@ -10,6 +10,13 @@ from pydantic import Field
 from test.integration.connectors.utils.validation.utils import ValidationConfig
 from unstructured_ingest.v2.interfaces import Downloader, FileData, Indexer
+NONSTANDARD_METADATA_FIELDS = {
+    "additional_metadata.@microsoft.graph.downloadUrl": [
+        "additional_metadata",
+        "@microsoft.graph.downloadUrl",
+    ]
+}
 class SourceValidationConfigs(ValidationConfig):
     expected_number_indexed_file_data: Optional[int] = None
@@ -26,7 +33,7 @@ class SourceValidationConfigs(ValidationConfig):
     def get_exclude_fields(self) -> list[str]:
         exclude_fields = self.exclude_fields
         exclude_fields.extend(self.exclude_fields_extend)
-        return exclude_fields
+        return list(set(exclude_fields))
     def run_file_data_validation(
         self, predownload_file_data: FileData, postdownload_file_data: FileData
@@ -45,8 +52,13 @@ class SourceValidationConfigs(ValidationConfig):
         exclude_fields = self.get_exclude_fields()
         # Ignore fields that dynamically change every time the tests run
         copied_data = data.copy()
         for exclude_field in exclude_fields:
-            exclude_field_vals = exclude_field.split(".")
+            exclude_field_vals = (
+                NONSTANDARD_METADATA_FIELDS[exclude_field]
+                if exclude_field in NONSTANDARD_METADATA_FIELDS
+                else exclude_field.split(".")
+            )
             if len(exclude_field_vals) == 1:
                 current_val = copied_data
                 drop_field = exclude_field_vals[0]
@@ -261,21 +273,38 @@ async def source_connector_validation(
     indexer.precheck()
     download_dir = downloader.download_config.download_dir
     test_output_dir = configs.test_output_dir()
-    for file_data in indexer.run():
-        assert file_data
-        predownload_file_data = file_data.model_copy(deep=True)
-        all_predownload_file_data.append(predownload_file_data)
-        if downloader.is_async():
-            resp = await downloader.run_async(file_data=file_data)
-        else:
-            resp = downloader.run(file_data=file_data)
-        if isinstance(resp, list):
-            for r in resp:
-                postdownload_file_data = r["file_data"].model_copy(deep=True)
+    if indexer.is_async():
+        async for file_data in indexer.run_async():
+            assert file_data
+            predownload_file_data = file_data.model_copy(deep=True)
+            all_predownload_file_data.append(predownload_file_data)
+            if downloader.is_async():
+                resp = await downloader.run_async(file_data=file_data)
+            else:
+                resp = downloader.run(file_data=file_data)
+            if isinstance(resp, list):
+                for r in resp:
+                    postdownload_file_data = r["file_data"].model_copy(deep=True)
+                    all_postdownload_file_data.append(postdownload_file_data)
+            else:
+                postdownload_file_data = resp["file_data"].model_copy(deep=True)
+                all_postdownload_file_data.append(postdownload_file_data)
+    else:
+        for file_data in indexer.run():
+            assert file_data
+            predownload_file_data = file_data.model_copy(deep=True)
+            all_predownload_file_data.append(predownload_file_data)
+            if downloader.is_async():
+                resp = await downloader.run_async(file_data=file_data)
+            else:
+                resp = downloader.run(file_data=file_data)
+            if isinstance(resp, list):
+                for r in resp:
+                    postdownload_file_data = r["file_data"].model_copy(deep=True)
+                    all_postdownload_file_data.append(postdownload_file_data)
+            else:
+                postdownload_file_data = resp["file_data"].model_copy(deep=True)
                 all_postdownload_file_data.append(postdownload_file_data)
-        else:
-            postdownload_file_data = resp["file_data"].model_copy(deep=True)
-            all_postdownload_file_data.append(postdownload_file_data)
     if not overwrite_fixtures:
         print("Running validation")
         run_all_validations(

test/integration/embedders/test_bedrock.py CHANGED Viewed

@@ -31,7 +31,7 @@ def get_aws_credentials() -> dict:
 def test_bedrock_embedder(embedder_file: Path):
     aws_credentials = get_aws_credentials()
     embedder_config = EmbedderConfig(
-        embedding_provider="aws-bedrock",
+        embedding_provider="bedrock",
         embedding_aws_access_key_id=aws_credentials["aws_access_key_id"],
         embedding_aws_secret_access_key=aws_credentials["aws_secret_access_key"],
     )

test/integration/partitioners/test_partitioner.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import json
 import os
 from pathlib import Path
@@ -15,6 +14,9 @@ all_partition_files = [path for path in assets_dir.iterdir() if path.is_file()]
 non_image_partition_files = [
     path for path in all_partition_files if path.suffix not in [".jpg", ".png", ".tif"]
 ]
+supported_fast_partition_files = [
+    path for path in non_image_partition_files if path.suffix != ".eml"
+]
 image_partition_files = [
     path for path in all_partition_files if path not in non_image_partition_files
 ]
@@ -33,18 +35,13 @@ async def test_partitioner_api_hi_res(partition_file: Path):
     )
     partitioner = Partitioner(config=partitioner_config)
     results = await partitioner.run_async(filename=partition_file)
-    results_dir = int_test_dir / "results"
-    results_dir.mkdir(exist_ok=True)
-    results_path = results_dir / f"{partition_file.name}.json"
-    with results_path.open("w") as f:
-        json.dump(results, f, indent=2)
     assert results
 @pytest.mark.parametrize(
     "partition_file",
-    non_image_partition_files,
-    ids=[path.name for path in non_image_partition_files],
+    supported_fast_partition_files,
+    ids=[path.name for path in supported_fast_partition_files],
 )
 @requires_env("UNSTRUCTURED_API_KEY", "UNSTRUCTURED_API_URL")
 @pytest.mark.asyncio
@@ -68,7 +65,11 @@ async def test_partitioner_api_fast_error(partition_file: Path):
     api_key = os.getenv("UNSTRUCTURED_API_KEY")
     api_url = os.getenv("UNSTRUCTURED_API_URL")
     partitioner_config = PartitionerConfig(
-        strategy="fast", partition_by_api=True, api_key=api_key, partition_endpoint=api_url
+        strategy="fast",
+        partition_by_api=True,
+        api_key=api_key,
+        partition_endpoint=api_url,
+        raise_unsupported_filetype=True,
     )
     partitioner = Partitioner(config=partitioner_config)
     with pytest.raises(UserError):

test/unit/v2/connectors/motherduck/__init__.py ADDED Viewed

File without changes

test/unit/v2/connectors/motherduck/test_base.py ADDED Viewed

@@ -0,0 +1,74 @@
+from pathlib import Path
+import pytest
+from pytest_mock import MockerFixture
+from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces.file_data import SourceIdentifiers
+from unstructured_ingest.v2.interfaces.upload_stager import UploadStagerConfig
+from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUploadStager
+@pytest.fixture
+def mock_instance() -> BaseDuckDBUploadStager:
+    return BaseDuckDBUploadStager(UploadStagerConfig())
+@pytest.mark.parametrize(
+    ("input_filepath", "output_filename", "expected"),
+    [
+        (
+            "/path/to/input_file.ndjson",
+            "output_file.ndjson",
+            "output_file.ndjson",
+        ),
+        ("input_file.txt", "output_file.json", "output_file.txt"),
+        ("/path/to/input_file.json", "output_file", "output_file.json"),
+    ],
+)
+def test_run_output_filename_suffix(
+    mocker: MockerFixture,
+    mock_instance: BaseDuckDBUploadStager,
+    input_filepath: str,
+    output_filename: str,
+    expected: str,
+):
+    output_dir = Path("/tmp/test/output_dir")
+    # Mocks
+    mock_get_data = mocker.patch(
+        "unstructured_ingest.v2.processes.connectors.duckdb.base.get_data",
+        return_value=[{"key": "value"}, {"key": "value2"}],
+    )
+    mock_conform_dict = mocker.patch.object(
+        BaseDuckDBUploadStager,
+        "conform_dict",
+        side_effect=lambda element_dict, file_data: element_dict,
+    )
+    mock_get_output_path = mocker.patch.object(
+        BaseDuckDBUploadStager, "get_output_path", return_value=output_dir / expected
+    )
+    mock_write_output = mocker.patch(
+        "unstructured_ingest.v2.processes.connectors.duckdb.base.write_data", return_value=None
+    )
+    # Act
+    result = mock_instance.run(
+        elements_filepath=Path(input_filepath),
+        file_data=FileData(
+            identifier="test",
+            connector_type="test",
+            source_identifiers=SourceIdentifiers(filename=input_filepath, fullpath=input_filepath),
+        ),
+        output_dir=output_dir,
+        output_filename=output_filename,
+    )
+    # Assert
+    mock_get_data.assert_called_once_with(path=Path(input_filepath))
+    assert mock_conform_dict.call_count == 2
+    mock_get_output_path.assert_called_once_with(output_filename=expected, output_dir=output_dir)
+    mock_write_output.assert_called_once_with(
+        path=output_dir / expected, data=[{"key": "value"}, {"key": "value2"}]
+    )
+    assert result.name == expected

unstructured_ingest/__version__.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.4.7" # pragma: no cover
+__version__ = "0.5.1" # pragma: no cover

unstructured_ingest/cli/interfaces.py CHANGED Viewed

@@ -417,7 +417,7 @@ class CliEmbeddingConfig(EmbeddingConfig, CliMixin):
         embed_providers = [
             "openai",
             "huggingface",
-            "aws-bedrock",
+            "bedrock",
             "vertexai",
             "voyageai",
             "octoai",

unstructured_ingest/interfaces.py CHANGED Viewed

@@ -226,7 +226,7 @@ class EmbeddingConfig(BaseConfig):
             )
             return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs))
-        elif self.provider == "aws-bedrock":
+        elif self.provider == "bedrock":
             from unstructured_ingest.embed.bedrock import (
                 BedrockEmbeddingConfig,
                 BedrockEmbeddingEncoder,

unstructured_ingest/v2/pipeline/pipeline.py CHANGED Viewed

@@ -268,6 +268,7 @@ class Pipeline:
         # Partition content
         elements = self.partitioner_step(downloaded_data)
+        elements = self.clean_results(results=elements)
         # Download data non longer needed, delete if possible
         self.downloader_step.delete_cache()
         elements = self.clean_results(results=elements)

unstructured_ingest/v2/processes/connectors/duckdb/base.py CHANGED Viewed

@@ -81,6 +81,8 @@ class BaseDuckDBUploadStager(UploadStager):
         **kwargs: Any,
     ) -> Path:
         elements_contents = get_data(path=elements_filepath)
+        output_filename_suffix = Path(elements_filepath).suffix
+        output_filename = f"{Path(output_filename).stem}{output_filename_suffix}"
         output_path = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
         output = [

unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py CHANGED Viewed

@@ -61,7 +61,7 @@ class MotherDuckConnectionConfig(ConnectionConfig):
                 "custom_user_agent": f"unstructured-io-ingest/{unstructured_io_ingest_version}"
             },
         ) as conn:
-            conn.sql(f"USE {self.database}")
+            conn.sql(f'USE "{self.database}"')
             yield conn
     @contextmanager
@@ -102,11 +102,12 @@ class MotherDuckUploader(Uploader):
     def upload_dataframe(self, df: pd.DataFrame) -> None:
         logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ")
+        database = self.connection_config.database
+        db_schema = self.connection_config.db_schema
+        table = self.connection_config.table
         with self.connection_config.get_client() as conn:
-            conn.query(
-                f"INSERT INTO {self.connection_config.db_schema}.{self.connection_config.table} BY NAME SELECT * FROM df"  # noqa: E501
-            )
+            conn.query(f'INSERT INTO "{database}"."{db_schema}"."{table}" BY NAME SELECT * FROM df')
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         df = pd.DataFrame(data=data)

unstructured_ingest/v2/processes/connectors/google_drive.py CHANGED Viewed

@@ -310,20 +310,22 @@ class GoogleDriveDownloader(Downloader):
         from googleapiclient.http import MediaIoBaseDownload
         logger.debug(f"fetching file: {file_data.source_identifiers.fullpath}")
-        mime_type = file_data.additional_metadata["mimeType"]
         record_id = file_data.identifier
+        mime_type = file_data.additional_metadata["mimeType"]
+        if not mime_type:
+            raise TypeError(
+                f"File not supported. Name: {file_data.source_identifiers.filename} "
+                f"ID: {record_id} "
+                f"MimeType: {mime_type}"
+            )
         with self.connection_config.get_client() as client:
-            if mime_type.startswith("application/vnd.google-apps"):
+            if (
+                mime_type.startswith("application/vnd.google-apps")
+                and mime_type in GOOGLE_DRIVE_EXPORT_TYPES
+            ):
                 export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get(
-                    self.meta.get("mimeType"),  # type: ignore
+                    mime_type,  # type: ignore
                 )
-                if not export_mime:
-                    raise TypeError(
-                        f"File not supported. Name: {file_data.source_identifiers.filename} "
-                        f"ID: {record_id} "
-                        f"MimeType: {mime_type}"
-                    )
                 request = client.export_media(
                     fileId=record_id,
                     mimeType=export_mime,

unstructured_ingest/v2/processes/connectors/onedrive.py CHANGED Viewed

@@ -105,6 +105,7 @@ class OnedriveIndexerConfig(IndexerConfig):
 class OnedriveIndexer(Indexer):
     connection_config: OnedriveConnectionConfig
     index_config: OnedriveIndexerConfig
+    connector_type: str = CONNECTOR_TYPE
     def precheck(self) -> None:
         try:
@@ -172,7 +173,7 @@ class OnedriveIndexer(Indexer):
         )
         return FileData(
             identifier=drive_item.id,
-            connector_type=CONNECTOR_TYPE,
+            connector_type=self.connector_type,
             source_identifiers=SourceIdentifiers(
                 fullpath=server_path, filename=drive_item.name, rel_path=rel_path
             ),
@@ -201,7 +202,8 @@ class OnedriveIndexer(Indexer):
         token_resp = await asyncio.to_thread(self.connection_config.get_token)
         if "error" in token_resp:
             raise SourceConnectionError(
-                f"[{CONNECTOR_TYPE}]: {token_resp['error']} ({token_resp.get('error_description')})"
+                f"[{self.connector_type}]: {token_resp['error']} "
+                f"({token_resp.get('error_description')})"
             )
         client = await asyncio.to_thread(self.connection_config.get_client)
@@ -221,6 +223,7 @@ class OnedriveDownloaderConfig(DownloaderConfig):
 class OnedriveDownloader(Downloader):
     connection_config: OnedriveConnectionConfig
     download_config: OnedriveDownloaderConfig
+    connector_type: str = CONNECTOR_TYPE
     @SourceConnectionNetworkError.wrap
     def _fetch_file(self, file_data: FileData) -> DriveItem:
@@ -260,7 +263,9 @@ class OnedriveDownloader(Downloader):
                     file.download_session(f).execute_query()
             return self.generate_download_response(file_data=file_data, download_path=download_path)
         except Exception as e:
-            logger.error(f"[{CONNECTOR_TYPE}] Exception during downloading: {e}", exc_info=True)
+            logger.error(
+                f"[{self.connector_type}] Exception during downloading: {e}", exc_info=True
+            )
             # Re-raise to see full stack trace locally
             raise

unstructured_ingest/v2/processes/connectors/pinecone.py CHANGED Viewed

@@ -81,6 +81,7 @@ ALLOWED_FIELDS = (
     "link_urls",
     "link_texts",
     "text_as_html",
+    "entities",
 )

unstructured_ingest/v2/processes/connectors/sharepoint.py CHANGED Viewed

@@ -56,6 +56,7 @@ class SharepointIndexerConfig(OnedriveIndexerConfig):
 class SharepointIndexer(OnedriveIndexer):
     connection_config: SharepointConnectionConfig
     index_config: SharepointIndexerConfig
+    connector_type: str = CONNECTOR_TYPE
     @requires_dependencies(["office365"], extras="sharepoint")
     async def run_async(self, **kwargs: Any) -> AsyncIterator[FileData]:
@@ -64,7 +65,8 @@ class SharepointIndexer(OnedriveIndexer):
         token_resp = await asyncio.to_thread(self.connection_config.get_token)
         if "error" in token_resp:
             raise SourceConnectionError(
-                f"[{CONNECTOR_TYPE}]: {token_resp['error']} ({token_resp.get('error_description')})"
+                f"[{self.connector_type}]: {token_resp['error']} "
+                f"({token_resp.get('error_description')})"
             )
         client = await asyncio.to_thread(self.connection_config.get_client)
@@ -90,6 +92,7 @@ class SharepointDownloaderConfig(OnedriveDownloaderConfig):
 class SharepointDownloader(OnedriveDownloader):
     connection_config: SharepointConnectionConfig
     download_config: SharepointDownloaderConfig
+    connector_type: str = CONNECTOR_TYPE
     @SourceConnectionNetworkError.wrap
     @requires_dependencies(["office365"], extras="onedrive")

unstructured_ingest/v2/processes/connectors/sql/sql.py CHANGED Viewed

@@ -38,48 +38,6 @@ from unstructured_ingest.v2.interfaces import (
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.utils import get_enhanced_element_id
-_COLUMNS = (
-    "id",
-    "element_id",
-    "text",
-    "embeddings",
-    "type",
-    "system",
-    "layout_width",
-    "layout_height",
-    "points",
-    "url",
-    "version",
-    "date_created",
-    "date_modified",
-    "date_processed",
-    "permissions_data",
-    "record_locator",
-    "category_depth",
-    "parent_id",
-    "attached_filename",
-    "filetype",
-    "last_modified",
-    "file_directory",
-    "filename",
-    "languages",
-    "page_number",
-    "links",
-    "page_name",
-    "link_urls",
-    "link_texts",
-    "sent_from",
-    "sent_to",
-    "subject",
-    "section",
-    "header_footer_type",
-    "emphasized_text_contents",
-    "emphasized_text_tags",
-    "text_as_html",
-    "regex_metadata",
-    "detection_class_prob",
-)
 _DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified")
@@ -270,10 +228,8 @@ class SQLUploadStager(UploadStager):
         data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)
-        # remove extraneous, not supported columns
-        element = {k: v for k, v in data.items() if k in _COLUMNS}
-        element[RECORD_ID_LABEL] = file_data.identifier
-        return element
+        data[RECORD_ID_LABEL] = file_data.identifier
+        return data
     def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
         for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
@@ -375,7 +331,7 @@ class SQLUploader(Uploader):
         missing_columns = schema_fields - columns
         if columns_to_drop:
-            logger.warning(
+            logger.info(
                 "Following columns will be dropped to match the table's schema: "
                 f"{', '.join(columns_to_drop)}"
             )

unstructured_ingest/v2/processes/connectors/sql/vastdb.py CHANGED Viewed

@@ -19,7 +19,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
     SourceRegistryEntry,
 )
 from unstructured_ingest.v2.processes.connectors.sql.sql import (
-    _COLUMNS,
     SQLAccessConfig,
     SqlBatchFileData,
     SQLConnectionConfig,
@@ -149,13 +148,11 @@ class VastdbUploadStagerConfig(SQLUploadStagerConfig):
         default=None,
         description="Map of column names to rename, ex: {'old_name': 'new_name'}",
     )
-    additional_columns: Optional[list[str]] = Field(
-        default_factory=list, description="Additional columns to include in the upload"
-    )
+@dataclass
 class VastdbUploadStager(SQLUploadStager):
-    upload_stager_config: VastdbUploadStagerConfig
+    upload_stager_config: VastdbUploadStagerConfig = field(default_factory=VastdbUploadStagerConfig)
     def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         data = element_dict.copy()
@@ -168,13 +165,8 @@ class VastdbUploadStager(SQLUploadStager):
         data.update(coordinates)
         data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)
-        # remove extraneous, not supported columns
-        # but also allow for additional columns
-        approved_columns = set(_COLUMNS).union(self.upload_stager_config.additional_columns)
-        element = {k: v for k, v in data.items() if k in approved_columns}
-        element[RECORD_ID_LABEL] = file_data.identifier
-        return element
+        data[RECORD_ID_LABEL] = file_data.identifier
+        return data
     def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
         df = super().conform_dataframe(df=df)

unstructured_ingest/v2/processes/embedder.py CHANGED Viewed

@@ -18,7 +18,7 @@ class EmbedderConfig(BaseModel):
             "openai",
             "azure-openai",
             "huggingface",
-            "aws-bedrock",
+            "bedrock",
             "vertexai",
             "voyageai",
             "octoai",
@@ -162,7 +162,7 @@ class EmbedderConfig(BaseModel):
         if self.embedding_provider == "octoai":
             return self.get_octoai_embedder(embedding_kwargs=kwargs)
-        if self.embedding_provider == "aws-bedrock":
+        if self.embedding_provider == "bedrock":
             return self.get_bedrock_embedder()
         if self.embedding_provider == "vertexai":

unstructured_ingest/v2/processes/partitioner.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import json
 from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
@@ -7,6 +8,7 @@ from pydantic import BaseModel, Field, SecretStr
 from unstructured_ingest.utils.data_prep import flatten_dict
 from unstructured_ingest.utils.dep_check import requires_dependencies
+from unstructured_ingest.v2.errors import UserError
 from unstructured_ingest.v2.interfaces.process import BaseProcess
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.unstructured_api import call_api_async
@@ -73,6 +75,9 @@ class PartitionerConfig(BaseModel):
     hi_res_model_name: Optional[str] = Field(
         default=None, description="Model name for hi-res strategy."
     )
+    raise_unsupported_filetype: bool = Field(
+        default=False, description="Raise an error if the file type is not supported"
+    )
     def model_post_init(self, __context: Any) -> None:
         if self.metadata_exclude and self.metadata_include:
@@ -151,13 +156,25 @@ class Partitioner(BaseProcess, ABC):
         class FileDataSourceMetadata(DataSourceMetadata):
             filesize_bytes: Optional[int] = None
+        metadata = metadata or {}
         logger.debug(f"using local partition with kwargs: {self.config.to_partition_kwargs()}")
         logger.debug(f"partitioning file {filename} with metadata {metadata}")
-        elements = partition(
-            filename=str(filename.resolve()),
-            data_source_metadata=FileDataSourceMetadata.from_dict(metadata),
-            **self.config.to_partition_kwargs(),
-        )
+        try:
+            elements = partition(
+                filename=str(filename.resolve()),
+                data_source_metadata=FileDataSourceMetadata.from_dict(metadata),
+                **self.config.to_partition_kwargs(),
+            )
+        except ValueError as sdk_error:
+            if (
+                self.is_unstructured_error_unsupported_filetype(sdk_error=sdk_error)
+                and not self.config.raise_unsupported_filetype
+            ):
+                logger.warning(
+                    f"Unsupported file type for strategy {self.config.strategy}: {filename}"
+                )
+                return []
+            raise sdk_error
         return self.postprocess(elements=elements_to_dicts(elements))
     @requires_dependencies(dependencies=["unstructured_client"], extras="remote")
@@ -179,10 +196,37 @@ class Partitioner(BaseProcess, ABC):
             element["metadata"]["data_source"] = metadata
         return self.postprocess(elements=elements)
+    def is_unstructured_error_unsupported_filetype(self, sdk_error: ValueError) -> bool:
+        error_msg = sdk_error.args[0]
+        return (
+            "Invalid file" in error_msg
+            or "Unstructured schema" in error_msg
+            or "fast strategy is not available for image files" in error_msg
+        )
+    def is_client_error_unsupported_filetype(self, error: UserError) -> bool:
+        error_msg = error.args[0]
+        error_dict = json.loads(error_msg)
+        details = error_dict["detail"]
+        return "fast strategy is not available for image files" in details or (
+            "file type" in details.lower() and "is not supported" in details.lower()
+        )
     def run(self, filename: Path, metadata: Optional[dict] = None, **kwargs) -> list[dict]:
         return self.partition_locally(filename, metadata=metadata, **kwargs)
     async def run_async(
         self, filename: Path, metadata: Optional[dict] = None, **kwargs
     ) -> list[dict]:
-        return await self.partition_via_api(filename, metadata=metadata, **kwargs)
+        try:
+            return await self.partition_via_api(filename, metadata=metadata, **kwargs)
+        except UserError as user_error:
+            if (
+                self.is_client_error_unsupported_filetype(error=user_error)
+                and not self.config.raise_unsupported_filetype
+            ):
+                logger.warning(
+                    f"Unsupported file type for strategy {self.config.strategy}: {filename}"
+                )
+                return []
+            raise user_error

{unstructured_ingest-0.4.7.dist-info → unstructured_ingest-0.5.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unstructured-ingest
-Version: 0.4.7
+Version: 0.5.1
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,38 +22,38 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: pandas
-Requires-Dist: pydantic>=2.7
 Requires-Dist: dataclasses-json
-Requires-Dist: python-dateutil
+Requires-Dist: click
 Requires-Dist: opentelemetry-sdk
+Requires-Dist: pydantic>=2.7
+Requires-Dist: python-dateutil
+Requires-Dist: pandas
 Requires-Dist: tqdm
-Requires-Dist: click
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: fsspec; extra == "azure"
 Requires-Dist: adlfs; extra == "azure"
+Requires-Dist: fsspec; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Provides-Extra: bedrock
-Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: aioboto3; extra == "bedrock"
+Requires-Dist: boto3; extra == "bedrock"
 Provides-Extra: biomed
 Requires-Dist: requests; extra == "biomed"
 Requires-Dist: bs4; extra == "biomed"
 Provides-Extra: box
-Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
+Requires-Dist: fsspec; extra == "box"
 Provides-Extra: chroma
 Requires-Dist: chromadb; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: requests; extra == "confluence"
 Requires-Dist: atlassian-python-api; extra == "confluence"
+Requires-Dist: requests; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: csv
@@ -83,8 +83,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: embed-octoai
-Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: openai; extra == "embed-octoai"
+Requires-Dist: tiktoken; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -92,9 +92,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
 Provides-Extra: gcs
-Requires-Dist: fsspec; extra == "gcs"
-Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: gcsfs; extra == "gcs"
+Requires-Dist: bs4; extra == "gcs"
+Requires-Dist: fsspec; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: requests; extra == "github"
 Requires-Dist: pygithub>1.58.0; extra == "github"
@@ -103,8 +103,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: urllib3; extra == "hubspot"
 Requires-Dist: hubspot-api-client; extra == "hubspot"
+Requires-Dist: urllib3; extra == "hubspot"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
 Provides-Extra: kafka
@@ -122,30 +122,30 @@ Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
 Provides-Extra: neo4j
-Requires-Dist: networkx; extra == "neo4j"
-Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: neo4j; extra == "neo4j"
+Requires-Dist: cymple; extra == "neo4j"
+Requires-Dist: networkx; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist: httpx; extra == "notion"
+Requires-Dist: backoff; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
+Requires-Dist: httpx; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
-Requires-Dist: backoff; extra == "notion"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
 Provides-Extra: onedrive
-Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: bs4; extra == "onedrive"
 Provides-Extra: openai
-Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
 Provides-Extra: outlook
-Requires-Dist: msal; extra == "outlook"
 Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
+Requires-Dist: msal; extra == "outlook"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
 Provides-Extra: pinecone
@@ -177,8 +177,8 @@ Provides-Extra: sftp
 Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
 Provides-Extra: sharepoint
-Requires-Dist: msal; extra == "sharepoint"
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+Requires-Dist: msal; extra == "sharepoint"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: slack
@@ -191,13 +191,13 @@ Requires-Dist: together; extra == "togetherai"
 Provides-Extra: tsv
 Requires-Dist: unstructured[tsv]; extra == "tsv"
 Provides-Extra: vastdb
-Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
+Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: ibis; extra == "vastdb"
 Provides-Extra: vectara
-Requires-Dist: aiofiles; extra == "vectara"
 Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: requests; extra == "vectara"
+Requires-Dist: aiofiles; extra == "vectara"
 Provides-Extra: weaviate
 Requires-Dist: weaviate-client; extra == "weaviate"
 Provides-Extra: wikipedia

{unstructured_ingest-0.4.7.dist-info → unstructured_ingest-0.5.1.dist-info}/RECORD RENAMED Viewed

@@ -10,16 +10,18 @@ test/integration/connectors/test_azure_ai_search.py,sha256=MxFwk84vI_HT4taQTGrNp
 test/integration/connectors/test_chroma.py,sha256=NuQv0PWPM0_LQfdPeUd6IYKqaKKXWmVaHGWjq5aBfOY,3721
 test/integration/connectors/test_confluence.py,sha256=Ju0gRQbD2g9l9iRf2HDZKi7RyPnBGtFRWcGpsqhO3F8,3588
 test/integration/connectors/test_delta_table.py,sha256=4qm2Arfc9Eb7SOZOnOlLF-vNpHy6Eqvr5Q45svfX1PY,6911
+test/integration/connectors/test_google_drive.py,sha256=0zJZ4UJOq4TkfU-bkc556_abV7q6zVS9ZgIvW9qcTU4,4204
 test/integration/connectors/test_lancedb.py,sha256=8MBxK_CUtOt87-4B7svDDK82NFII5psceo5cNN8HJMs,9228
 test/integration/connectors/test_milvus.py,sha256=7mI6zznN0PTxDL9DLogH1k3dxx6R8DgGzlpyevsFu2w,7173
 test/integration/connectors/test_mongodb.py,sha256=0A6DvF-iTCSZzOefisd_i20j9li8uNWTF2wyLGwlhco,12446
 test/integration/connectors/test_neo4j.py,sha256=r4TRYtTXeeOdcRcfa_gvslhSKvoIWrwN1FRJ5XRoH4k,8456
 test/integration/connectors/test_notion.py,sha256=ueXyVqYWzP4LuZYe6PauptkXNG6qkoV3srltFOSSKTA,5403
-test/integration/connectors/test_onedrive.py,sha256=rjgN2LhaW1htEMBJPxmlP_kcRB7p_oOeZcogFlHyJH4,3721
+test/integration/connectors/test_onedrive.py,sha256=iwiDK0kWCfQbIEPnWUzzAA5PiCsHcmFZSxEcIZy_6cc,5229
 test/integration/connectors/test_pinecone.py,sha256=acKEu1vnAk0Ht3FhCnGtOEKaj_YlgCzZB7wRU17ehQ0,12407
 test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
 test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
 test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
+test/integration/connectors/test_sharepoint.py,sha256=8HlcnrP4K8oPUzef6AA11P2cMlxSp7tiddTkT4JOeRU,2378
 test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
 test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/databricks/test_volumes_native.py,sha256=KqiapQAV0s_Zv0CO8BwYoiCk30dwrSZzuigUWNRIem0,9559
@@ -39,6 +41,7 @@ test/integration/connectors/sql/test_postgres.py,sha256=bGDyzLRpgrXO7nl0U8nF2zSN
 test/integration/connectors/sql/test_singlestore.py,sha256=XeU2s4Kt_3tGyaDYYKTgYjdOyb8j2dnz4TgSMwFUjWs,6153
 test/integration/connectors/sql/test_snowflake.py,sha256=LEwsRDoC6-rRiwYsqeo5B9Eo6RYygLLGAUsrtrgI9pM,7494
 test/integration/connectors/sql/test_sqlite.py,sha256=MHvhFRx1y_LTgfS-aPz-Zn9yOGsm-TF_s0t1seBzV1k,5956
+test/integration/connectors/sql/test_vastdb.py,sha256=66T-o_y7NaDKGmKFkT778AB-nanlLv9KdtgUGPOdnLs,1069
 test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/utils/constants.py,sha256=JhTk6YNw7JVpkk-Pl8zn2YYkExeL1oE9VBWm_kMYGfo,369
 test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6wk7n-DJ-zgzgag,4971
@@ -46,7 +49,7 @@ test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQ
 test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/utils/validation/destination.py,sha256=ZvMSvqz9in35xaoUJGx9rG8oWCU3FYlfLLQ6sfdI0pw,2649
 test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
-test/integration/connectors/utils/validation/source.py,sha256=VALU5ms_JBu_eFkp2WQ7oZtJKozJ8MZSJ7h7ZA3Fz_Q,12296
+test/integration/connectors/utils/validation/source.py,sha256=xnAZI26ILdeMhgrWAGrU2N2fqK58YNGkfyUhJekZ0Ho,13541
 test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
 test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
@@ -55,7 +58,7 @@ test/integration/connectors/weaviate/test_local.py,sha256=gXMpnzVcrNQdptDjx0haPW
 test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
 test/integration/embedders/test_azure_openai.py,sha256=YQ3uq2-NuxtTyGsSgMNa10pcITLKMJ4E1scTGFgwujw,1790
-test/integration/embedders/test_bedrock.py,sha256=ZehreheLgY9Bqdjk-3MQOaou9IP-H3Pcz7WWiOWAxTU,3557
+test/integration/embedders/test_bedrock.py,sha256=vmjoi1uUk-LX4Yz0ZPn6Ry1JdVEsyIhLhPbSPmkeT9o,3553
 test/integration/embedders/test_huggingface.py,sha256=qFblyXounVNRaNkk3gbKoBqU5E2dNecgKU2Bz2LyOa8,989
 test/integration/embedders/test_mixedbread.py,sha256=lLz_cooyC38VSo-FMHbhKpHvYs3QzA20NOIvM5oooaw,1998
 test/integration/embedders/test_octoai.py,sha256=qs-bqZ7iGWO_BzUZvKJmOHBT3cmFSkEYbleWhj3snJc,2197
@@ -65,7 +68,7 @@ test/integration/embedders/test_vertexai.py,sha256=4-E4plJXFf1b02RhOqOCBHR2GA4gT
 test/integration/embedders/test_voyageai.py,sha256=Gm3sVjhsym1ASIDfr-sZoCbpsNMaAk_l4E3-dtjRCQ4,1832
 test/integration/embedders/utils.py,sha256=Sqqg-X31ZV1hojqPQBaZgM2lb2u8cG6s6OnH9JRsFjs,2717
 test/integration/partitioners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/integration/partitioners/test_partitioner.py,sha256=MEQJbRoc01uPLT6O8CkXeQF_DXK21nz3KVJkzkBtsgM,2835
+test/integration/partitioners/test_partitioner.py,sha256=6sdZhhtqEICBPqEgpKrCQIfJ-7hKcwuTFqjWs1mbQf8,2787
 test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
 test/unit/test_html.py,sha256=LKGi_QaH4U4gktrbd2NcURL-d-0Rm1UnG5Y6r9EvTG0,4489
@@ -86,6 +89,8 @@ test/unit/v2/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 test/unit/v2/connectors/test_confluence.py,sha256=bXrn_kRb4IQdqkk4rc-P2gJAtPba7n7pNplQgfbqZDY,1047
 test/unit/v2/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/connectors/databricks/test_volumes_table.py,sha256=-R_EJHqv1BseGRK9VRAZhF-2EXA64LAlhycoyIu556U,1078
+test/unit/v2/connectors/motherduck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+test/unit/v2/connectors/motherduck/test_base.py,sha256=f3W7hppEZ904_I_fKax-5LVDp-0yj04DjF1ccZ4k5O8,2503
 test/unit/v2/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/connectors/sql/test_sql.py,sha256=51-AKUBxw6ThO68bjenLopUUuxM88YZb2rMUV8L6YwY,2464
 test/unit/v2/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -102,9 +107,9 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=i2QrUEuUnVPQuTv5hg_JWbhbwm5k6KU4hPIFq0SIgdc,42
+unstructured_ingest/__version__.py,sha256=LXdgOM6QWErpDu1oCqJrypfmAkBaXzRxVPcjHL8yPrI,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
-unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
+unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
 unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
 unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
@@ -112,7 +117,7 @@ unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29
 unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
 unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
 unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
-unstructured_ingest/cli/interfaces.py,sha256=lpaaOdAQ4NMsawVaHSk5lXCcZ0Mw85kRzfElu1ODCB0,24090
+unstructured_ingest/cli/interfaces.py,sha256=pvEwNfYwINx3-TQ0LPudjpYNR3PnanUiXpEePPEtRSw,24086
 unstructured_ingest/cli/utils.py,sha256=KNhkFNKOeEihc8HlvMz_MTbYVQNFklrBKbC8xg9h1xE,7982
 unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
@@ -399,7 +404,7 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-
 unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
 unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
-unstructured_ingest/v2/pipeline/pipeline.py,sha256=4IwCWMlBrMpZI6V82q5nzrbyQNDVM62AQsWt6MUBWa8,16508
+unstructured_ingest/v2/pipeline/pipeline.py,sha256=b37fQGm_lGutQ3Jc0qePB15lkBiFavH9tCso3inm-3I,16564
 unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
 unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
@@ -413,9 +418,9 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=We4OAtStuZwWKKBCOPhfeAz_v
 unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
 unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
 unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
-unstructured_ingest/v2/processes/embedder.py,sha256=uiuCOSwwasHp4eqtewMvgnM86WVch7HDFiWqpGLahvo,7812
+unstructured_ingest/v2/processes/embedder.py,sha256=PTBlRgNbAXkSaLg7JrZzHwAoqpHmopg8jNU1TmaXguU,7804
 unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
-unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
+unstructured_ingest/v2/processes/partitioner.py,sha256=ZC9mt85I3o_SLR4DvE7vPBGphMET994phFkTuT-L9B8,9998
 unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
 unstructured_ingest/v2/processes/connectors/__init__.py,sha256=KO1zn-96Qa49TOSZn-gv_RUMGMCmUcdtHoeJqCpxPLY,6219
 unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
@@ -427,18 +432,18 @@ unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVm
 unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
 unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
 unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
-unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=5k7pdAzJGXSdyPCzW9vu2OaAjGVTo2JevDyGaXM1Hvk,13370
+unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=tSbyibwm9RQyXD-HJGZa1Y9lBSCXaEFnvxpf6bHwBSE,13394
 unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOGQWxudzQEDopXM8XkfkQ2j6g,5004
 unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
 unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
 unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
 unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
-unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=b616B_-9MfU6gxvpw7IBUa2szNFURA_VP8q5j2FXxnA,17632
+unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=EM9fq67RsiudZvZbi6nDXkS-i6W0xLvbkNvD0G-Ni5E,17779
 unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
-unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
+unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=U5gSa8S08JvCwmAhE8aV0yxGTIFnUlKVsQDybE8Fqb8,10746
 unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
 unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
-unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=SdcbOEUzgi1sUZJA6doZDm-a8d4F3Qtud-OVbDKW7Ng,4456
+unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=f0F7KioXgucVc3tVASTa67ynlTa4s9_FKGPHop6Xm0A,4563
 unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
 unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
 unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
@@ -452,9 +457,9 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8
 unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
 unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=2KNLwDZJDhsMAUGCzktEIn4Lvb0nxLWabBOPJbgyoEE,5010
 unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
-unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=0YBdOpTX5mbRLhP00lRHSMpl2-LfuRpqB1XPMJMxn04,2647
+unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=IHaY1mWuidt6GDEJhB1c_orwmjeyXuRCVJ88djYDciM,2793
 unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
-unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=mU5x6SnbFgRsVicNGh4y4gtR6ek7eQFinI0dQQmzMds,4481
+unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=OsRy-rcrP4_KSustpxlEKoZ_FmJNFMyMmIfFk6WJ3UY,4559
 unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
 unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
 unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
@@ -554,17 +559,17 @@ unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha25
 unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
 unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=OPBDQ2c_5KjWHEFfqXxf3pQ2tWC-N4MtslMulMgP1Wc,5503
 unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=QE-WBqrPVjCgcxR5EdVD9iTHBjgDSSSQgWYvq5N61qU,7746
-unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=O2XBu_E2WqNia9OUTdhTWkYo0xhoMMm6ZuanTz-0V9s,16192
+unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=F5PPUxt2W8JaAQGfz5Od0FvKqYa15RfwMIlnrdJu1nk,15317
 unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=PRjN_S7UQv0k4ZpSyclW1AJrsrugyxbR-GoOrHvBpks,5200
-unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=4DckpVAXpmMTcoKrWiJbnFQQlcrwMA-GMaDsAYchTUs,9992
+unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=0rxrb1ByXIefB9umzMTEJbpvzdTttXHK5DjRY97-GG8,9618
 unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
 unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
 unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
 unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
 unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
-unstructured_ingest-0.4.7.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-0.4.7.dist-info/METADATA,sha256=yGcahQ8fZmoU_c1h02b76tRn5w0uj_931AAQKlFrqxs,8051
-unstructured_ingest-0.4.7.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-unstructured_ingest-0.4.7.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-0.4.7.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
-unstructured_ingest-0.4.7.dist-info/RECORD,,
+unstructured_ingest-0.5.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.5.1.dist-info/METADATA,sha256=4fo4K5ac0RNRlWGGyNumZ5gXJf-0PwknZWjS6HvAD6w,8051
+unstructured_ingest-0.5.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+unstructured_ingest-0.5.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.5.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.5.1.dist-info/RECORD,,

{unstructured_ingest-0.4.7.dist-info → unstructured_ingest-0.5.1.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{unstructured_ingest-0.4.7.dist-info → unstructured_ingest-0.5.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{unstructured_ingest-0.4.7.dist-info → unstructured_ingest-0.5.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{unstructured_ingest-0.4.7.dist-info → unstructured_ingest-0.5.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

unstructured-ingest 0.4.7__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

unstructured-ingest 0.4.7__py3-none-any.whl → 0.5.1__py3-none-any.whl