unstructured-ingest 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release.
- test/integration/connectors/{databricks_tests → databricks}/test_volumes_native.py +75 -19
- test/integration/connectors/sql/test_postgres.py +9 -5
- test/integration/connectors/sql/test_singlestore.py +9 -5
- test/integration/connectors/sql/test_snowflake.py +6 -2
- test/integration/connectors/sql/test_sqlite.py +9 -5
- test/integration/connectors/test_astradb.py +40 -0
- test/integration/connectors/test_kafka.py +2 -2
- test/integration/connectors/test_mongodb.py +4 -1
- test/integration/connectors/utils/validation/source.py +31 -11
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/interfaces/__init__.py +3 -1
- unstructured_ingest/v2/interfaces/file_data.py +69 -15
- unstructured_ingest/v2/pipeline/steps/chunk.py +2 -1
- unstructured_ingest/v2/pipeline/steps/download.py +5 -4
- unstructured_ingest/v2/pipeline/steps/embed.py +2 -1
- unstructured_ingest/v2/pipeline/steps/filter.py +2 -2
- unstructured_ingest/v2/pipeline/steps/index.py +4 -4
- unstructured_ingest/v2/pipeline/steps/partition.py +3 -2
- unstructured_ingest/v2/pipeline/steps/stage.py +2 -2
- unstructured_ingest/v2/pipeline/steps/uncompress.py +2 -2
- unstructured_ingest/v2/pipeline/steps/upload.py +3 -3
- unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +37 -33
- unstructured_ingest/v2/processes/connectors/couchbase.py +52 -41
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -0
- unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +2 -2
- unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +2 -2
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +41 -45
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +12 -35
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +12 -35
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +15 -42
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +33 -29
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +12 -34
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -37
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -33
- unstructured_ingest/v2/processes/connectors/mongodb.py +94 -100
- unstructured_ingest/v2/processes/connectors/neo4j.py +5 -3
- unstructured_ingest/v2/processes/connectors/onedrive.py +1 -1
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/sql.py +36 -26
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -5
- {unstructured_ingest-0.3.9.dist-info → unstructured_ingest-0.3.11.dist-info}/METADATA +11 -10
- {unstructured_ingest-0.3.9.dist-info → unstructured_ingest-0.3.11.dist-info}/RECORD +52 -52
- /test/integration/connectors/{databricks_tests → databricks}/__init__.py +0 -0
- {unstructured_ingest-0.3.9.dist-info → unstructured_ingest-0.3.11.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.9.dist-info → unstructured_ingest-0.3.11.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.9.dist-info → unstructured_ingest-0.3.11.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.9.dist-info → unstructured_ingest-0.3.11.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py

@@ -3,12 +3,12 @@ from typing import Optional
 
 from pydantic import Field, Secret
 
-from unstructured_ingest.v2.interfaces import AccessConfig
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
     SourceRegistryEntry,
 )
 from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
+    DatabricksVolumesAccessConfig,
     DatabricksVolumesConnectionConfig,
     DatabricksVolumesDownloader,
     DatabricksVolumesDownloaderConfig,
@@ -21,7 +21,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes import (
 CONNECTOR_TYPE = "databricks_volumes"
 
 
-class DatabricksNativeVolumesAccessConfig(AccessConfig):
+class DatabricksNativeVolumesAccessConfig(DatabricksVolumesAccessConfig):
     client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
     client_secret: Optional[str] = Field(
         default=None, description="Client Secret of the OAuth app."
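The net effect of these two hunks is that the native volumes access config now extends the shared DatabricksVolumesAccessConfig rather than the generic AccessConfig base. A minimal sketch of that inheritance pattern; the token field on the stand-in base class is hypothetical and these are not the library's actual class definitions:

```python
from typing import Optional

from pydantic import BaseModel, Field


class SharedVolumesAccessConfig(BaseModel):
    # stand-in for DatabricksVolumesAccessConfig; the shared field here is assumed
    token: Optional[str] = Field(default=None, description="Personal access token.")


class NativeVolumesAccessConfig(SharedVolumesAccessConfig):
    # OAuth fields layered on top of whatever the shared base already provides
    client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
    client_secret: Optional[str] = Field(default=None, description="Client Secret of the OAuth app.")


# the subclass accepts both the shared and the native-only fields
print(NativeVolumesAccessConfig(client_id="abc", client_secret="xyz").model_dump())
```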
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py

@@ -1,6 +1,5 @@
 import collections
 import hashlib
-import sys
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -15,11 +14,17 @@ from unstructured_ingest.error import (
     SourceConnectionNetworkError,
     WriteError,
 )
-from unstructured_ingest.utils.data_prep import flatten_dict, generator_batching_wbytes
+from unstructured_ingest.utils.data_prep import (
+    batch_generator,
+    flatten_dict,
+    generator_batching_wbytes,
+)
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
+    BatchFileData,
+    BatchItem,
     ConnectionConfig,
     Downloader,
     DownloaderConfig,
@@ -48,6 +53,14 @@ if TYPE_CHECKING:
 CONNECTOR_TYPE = "elasticsearch"
 
 
+class ElastisearchAdditionalMetadata(BaseModel):
+    index_name: str
+
+
+class ElasticsearchBatchFileData(BatchFileData):
+    additional_metadata: ElastisearchAdditionalMetadata
+
+
 class ElasticsearchAccessConfig(AccessConfig):
     password: Optional[str] = Field(
         default=None, description="password when using basic auth or connecting to a cloud instance"
@@ -174,36 +187,21 @@ class ElasticsearchIndexer(Indexer):
 
         return {hit["_id"] for hit in hits}
 
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
+    def run(self, **kwargs: Any) -> Generator[ElasticsearchBatchFileData, None, None]:
         all_ids = self._get_doc_ids()
         ids = list(all_ids)
-        id_batches = [
-            frozenset(
-                ids[
-                    i
-                    * self.index_config.batch_size : (i + 1)  # noqa
-                    * self.index_config.batch_size
-                ]
-            )
-            for i in range(
-                (len(ids) + self.index_config.batch_size - 1) // self.index_config.batch_size
-            )
-        ]
-        for batch in id_batches:
+        for batch in batch_generator(ids, self.index_config.batch_size):
             # Make sure the hash is always a positive number to create identified
-            identified = str(hash(batch) + sys.maxsize + 1)
-            yield FileData(
-                identifier=identified,
+            yield ElasticsearchBatchFileData(
                 connector_type=CONNECTOR_TYPE,
-                doc_type="batch",
                 metadata=FileDataSourceMetadata(
                     url=f"{self.connection_config.hosts[0]}/{self.index_config.index_name}",
                     date_processed=str(time()),
                 ),
-                additional_metadata={
-                    "index_name": self.index_config.index_name,
-                    "ids": list(batch),
-                },
+                additional_metadata=ElastisearchAdditionalMetadata(
+                    index_name=self.index_config.index_name,
+                ),
+                batch_items=[BatchItem(identifier=b) for b in batch],
             )
 
 
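The indexer change above swaps the hand-rolled frozenset slicing for batch_generator and emits typed ElasticsearchBatchFileData records instead of generic FileData. A standalone sketch of the chunking behavior the indexer now relies on; this is not the library's batch_generator implementation, only the shape of behavior assumed from its usage here:

```python
from typing import Iterable, Iterator, List, TypeVar

T = TypeVar("T")


def batch_generator(items: Iterable[T], batch_size: int) -> Iterator[List[T]]:
    """Yield consecutive lists of at most batch_size items."""
    batch: List[T] = []
    for item in items:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch


doc_ids = [f"doc-{i}" for i in range(7)]
for batch in batch_generator(doc_ids, batch_size=3):
    print(batch)  # two batches of 3 ids, then a final batch of 1
```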
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py (continued)

@@ -237,7 +235,7 @@ class ElasticsearchDownloader(Downloader):
         return concatenated_values
 
     def generate_download_response(
-        self, result: dict, index_name: str, file_data: FileData
+        self, result: dict, index_name: str, file_data: ElasticsearchBatchFileData
     ) -> DownloadResponse:
         record_id = result["_id"]
         filename_id = self.get_identifier(index_name=index_name, record_id=record_id)
@@ -257,22 +255,19 @@ class ElasticsearchDownloader(Downloader):
                 exc_info=True,
             )
             raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
-
-
-
-
-
-
-
-
-
-
-
-
-
-                ),
-            ),
-            path=download_path,
+        file_data.source_identifiers = SourceIdentifiers(filename=filename, fullpath=filename)
+        cast_file_data = FileData.cast(file_data=file_data)
+        cast_file_data.identifier = filename_id
+        cast_file_data.metadata.date_processed = str(time())
+        cast_file_data.metadata.version = str(result["_version"]) if "_version" in result else None
+        cast_file_data.metadata.record_locator = {
+            "hosts": self.connection_config.hosts,
+            "index_name": index_name,
+            "document_id": record_id,
+        }
+        return super().generate_download_response(
+            file_data=cast_file_data,
+            download_path=download_path,
         )
 
     def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
@@ -285,11 +280,12 @@ class ElasticsearchDownloader(Downloader):
 
         return AsyncElasticsearch, async_scan
 
-    async def run_async(self, file_data: FileData, **kwargs: Any) -> download_responses:
+    async def run_async(self, file_data: BatchFileData, **kwargs: Any) -> download_responses:
+        elasticsearch_filedata = ElasticsearchBatchFileData.cast(file_data=file_data)
         AsyncClient, async_scan = self.load_async()
 
-        index_name: str = file_data.additional_metadata["index_name"]
-        ids: list[str] = file_data.additional_metadata["ids"]
+        index_name: str = elasticsearch_filedata.additional_metadata.index_name
+        ids: list[str] = [item.identifier for item in elasticsearch_filedata.batch_items]
 
         scan_query = {
             "_source": self.download_config.fields,
@@ -307,7 +303,7 @@ class ElasticsearchDownloader(Downloader):
             ):
                 download_responses.append(
                     self.generate_download_response(
-                        result=result, index_name=index_name, file_data=file_data
+                        result=result, index_name=index_name, file_data=elasticsearch_filedata
                     )
                 )
         return download_responses
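Downstream, the downloader now pulls the index name and document ids off the typed batch record and scrolls them out of Elasticsearch. A rough sketch of what that per-batch fetch looks like with the elasticsearch client's scan helper, assuming a local cluster and a hypothetical my-index; the connector's actual query and fields differ:

```python
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

client = Elasticsearch("http://localhost:9200")  # assumed local cluster
batch_ids = ["doc-1", "doc-2", "doc-3"]

scan_query = {
    "_source": ["title", "body"],              # pull only the fields you need
    "query": {"ids": {"values": batch_ids}},   # restrict the scroll to this batch
}

for hit in scan(client, query=scan_query, index="my-index"):
    print(hit["_id"], hit.get("_source"))
```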
unstructured_ingest/v2/processes/connectors/fsspec/azure.py

@@ -1,14 +1,14 @@
 from __future__ import annotations
 
+from contextlib import contextmanager
 from dataclasses import dataclass, field
-from pathlib import Path
 from time import time
-from typing import Any, Generator, Optional
+from typing import TYPE_CHECKING, Any, Generator, Optional
 
 from pydantic import Field, Secret
 
 from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.v2.interfaces import
+from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
     SourceRegistryEntry,
@@ -25,6 +25,9 @@ from unstructured_ingest.v2.processes.connectors.fsspec.fsspec import (
 )
 from unstructured_ingest.v2.processes.connectors.fsspec.utils import json_serial, sterilize_dict
 
+if TYPE_CHECKING:
+    from adlfs import AzureBlobFileSystem
+
 CONNECTOR_TYPE = "azure"
 
 
@@ -89,6 +92,12 @@ class AzureConnectionConfig(FsspecConnectionConfig):
         }
         return access_configs
 
+    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
+    @contextmanager
+    def get_client(self, protocol: str) -> Generator["AzureBlobFileSystem", None, None]:
+        with super().get_client(protocol=protocol) as client:
+            yield client
+
 
 @dataclass
 class AzureIndexer(FsspecIndexer):
@@ -96,17 +105,9 @@ class AzureIndexer(FsspecIndexer):
     index_config: AzureIndexerConfig
     connector_type: str = CONNECTOR_TYPE
 
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    def precheck(self) -> None:
-        super().precheck()
-
     def sterilize_info(self, file_data: dict) -> dict:
         return sterilize_dict(data=file_data, default=azure_json_serial)
 
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
-        return super().run(**kwargs)
-
     def get_metadata(self, file_data: dict) -> FileDataSourceMetadata:
         path = file_data["name"]
         date_created = (
@@ -149,14 +150,6 @@ class AzureDownloader(FsspecDownloader):
     connector_type: str = CONNECTOR_TYPE
     download_config: Optional[AzureDownloaderConfig] = field(default_factory=AzureDownloaderConfig)
 
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        return super().run(file_data=file_data, **kwargs)
-
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        return await super().run_async(file_data=file_data, **kwargs)
-
 
 class AzureUploaderConfig(FsspecUploaderConfig):
     pass
@@ -168,22 +161,6 @@ class AzureUploader(FsspecUploader):
     connection_config: AzureConnectionConfig
     upload_config: AzureUploaderConfig = field(default=None)
 
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    def __post_init__(self):
-        super().__post_init__()
-
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    def precheck(self) -> None:
-        super().precheck()
-
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        return super().run(path=path, file_data=file_data, **kwargs)
-
-    @requires_dependencies(["adlfs", "fsspec"], extras="azure")
-    async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        return await super().run_async(path=path, file_data=file_data, **kwargs)
-
 
 azure_source_entry = SourceRegistryEntry(
     indexer=AzureIndexer,
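The Azure changes follow the same shape as the fsspec base class further below: the per-method requires_dependencies decorators and the run/run_async pass-throughs disappear, a single get_client override carries the dependency check, and a TYPE_CHECKING import keeps adlfs out of the runtime import path. A hedged sketch of that TYPE_CHECKING pattern with a hypothetical helper; the real connector routes through FsspecConnectionConfig.get_client:

```python
from __future__ import annotations

from contextlib import contextmanager
from typing import TYPE_CHECKING, Generator

if TYPE_CHECKING:  # only type checkers ever import adlfs here
    from adlfs import AzureBlobFileSystem


@contextmanager
def get_azure_client(**storage_options) -> Generator["AzureBlobFileSystem", None, None]:
    # hypothetical helper; adlfs must be installed when this is actually called
    from fsspec import get_filesystem_class

    yield get_filesystem_class("az")(**storage_options)


# usage (requires adlfs and a reachable storage account):
# with get_azure_client(account_name="myaccount", anon=True) as fs:
#     print(fs.ls("my-container"))
```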
unstructured_ingest/v2/processes/connectors/fsspec/box.py

@@ -1,16 +1,16 @@
 from __future__ import annotations
 
+from contextlib import contextmanager
 from dataclasses import dataclass, field
-from pathlib import Path
 from time import time
-from typing import Annotated, Any, Generator, Optional
+from typing import TYPE_CHECKING, Annotated, Any, Generator, Optional
 
 from dateutil import parser
 from pydantic import Field, Secret
 from pydantic.functional_validators import BeforeValidator
 
 from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.v2.interfaces import
+from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
     SourceRegistryEntry,
@@ -28,6 +28,9 @@ from unstructured_ingest.v2.processes.connectors.fsspec.fsspec import (
 )
 from unstructured_ingest.v2.processes.connectors.utils import conform_string_to_dict
 
+if TYPE_CHECKING:
+    from boxfs import BoxFileSystem
+
 CONNECTOR_TYPE = "box"
 
 
@@ -72,6 +75,12 @@ class BoxConnectionConfig(FsspecConnectionConfig):
 
         return access_kwargs_with_oauth
 
+    @requires_dependencies(["boxfs"], extras="box")
+    @contextmanager
+    def get_client(self, protocol: str) -> Generator["BoxFileSystem", None, None]:
+        with super().get_client(protocol=protocol) as client:
+            yield client
+
 
 @dataclass
 class BoxIndexer(FsspecIndexer):
@@ -79,14 +88,6 @@ class BoxIndexer(FsspecIndexer):
     index_config: BoxIndexerConfig
     connector_type: str = CONNECTOR_TYPE
 
-    @requires_dependencies(["boxfs"], extras="box")
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
-        return super().run(**kwargs)
-
-    @requires_dependencies(["boxfs"], extras="box")
-    def precheck(self) -> None:
-        super().precheck()
-
     def get_metadata(self, file_data: dict) -> FileDataSourceMetadata:
         path = file_data["name"]
         date_created = None
@@ -126,14 +127,6 @@ class BoxDownloader(FsspecDownloader):
     connector_type: str = CONNECTOR_TYPE
     download_config: Optional[BoxDownloaderConfig] = field(default_factory=BoxDownloaderConfig)
 
-    @requires_dependencies(["boxfs"], extras="box")
-    def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        return super().run(file_data=file_data, **kwargs)
-
-    @requires_dependencies(["boxfs"], extras="box")
-    async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        return await super().run_async(file_data=file_data, **kwargs)
-
 
 class BoxUploaderConfig(FsspecUploaderConfig):
     pass
@@ -145,22 +138,6 @@ class BoxUploader(FsspecUploader):
     connection_config: BoxConnectionConfig
     upload_config: BoxUploaderConfig = field(default=None)
 
-    @requires_dependencies(["boxfs"], extras="box")
-    def __post_init__(self):
-        super().__post_init__()
-
-    @requires_dependencies(["boxfs"], extras="box")
-    def precheck(self) -> None:
-        super().precheck()
-
-    @requires_dependencies(["boxfs"], extras="box")
-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        return super().run(path=path, file_data=file_data, **kwargs)
-
-    @requires_dependencies(["boxfs"], extras="box")
-    async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        return await super().run_async(path=path, file_data=file_data, **kwargs)
-
 
 box_source_entry = SourceRegistryEntry(
     indexer=BoxIndexer,
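As with Azure, the Box connector drops the repeated requires_dependencies wrappers on every indexer, downloader, and uploader method; the check now happens once, on the connection config's get_client. A generic sketch of what such a guard decorator typically does (this is not the library's requires_dependencies implementation):

```python
import functools
import importlib


def requires_deps(deps, extras):
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            # fail fast with an actionable message if an optional dependency is missing
            for dep in deps:
                try:
                    importlib.import_module(dep)
                except ImportError as e:
                    raise ImportError(f"{dep} is required; install the '{extras}' extra") from e
            return fn(*args, **kwargs)

        return wrapper

    return decorator


@requires_deps(["boxfs"], extras="box")
def make_box_client(**storage_options):
    # hypothetical helper: resolves the fsspec implementation only after the check passes
    from fsspec import get_filesystem_class

    return get_filesystem_class("box")(**storage_options)
```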
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py

@@ -1,14 +1,14 @@
 from __future__ import annotations
 
+from contextlib import contextmanager
 from dataclasses import dataclass, field
-from pathlib import Path
 from time import time
-from typing import
+from typing import TYPE_CHECKING, Generator, Optional
 
 from pydantic import Field, Secret
 
 from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.v2.interfaces import
+from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
     SourceRegistryEntry,
@@ -24,11 +24,16 @@ from unstructured_ingest.v2.processes.connectors.fsspec.fsspec import (
     FsspecUploaderConfig,
 )
 
+if TYPE_CHECKING:
+    from dropboxdrivefs import DropboxDriveFileSystem
+
 CONNECTOR_TYPE = "dropbox"
 
 
 class DropboxIndexerConfig(FsspecIndexerConfig):
-    pass
+    def model_post_init(self, __context):
+        if not self.path_without_protocol.startswith("/"):
+            self.path_without_protocol = "/" + self.path_without_protocol
 
 
 class DropboxAccessConfig(FsspecAccessConfig):
@@ -42,6 +47,12 @@ class DropboxConnectionConfig(FsspecConnectionConfig):
     )
     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
 
+    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
+    @contextmanager
+    def get_client(self, protocol: str) -> Generator["DropboxDriveFileSystem", None, None]:
+        with super().get_client(protocol=protocol) as client:
+            yield client
+
 
 @dataclass
 class DropboxIndexer(FsspecIndexer):
@@ -83,20 +94,6 @@ class DropboxIndexer(FsspecIndexer):
             filesize_bytes=file_size,
         )
 
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def __post_init__(self):
-        # dropbox expects the path to start with a /
-        if not self.index_config.path_without_protocol.startswith("/"):
-            self.index_config.path_without_protocol = "/" + self.index_config.path_without_protocol
-
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def precheck(self) -> None:
-        super().precheck()
-
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
-        return super().run(**kwargs)
-
 
 class DropboxDownloaderConfig(FsspecDownloaderConfig):
     pass
@@ -111,14 +108,6 @@ class DropboxDownloader(FsspecDownloader):
         default_factory=DropboxDownloaderConfig
     )
 
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        return super().run(file_data=file_data, **kwargs)
-
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        return await super().run_async(file_data=file_data, **kwargs)
-
 
 class DropboxUploaderConfig(FsspecUploaderConfig):
     pass
@@ -130,22 +119,6 @@ class DropboxUploader(FsspecUploader):
     connection_config: DropboxConnectionConfig
     upload_config: DropboxUploaderConfig = field(default=None)
 
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def __post_init__(self):
-        super().__post_init__()
-
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def precheck(self) -> None:
-        super().precheck()
-
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        return super().run(path=path, file_data=file_data, **kwargs)
-
-    @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
-    async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        return await super().run_async(path=path, file_data=file_data, **kwargs)
-
 
 dropbox_source_entry = SourceRegistryEntry(
     indexer=DropboxIndexer,
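The Dropbox-specific leading-slash fix moves out of the indexer's __post_init__ and into the config itself via pydantic's model_post_init, so the path is normalized as soon as the config is constructed. A minimal runnable sketch of that hook on a standalone model:

```python
from pydantic import BaseModel


class PathConfig(BaseModel):
    path_without_protocol: str

    def model_post_init(self, __context) -> None:
        # dropbox-style paths are expected to start with "/"
        if not self.path_without_protocol.startswith("/"):
            self.path_without_protocol = "/" + self.path_without_protocol


print(PathConfig(path_without_protocol="team-folder/docs").path_without_protocol)
# -> /team-folder/docs
```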
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py

@@ -4,6 +4,7 @@ import os
 import random
 import shutil
 import tempfile
+from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional, TypeVar
@@ -78,6 +79,15 @@ class FsspecConnectionConfig(ConnectionConfig):
     access_config: Secret[FsspecAccessConfig]
     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
 
+    @contextmanager
+    def get_client(self, protocol: str) -> Generator["AbstractFileSystem", None, None]:
+        from fsspec import get_filesystem_class
+
+        client = get_filesystem_class(protocol)(
+            **self.get_access_config(),
+        )
+        yield client
+
 
 FsspecIndexerConfigT = TypeVar("FsspecIndexerConfigT", bound=FsspecIndexerConfig)
 FsspecConnectionConfigT = TypeVar("FsspecConnectionConfigT", bound=FsspecConnectionConfig)
@@ -89,14 +99,6 @@ class FsspecIndexer(Indexer):
     index_config: FsspecIndexerConfigT
     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
 
-    @property
-    def fs(self) -> "AbstractFileSystem":
-        from fsspec import get_filesystem_class
-
-        return get_filesystem_class(self.index_config.protocol)(
-            **self.connection_config.get_access_config(),
-        )
-
     def precheck(self) -> None:
         from fsspec import get_filesystem_class
 
@@ -110,7 +112,8 @@
                 return
             file_to_sample = valid_files[0]
             logger.debug(f"attempting to make HEAD request for file: {file_to_sample}")
-            self.fs.head(path=file_to_sample)
+            with self.connection_config.get_client(protocol=self.index_config.protocol) as client:
+                client.head(path=file_to_sample)
         except Exception as e:
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise SourceConnectionError(f"failed to validate connection: {e}")
@@ -120,16 +123,18 @@
             # fs.ls does not walk directories
             # directories that are listed in cloud storage can cause problems
             # because they are seen as 0 byte files
-            files = self.fs.ls(self.index_config.path_without_protocol, detail=True)
+            with self.connection_config.get_client(protocol=self.index_config.protocol) as client:
+                files = client.ls(self.index_config.path_without_protocol, detail=True)
 
         else:
             # fs.find will recursively walk directories
             # "size" is a common key for all the cloud protocols with fs
-            found = self.fs.find(
-                self.index_config.path_without_protocol,
-                detail=True,
-            )
-            files = found.values()
+            with self.connection_config.get_client(protocol=self.index_config.protocol) as client:
+                found = client.find(
+                    self.index_config.path_without_protocol,
+                    detail=True,
+                )
+                files = found.values()
         filtered_files = [
             file for file in files if file.get("size") > 0 and file.get("type") == "file"
         ]
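The new FsspecConnectionConfig.get_client is the piece every other change hangs off: it builds the filesystem from the protocol plus the access config, and indexing now calls ls/find through it instead of a cached fs property. A runnable sketch of the same pattern against fsspec's in-memory filesystem, so no cloud credentials are needed; the helper name here is illustrative, not the connector's API:

```python
from contextlib import contextmanager
from typing import Generator

from fsspec import get_filesystem_class
from fsspec.spec import AbstractFileSystem


@contextmanager
def get_client(protocol: str, **access_config) -> Generator[AbstractFileSystem, None, None]:
    # mirror of the pattern above: resolve the filesystem class from the protocol string
    yield get_filesystem_class(protocol)(**access_config)


with get_client("memory") as client:
    client.pipe_file("/bucket/a.txt", b"hello")
    client.pipe_file("/bucket/sub/b.txt", b"world")

with get_client("memory") as client:
    # ls lists a single level; find walks the tree and returns {path: info}
    print([f["name"] for f in client.ls("/bucket", detail=True)])
    found = client.find("/bucket", detail=True)
    files = [f for f in found.values() if f.get("size", 0) > 0 and f.get("type") == "file"]
    print([f["name"] for f in files])
```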
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py (continued)

@@ -200,15 +205,8 @@ class FsspecDownloader(Downloader):
     )
 
     def is_async(self) -> bool:
-        return self.fs.async_impl
-
-    @property
-    def fs(self) -> "AbstractFileSystem":
-        from fsspec import get_filesystem_class
-
-        return get_filesystem_class(self.protocol)(
-            **self.connection_config.get_access_config(),
-        )
+        with self.connection_config.get_client(protocol=self.protocol) as client:
+            return client.async_impl
 
     def handle_directory_download(self, lpath: Path) -> None:
         # If the object's name contains certain characters (i.e. '?'), it
@@ -237,7 +235,8 @@
         download_path.parent.mkdir(parents=True, exist_ok=True)
         try:
             rpath = file_data.additional_metadata["original_file_path"]
-            self.fs.get(rpath=rpath, lpath=download_path.as_posix())
+            with self.connection_config.get_client(protocol=self.protocol) as client:
+                client.get(rpath=rpath, lpath=download_path.as_posix())
             self.handle_directory_download(lpath=download_path)
         except Exception as e:
             logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True)
@@ -249,7 +248,8 @@
         download_path.parent.mkdir(parents=True, exist_ok=True)
         try:
             rpath = file_data.additional_metadata["original_file_path"]
-
+            with self.connection_config.get_client(protocol=self.protocol) as client:
+                await client.get(rpath=rpath, lpath=download_path.as_posix())
             self.handle_directory_download(lpath=download_path)
         except Exception as e:
             logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True)
@@ -268,9 +268,11 @@ FsspecUploaderConfigT = TypeVar("FsspecUploaderConfigT", bound=FsspecUploaderConfig)
 class FsspecUploader(Uploader):
     connector_type: str = CONNECTOR_TYPE
     upload_config: FsspecUploaderConfigT = field(default=None)
+    connection_config: FsspecConnectionConfigT
 
     def is_async(self) -> bool:
-        return self.fs.async_impl
+        with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
+            return client.async_impl
 
     @property
     def fs(self) -> "AbstractFileSystem":
@@ -314,11 +316,13 @@ class FsspecUploader(Uploader):
         path_str = str(path.resolve())
         upload_path = self.get_upload_path(file_data=file_data)
         logger.debug(f"writing local file {path_str} to {upload_path}")
-        self.fs.upload(lpath=path_str, rpath=upload_path.as_posix())
+        with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
+            client.upload(lpath=path_str, rpath=upload_path.as_posix())
 
     async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
         upload_path = self.get_upload_path(file_data=file_data)
         path_str = str(path.resolve())
         # Odd that fsspec doesn't run exists() as async even when client support async
         logger.debug(f"writing local file {path_str} to {upload_path}")
-        self.fs.upload(lpath=path_str, rpath=upload_path.as_posix())
+        with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
+            client.upload(lpath=path_str, rpath=upload_path.as_posix())