unstructured-ingest 1.0.28-py3-none-any.whl → 1.0.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/processes/connectors/databricks/volumes.py +8 -3
- unstructured_ingest/processes/connectors/fsspec/fsspec.py +4 -5
- unstructured_ingest/processes/connectors/google_drive.py +295 -61
- unstructured_ingest/processes/connectors/onedrive.py +5 -5
- unstructured_ingest/processes/connectors/redisdb.py +47 -20
- {unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/METADATA +3 -2
- {unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/RECORD +11 -11
- {unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/licenses/LICENSE.md +0 -0
unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "1.0.28" # pragma: no cover
+__version__ = "1.0.32" # pragma: no cover
unstructured_ingest/processes/connectors/databricks/volumes.py
@@ -196,9 +196,14 @@ class DatabricksVolumesUploader(Uploader, ABC):
     connection_config: DatabricksVolumesConnectionConfig

     def get_output_path(self, file_data: FileData) -> str:
-
-
-
+        if file_data.source_identifiers.fullpath:
+            return os.path.join(
+                self.upload_config.path, f"{file_data.source_identifiers.fullpath}.json"
+            )
+        else:
+            return os.path.join(
+                self.upload_config.path, f"{file_data.source_identifiers.filename}.json"
+            )

     def precheck(self) -> None:
         try:
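The replacement get_output_path prefers source_identifiers.fullpath so the Databricks volume mirrors the source directory layout, falling back to the bare filename otherwise. A minimal sketch of that fallback, using a hypothetical stand-in for the connector's source_identifiers object:

import os
from dataclasses import dataclass
from typing import Optional

@dataclass
class SourceIdentifiersStub:
    # Hypothetical stand-in for the connector's source_identifiers object
    filename: str
    fullpath: Optional[str] = None

def get_output_path(base: str, ids: SourceIdentifiersStub) -> str:
    # Prefer the full relative path so the volume mirrors the source layout
    if ids.fullpath:
        return os.path.join(base, f"{ids.fullpath}.json")
    return os.path.join(base, f"{ids.filename}.json")

print(get_output_path("/Volumes/catalog/schema/vol", SourceIdentifiersStub("a.pdf", "docs/a.pdf")))
# /Volumes/catalog/schema/vol/docs/a.pdf.json
print(get_output_path("/Volumes/catalog/schema/vol", SourceIdentifiersStub("a.pdf")))
# /Volumes/catalog/schema/vol/a.pdf.json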
unstructured_ingest/processes/connectors/fsspec/fsspec.py
@@ -343,10 +343,9 @@ class FsspecUploader(Uploader):
         raise self.wrap_error(e=e)

     def get_upload_path(self, file_data: FileData) -> Path:
-        upload_path = (
-
-
-        )
+        upload_path = Path(
+            self.upload_config.path_without_protocol
+        ) / file_data.source_identifiers.fullpath.lstrip("/")
         updated_upload_path = upload_path.parent / f"{upload_path.name}.json"
         return updated_upload_path

@@ -358,8 +357,8 @@ class FsspecUploader(Uploader):
         client.upload(lpath=path_str, rpath=upload_path.as_posix())

     async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        upload_path = self.get_upload_path(file_data=file_data)
         path_str = str(path.resolve())
+        upload_path = self.get_upload_path(file_data=file_data)
         # Odd that fsspec doesn't run exists() as async even when client support async
         logger.debug(f"writing local file {path_str} to {upload_path}")
         with self.connection_config.get_client(protocol=self.upload_config.protocol) as client:
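The rewritten get_upload_path leans on a pathlib subtlety: joining an absolute path discards everything to its left, so without lstrip("/") a fullpath beginning with "/" would escape the configured prefix entirely. A quick demonstration:

from pathlib import Path

bucket_root = Path("my-bucket/output")
fullpath = "/docs/report.pdf"

# Joining an absolute path discards the left-hand side entirely:
print(bucket_root / fullpath)  # /docs/report.pdf  (bucket prefix lost!)

# Stripping the leading slash keeps the file under the configured prefix:
upload_path = bucket_root / fullpath.lstrip("/")
print(upload_path)  # my-bucket/output/docs/report.pdf

# The uploader then appends the .json suffix the same way the diff does:
print(upload_path.parent / f"{upload_path.name}.json")
# my-bucket/output/docs/report.pdf.json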
unstructured_ingest/processes/connectors/google_drive.py
@@ -52,6 +52,10 @@ EXPORT_EXTENSION_MAP = {
     "text/html": ".html",
 }

+# LRO Export Size Threshold is 10MB in real but the exported file might be slightly larger
+# than the original Google Workspace file - thus the threshold is set to 9MB
+LRO_EXPORT_SIZE_THRESHOLD = 9 * 1024 * 1024  # 9MB
+

 class GoogleDriveAccessConfig(AccessConfig):
     service_account_key: Optional[Annotated[dict, BeforeValidator(conform_string_to_dict)]] = Field(
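The constant leaves roughly 1 MB of headroom under the Drive API's 10 MB export response cap, since an exported rendition can come out larger than the stored Workspace file. A toy sketch of how such a threshold routes between the two export paths (the function and its names are illustrative, not the connector's API):

LRO_EXPORT_SIZE_THRESHOLD = 9 * 1024 * 1024  # 9,437,184 bytes

def choose_export_path(reported_size: int) -> str:
    # Illustrative routing only; the connector does this inside _export_gdrive_native_file
    return "lro" if reported_size > LRO_EXPORT_SIZE_THRESHOLD else "files.export"

assert choose_export_path(5 * 1024 * 1024) == "files.export"  # 5MB: direct export
assert choose_export_path(9_500_000) == "lro"  # 9.5MB: over the 9MB safety margin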
@@ -142,8 +146,7 @@ class GoogleDriveIndexer(Indexer):
         "originalFilename",
         "capabilities",
         "permissionIds",
-        "
-        "webContentLink",
+        "size",
     ]
 )

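Requesting size instead of webContentLink lets the downloader pick an export strategy up front. A hedged sketch of asking the Drive v3 API for the size field; creds and folder_id are assumed to exist, and the field projection here is abbreviated rather than the connector's exact list:

from googleapiclient.discovery import build

service = build("drive", "v3", credentials=creds)  # `creds` is assumed to exist
resp = service.files().list(
    q=f"'{folder_id}' in parents and trashed = false",  # `folder_id` is assumed
    fields="nextPageToken, files(id, name, mimeType, size)",
).execute()
for f in resp.get("files", []):
    # Workspace-native files may report no size; fall back to 0 like the downloader does
    print(f["name"], f.get("mimeType"), int(f.get("size", 0)))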
@@ -178,7 +181,9 @@ class GoogleDriveIndexer(Indexer):
         raise SourceConnectionError("Google drive API unreachable for an unknown reason!")

     @staticmethod
-    def count_files_recursively(
+    def count_files_recursively(
+        files_client: "GoogleAPIResource", folder_id: str, extensions: list[str] = None
+    ) -> int:
         """
         Count non-folder files recursively under the given folder.
         If `extensions` is provided, only count files
@@ -477,22 +482,26 @@ class GoogleDriveIndexer(Indexer):


 class GoogleDriveDownloaderConfig(DownloaderConfig):
-
+    lro_max_tries: int = 10
+    lro_max_time: int = 10 * 60  # 10 minutes


-
-
+def _get_extension(file_data: FileData) -> str:
+    """
+    Returns the extension for a given source MIME type.
     """
-
-
+    source_mime_type = file_data.additional_metadata.get("export_mime_type", "")
+    export_mime_type = GOOGLE_EXPORT_MIME_MAP.get(source_mime_type, "")
+    if export_mime_type:
+        return EXPORT_EXTENSION_MAP.get(export_mime_type, "")
+    return ""

-    These links emulate the behavior of Google Drive's "File > Download as..." options
-    in the UI and bypass the size limitations of `files.export()`.

-
-
-
-
+@dataclass
+class GoogleDriveDownloader(Downloader):
+    """
+    Downloads files from Google Drive using googleapis client. For native files, it uses the export
+    functionality for files <10MB and LRO (Long Running Operation) for files >10MB.
     """

     connection_config: GoogleDriveConnectionConfig
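_get_extension chains two lookups: source MIME type to export MIME type, then export MIME type to file extension. A worked sketch with illustrative map entries; only the "text/html": ".html" pairing is visible in this diff, the other entries are assumptions:

# Illustrative stand-ins for the module-level maps; all entries are assumptions
# except "text/html" -> ".html", which appears in the diff above.
GOOGLE_EXPORT_MIME_MAP = {"application/vnd.google-apps.document": "application/pdf"}
EXPORT_EXTENSION_MAP = {"application/pdf": ".pdf", "text/html": ".html"}

def get_extension(source_mime_type: str) -> str:
    # Two-step lookup: source MIME -> export MIME -> file extension
    export_mime_type = GOOGLE_EXPORT_MIME_MAP.get(source_mime_type, "")
    if export_mime_type:
        return EXPORT_EXTENSION_MAP.get(export_mime_type, "")
    return ""

print(get_extension("application/vnd.google-apps.document"))  # .pdf
print(get_extension("image/png"))  # empty string: not a Workspace-native type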
@@ -501,73 +510,233 @@ class GoogleDriveDownloader(Downloader):
     )
     connector_type: str = CONNECTOR_TYPE

-
+    @requires_dependencies(["googleapiclient"], extras="google-drive")
+    def _direct_download_file(self, file_id, download_path: Path):
+        """Downloads a file from Google Drive using the Drive API's media download functionality.
+        The method uses Google Drive API's media download functionality to stream the file
+        content directly to disk.
+
+        Args:
+            file_id (str): The ID of the file to download from Google Drive.
+            download_path (Path): The local path where the file should be saved.
+
+        Raises:
+            SourceConnectionError: If the download operation fails.
         """
-
+        from googleapiclient.errors import HttpError
+        from googleapiclient.http import MediaIoBaseDownload

-
-
+        try:
+            with self.connection_config.get_client() as client:
+                # pylint: disable=maybe-no-member
+                request = client.get_media(fileId=file_id)
+
+                with open(download_path, "wb") as file:
+                    downloader = MediaIoBaseDownload(file, request)
+                    done = False
+                    while done is False:
+                        status, done = downloader.next_chunk()
+                        logger.debug(f"Download progress:{int(status.progress() * 100)}.")
+
+        except (HttpError, ValueError) as error:
+            logger.exception(f"Error downloading file {file_id} to {download_path}: {error}")
+            raise SourceConnectionError("Failed to download file") from error
+
+    @requires_dependencies(["googleapiclient"], extras="google-drive")
+    def _export_gdrive_file_with_lro(self, file_id: str, download_path: Path, mime_type: str):
+        """Exports a Google Drive file using Long-Running Operation (LRO) for large files
+        (>10MB of the exported file size).
+
+        This method is used when the standard export method fails due to file size limitations.
+        It uses the Drive API's LRO functionality to handle large file exports.

+        Args:
+            file_id (str): The ID of the Google Drive file to export.
+            download_path (Path): The local path where the exported file should be saved.
+            mime_type (str): The target MIME type for the exported file.
+        Raises:
+            SourceConnectionError: If the export operation fails.
+        """
+
+        import tenacity
+        from googleapiclient.errors import HttpError
+
+        max_time = self.download_config.lro_max_time
+        max_tries = self.download_config.lro_max_tries
+
+        class OperationNotFinished(Exception):
+            """
+            Exception raised when the operation is not finished.
+            """
+
+            pass
+
+        def is_fatal_code(e: Exception) -> bool:
+            """
+            Returns True if the error is fatal and should not be retried.
+            403 and 429 can mean "Too many requests" or "User rate limit exceeded"
+            which should be retried.
+            """
+            return (
+                isinstance(e, HttpError)
+                and 400 <= e.resp.status < 500
+                and e.resp.status not in [403, 429]
+            )
+
+        @tenacity.retry(
+            wait=tenacity.wait_exponential(),
+            retry=tenacity.retry_if_exception(
+                lambda e: (
+                    isinstance(e, (HttpError, OperationNotFinished)) and not is_fatal_code(e)
+                )
+            ),
+            stop=(tenacity.stop_after_attempt(max_tries) | tenacity.stop_after_delay(max_time)),
+        )
+        def _poll_operation(operation: dict, operations_client: "GoogleAPIResource") -> dict:
+            """
+            Helper function to poll the operation until it's complete.
+            Uses backoff exponential retry logic.
+
+            Each `operations.get` call uses the Google API requests limit. Details:
+            https://developers.google.com/workspace/drive/api/guides/limits
+
+            The limits as of May 2025 are:
+            - 12.000 calls per 60 seconds
+
+            In case of request limitting, the API will return 403 `User rate limit exceeded` error
+            or 429 `Too many requests` error.
+            """
+            if operation.get("done", False):
+                return operation
+            if "error" in operation:
+                raise SourceConnectionError(
+                    f"Export operation failed: {operation['error']['message']}"
+                )
+            # Refresh the operation status:
+            # FYI: In some cases the `operations.get` call errors with 403 "User does not have
+            # permission" error even if the same user create the operation with `download` method.
+            updated_operation = operations_client.get(name=operation["name"]).execute()
+            if not updated_operation.get("done", False):
+                raise OperationNotFinished()
+            return updated_operation
+
+        try:
+            with self._get_files_and_operations_client() as (files_client, operations_client):
+                # Start the LRO
+                operation = files_client.download(fileId=file_id, mimeType=mime_type).execute()
+
+                # In case the operation is not finished, poll it until it's complete
+                updated_operation = _poll_operation(operation, operations_client)
+
+                # Get the download URI from the completed operation
+                download_uri = updated_operation["response"]["downloadUri"]
+
+                # Download the file using the URI
+                self._raw_download_google_drive_file(download_uri, download_path)
+
+        except HttpError as error:
+            raise SourceConnectionError(
+                f"Failed to export file using Google Drive LRO: {error}"
+            ) from error
+
+    @requires_dependencies(["googleapiclient"], extras="google-drive")
+    def _export_gdrive_native_file(
+        self, file_id: str, download_path: Path, mime_type: str, file_size: int
+    ):
+        """Exports a Google Drive native file (Docs, Sheets, Slides) to a specified format.
+
+        This method uses the Google Drive API's export functionality to convert Google Workspace
+        files to other formats (e.g., Google Docs to PDF, Google Sheets to Excel).
+        For files larger than 10MB, it falls back to using Long-Running Operation (LRO).
+
+        Args:
+            file_id (str): The ID of the Google Drive file to export.
+            download_path (Path): The local path where the exported file should be saved.
+            mime_type (str): The target MIME type for the exported file (e.g., 'application/pdf').
+            file_size (int): The size of the file to export - used to determine if the
+                file is large enough to use LRO instead of direct export endpoint.
         Returns:
-
+            bytes: The exported file content.

         Raises:
-
+            HttpError: If the export operation fails.
         """
+        from googleapiclient.errors import HttpError
+        from googleapiclient.http import MediaIoBaseDownload
+
+        if file_size > LRO_EXPORT_SIZE_THRESHOLD:
+            self._export_gdrive_file_with_lro(file_id, download_path, mime_type)
+            return
+
         with self.connection_config.get_client() as client:
-
+            try:
+                # pylint: disable=maybe-no-member
+                request = client.export_media(fileId=file_id, mimeType=mime_type)
+                with open(download_path, "wb") as file:
+                    downloader = MediaIoBaseDownload(file, request)
+                    done = False
+                    while done is False:
+                        status, done = downloader.next_chunk()
+                        logger.debug(f"Download progress: {int(status.progress() * 100)}.")
+            except HttpError as error:
+                if error.resp.status == 403 and "too large" in error.reason.lower():
+                    # Even though we have the LRO threashold, for some smaller files the
+                    # export size might exceed 10MB and we get a 403 error.
+                    # In that case, we use LRO as a fallback.
+                    self._export_gdrive_file_with_lro(file_id, download_path, mime_type)
+                else:
+                    raise SourceConnectionError(f"Failed to export file: {error}") from error

-
-
+    @requires_dependencies(["googleapiclient"], extras="google-drive")
+    @contextmanager
+    def _get_files_and_operations_client(
+        self,
+    ) -> Generator[tuple["GoogleAPIResource", "GoogleAPIResource"], None, None]:
+        """
+        Returns a context manager for the files and operations clients for the Google Drive API.

-
-
-
-
-
-        return url, ext
+        Yields:
+            Tuple[GoogleAPIResource, GoogleAPIResource]: A tuple of the files
+                and operations clients.
+        """
+        from googleapiclient.discovery import build

-
-
-
+        creds = self._get_credentials()
+        service = build("drive", "v3", credentials=creds)
+        with (
+            service.operations() as operations_client,
+            service.files() as files_client,
+        ):
+            yield files_client, operations_client

-    @requires_dependencies(["httpx"
-    def
+    @requires_dependencies(["httpx"])
+    def _raw_download_google_drive_file(self, url: str, download_path: Path) -> Path:
         """
         Streams file content directly to disk using authenticated HTTP request.
+        Must use httpx to stream the file to disk as currently there's no google SDK
+        functionality to download a file like for get media or export operations.

         Writes the file to the correct path in the download directory while downloading.
         Avoids buffering large files in memory.

-
-
+        Args:
+            url (str): The URL of the file to download.
+            download_path (Path): The path to save the downloaded file.

-
-
+        Returns:
+            Path: The path to the downloaded file.
         """
         import httpx
         from google.auth.transport.requests import Request
-        from google.oauth2 import service_account

-
-
-        creds = service_account.Credentials.from_service_account_info(
-            key_data,
-            scopes=["https://www.googleapis.com/auth/drive.readonly"],
-        )
+        creds = self._get_credentials()
+
         creds.refresh(Request())

         headers = {
             "Authorization": f"Bearer {creds.token}",
         }

-        download_path = self.get_download_path(file_data)
-        if ext:
-            download_path = download_path.with_suffix(ext)
-
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.debug(f"Streaming file to {download_path}")
-
         with (
             httpx.Client(timeout=None, follow_redirects=True) as client,
             client.stream("GET", url, headers=headers) as response,
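The polling policy combines tenacity stop conditions with OR semantics: give up after lro_max_tries attempts or after lro_max_time seconds, whichever comes first, backing off exponentially in between. A self-contained sketch of the same combinator pattern against a simulated long-running operation:

import tenacity

class OperationNotFinished(Exception):
    pass

attempts = {"n": 0}

@tenacity.retry(
    wait=tenacity.wait_exponential(multiplier=0.01),  # small multiplier so the demo runs fast
    retry=tenacity.retry_if_exception_type(OperationNotFinished),
    stop=(tenacity.stop_after_attempt(10) | tenacity.stop_after_delay(600)),
)
def poll() -> str:
    attempts["n"] += 1
    if attempts["n"] < 4:
        raise OperationNotFinished()  # not done yet: retry with exponential backoff
    return "done"

print(poll(), "after", attempts["n"], "attempts")  # done after 4 attempts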
@@ -579,26 +748,91 @@ class GoogleDriveDownloader(Downloader):
             with open(download_path, "wb") as f:
                 for chunk in response.iter_bytes():
                     f.write(chunk)
+        return download_path
+
+    @requires_dependencies(["google"], extras="google-drive")
+    def _get_credentials(self):
+        """
+        Retrieves the credentials for Google Drive API access.
+
+        Returns:
+            Credentials: The credentials for Google Drive API access.
+        """
+        from google.oauth2 import service_account
+
+        access_config = self.connection_config.access_config.get_secret_value()
+        key_data = access_config.get_service_account_key()
+        creds = service_account.Credentials.from_service_account_info(
+            key_data,
+            scopes=["https://www.googleapis.com/auth/drive.readonly"],
+        )
+        return creds
+
+    def _download_file(self, file_data: FileData) -> Path:
+        """Downloads a file from Google Drive using either direct download or export based
+        on the source file's MIME type.
+
+        This method determines the appropriate download method based on the file's MIME type:
+        - For Google Workspace files (Docs, Sheets, Slides), uses export functionality
+        - For other files, uses direct download
+
+        Args:
+            file_data (FileData): The metadata of the file being downloaded.
+
+        Returns:
+            Path: The path to the downloaded file.
+
+        Raises:
+            SourceConnectionError: If the download fails.
+        """
+        mime_type = file_data.additional_metadata.get("mimeType", "")
+        file_size = int(file_data.additional_metadata.get("size", 0))
+        file_id = file_data.identifier
+
+        download_path = self.get_download_path(file_data)
+        if not download_path:
+            raise SourceConnectionError(f"Failed to get download path for file {file_id}")
+
+        if mime_type in GOOGLE_EXPORT_MIME_MAP:
+            # For Google Workspace files, use export functionality
+            ext = _get_extension(file_data)
+            download_path = download_path.with_suffix(ext)
+            download_path.parent.mkdir(parents=True, exist_ok=True)
+            export_mime = GOOGLE_EXPORT_MIME_MAP[mime_type]
+            self._export_gdrive_native_file(
+                file_id=file_id,
+                download_path=download_path,
+                mime_type=export_mime,
+                file_size=file_size,
+            )
+            file_data.additional_metadata.update(
+                {
+                    "export_mime_type": export_mime,
+                    "export_extension": ext,
+                    "download_method": "google_workspace_export",
+                }
+            )
+        else:
+            # For other files, use direct download
+            download_path.parent.mkdir(parents=True, exist_ok=True)
+            self._direct_download_file(file_id=file_id, download_path=download_path)
+            file_data.additional_metadata.update(
+                {
+                    "download_method": "direct_download",
+                }
+            )

         return download_path

     def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
         mime_type = file_data.additional_metadata.get("mimeType", "")
-        record_id = file_data.identifier

         logger.debug(
             f"Downloading file {file_data.source_identifiers.fullpath} of type {mime_type}"
         )

-
-        download_path = self._download_url(file_data, download_url, ext)
+        download_path = self._download_file(file_data)

-        file_data.additional_metadata.update(
-            {
-                "download_method": "export_link" if ext else "web_content_link",
-                "download_url_used": download_url,
-            }
-        )
         file_data.local_download_path = str(download_path.resolve())

         return self.generate_download_response(file_data=file_data, download_path=download_path)
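Both branches of _download_file record how the bytes reached disk, so later pipeline steps can tell an exported rendition from a byte-for-byte copy. A small sketch of consuming that metadata; FileDataStub is a hypothetical stand-in for the library's FileData:

from dataclasses import dataclass, field

@dataclass
class FileDataStub:
    # Hypothetical stand-in for unstructured-ingest's FileData
    identifier: str
    additional_metadata: dict = field(default_factory=dict)

fd = FileDataStub(identifier="abc123")
fd.additional_metadata.update(
    {
        "export_mime_type": "application/pdf",
        "export_extension": ".pdf",
        "download_method": "google_workspace_export",
    }
)

# Downstream steps can branch on how the file reached local disk:
if fd.additional_metadata.get("download_method") == "google_workspace_export":
    print("exported rendition:", fd.additional_metadata["export_extension"])
else:
    print("byte-for-byte copy of the Drive file")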
unstructured_ingest/processes/connectors/onedrive.py
@@ -370,14 +370,14 @@ class OnedriveUploader(Uploader):
         # Use the remote_url from upload_config as the base destination folder
         base_destination_folder = self.upload_config.url

-        # Use the file's
-        if file_data.source_identifiers and file_data.source_identifiers.
-            # Combine the base destination folder with the file's
+        # Use the file's full path to maintain directory structure, if needed
+        if file_data.source_identifiers and file_data.source_identifiers.fullpath:
+            # Combine the base destination folder with the file's full path
             destination_path = Path(base_destination_folder) / Path(
-                f"{file_data.source_identifiers.
+                f"{file_data.source_identifiers.fullpath}.json"
             )
         else:
-            # If no
+            # If no full path is provided, upload directly to the base destination folder
             destination_path = Path(base_destination_folder) / f"{path.name}.json"

         destination_folder = destination_path.parent
unstructured_ingest/processes/connectors/redisdb.py
@@ -32,7 +32,9 @@ class RedisAccessConfig(AccessConfig):
         default=None, description="If not anonymous, use this uri, if specified."
     )
     password: Optional[str] = Field(
-        default=None,
+        default=None,
+        description="Password used to connect to database if uri is "
+        "not specified and connection is not anonymous.",
     )

@@ -41,20 +43,32 @@ class RedisConnectionConfig(ConnectionConfig):
         default=RedisAccessConfig(), validate_default=True
     )
     host: Optional[str] = Field(
-        default=None,
+        default=None,
+        description="Hostname or IP address of a Redis instance to connect to "
+        "if uri is not specified.",
     )
     database: int = Field(default=0, description="Database index to connect to.")
-    port: int = Field(
+    port: Optional[int] = Field(
+        default=6379, description="Port used to connect to database if uri is not specified."
+    )
     username: Optional[str] = Field(
-        default=None, description="Username used to connect to database."
+        default=None, description="Username used to connect to database if uri is not specified."
+    )
+    ssl: Optional[bool] = Field(
+        default=True,
+        description="Whether the connection should use SSL encryption if uri is not specified.",
     )
-    ssl: bool = Field(default=True, description="Whether the connection should use SSL encryption.")
     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)

     @model_validator(mode="after")
     def validate_host_or_url(self) -> "RedisConnectionConfig":
-        if not self.access_config.get_secret_value().uri
-
+        if not self.access_config.get_secret_value().uri:
+            if not self.host:
+                raise ValueError("Please pass a hostname either directly or through uri")
+            if self.port is None:
+                raise ValueError("Since URI is not specified, port cannot be None")
+            if self.ssl is None:
+                raise ValueError("Since URI is not specified, ssl cannot be None")
         return self

     @requires_dependencies(["redis"], extras="redis")
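The validator now insists that host, port, and ssl are all usable whenever no uri is supplied; since the fields are Optional, a caller could otherwise set them to None explicitly. A self-contained pydantic v2 sketch of the same pattern, with a simplified stand-in model (no secret wrapper):

from typing import Optional
from pydantic import BaseModel, model_validator

class ConnStub(BaseModel):
    # Hypothetical stand-in for RedisConnectionConfig, without the access-config wrapper
    uri: Optional[str] = None
    host: Optional[str] = None
    port: Optional[int] = 6379
    ssl: Optional[bool] = True

    @model_validator(mode="after")
    def validate_host_or_uri(self) -> "ConnStub":
        if not self.uri:
            if not self.host:
                raise ValueError("Please pass a hostname either directly or through uri")
            if self.port is None:
                raise ValueError("Since URI is not specified, port cannot be None")
            if self.ssl is None:
                raise ValueError("Since URI is not specified, ssl cannot be None")
        return self

ConnStub(uri="rediss://localhost:6379")  # ok: uri provided
ConnStub(host="localhost")               # ok: host plus port/ssl defaults
# ConnStub(host="localhost", port=None)  # would raise: port cannot be None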
@@ -64,21 +78,20 @@ class RedisConnectionConfig(ConnectionConfig):

         access_config = self.access_config.get_secret_value()

-        options = {
-            "host": self.host,
-            "port": self.port,
-            "db": self.database,
-            "ssl": self.ssl,
-            "username": self.username,
-        }
-
-        if access_config.password:
-            options["password"] = access_config.password
-
         if access_config.uri:
             async with from_url(access_config.uri) as client:
                 yield client
         else:
+            options = {
+                "host": self.host,
+                "port": self.port,
+                "db": self.database,
+                "ssl": self.ssl,
+                "username": self.username,
+            }
+
+            if access_config.password:
+                options["password"] = access_config.password
             async with Redis(**options) as client:
                 yield client

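Moving the options dict into the else branch means the keyword arguments are only assembled when from_url is not used. A short sketch of the two connection paths with redis.asyncio; the host and URI are placeholders and assume a reachable Redis:

import asyncio
from redis.asyncio import Redis, from_url

async def ping(uri: str | None) -> bool:
    if uri:
        client = from_url(uri)
    else:
        # Keyword options are only assembled on this branch, mirroring the connector
        client = Redis(host="localhost", port=6379, db=0, ssl=False)
    async with client:
        return await client.ping()

print(asyncio.run(ping("redis://localhost:6379")))  # assumes a local Redis instance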
@@ -113,6 +126,20 @@ class RedisUploaderConfig(UploaderConfig):
     key_prefix: str = Field(default="", description="Prefix for Redis keys")


+def _form_redis_pipeline_error_message(error: str) -> str:
+    """
+    Form a user-friendly error message for Redis pipeline errors.
+    The error message has `$` character at the beginning and `) of pipeline` at the end.
+    Everything between these two strings is the value an should be removed.
+    """
+    start = error.find("$")
+    end = error.find(") of pipeline")
+    if start != -1 and end != -1:
+        return error[: start + 1] + "<value>" + error[end:]
+    else:
+        return error
+
+
 @dataclass
 class RedisUploader(Uploader):
     upload_config: RedisUploaderConfig
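The helper redacts whatever Redis echoes back between the "$" argument and ") of pipeline", which is the uploaded document body. A runnable demonstration with a made-up error string (the real ResponseError text may differ):

def _form_redis_pipeline_error_message(error: str) -> str:
    # Copy of the helper from the diff above, for demonstration
    start = error.find("$")
    end = error.find(") of pipeline")
    if start != -1 and end != -1:
        return error[: start + 1] + "<value>" + error[end:]
    return error

# Hypothetical error text that echoes the uploaded JSON document back:
raw = 'Command # 1 (JSON.SET key $ {"text": "...document body..."}) of pipeline caused error'
print(_form_redis_pipeline_error_message(raw))
# Command # 1 (JSON.SET key $<value>) of pipeline caused error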
@@ -169,14 +196,14 @@ class RedisUploader(Uploader):
                 # Redis with stack extension supports JSON type
                 await pipe.json().set(key_with_prefix, "$", element).execute()
             except redis_exceptions.ResponseError as e:
-                message = str(e)
+                message = _form_redis_pipeline_error_message(str(e))
                 if "unknown command `JSON.SET`" in message:
                     # if this error occurs, Redis server doesn't support JSON type,
                     # so save as string type instead
                     await pipe.set(key_with_prefix, json.dumps(element)).execute()
                     redis_stack = False
                 else:
-                    raise e
+                    raise redis_exceptions.ResponseError(message) from e
         return redis_stack


{unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: unstructured_ingest
-Version: 1.0.28
+Version: 1.0.32
 Summary: Local ETL data pipeline to get data RAG ready
 Author-email: Unstructured Technologies <devops@unstructuredai.io>
 License-Expression: Apache-2.0
@@ -87,6 +87,7 @@ Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == 'gitlab'
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == 'google-drive'
+Requires-Dist: tenacity; extra == 'google-drive'
 Provides-Extra: hubspot
 Requires-Dist: hubspot-api-client; extra == 'hubspot'
 Requires-Dist: urllib3; extra == 'hubspot'
@@ -163,7 +164,7 @@ Requires-Dist: qdrant-client; extra == 'qdrant'
 Provides-Extra: reddit
 Requires-Dist: praw; extra == 'reddit'
 Provides-Extra: redis
-Requires-Dist: redis; extra == 'redis'
+Requires-Dist: redis<=5.3.0; extra == 'redis'
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
 Provides-Extra: rst
{unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/RECORD
@@ -1,5 +1,5 @@
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=
+unstructured_ingest/__version__.py,sha256=tjMRa0J78uLr4Q1KetAAzKJ8jimjDywme7jVeFHwNx4,43
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
 unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -72,17 +72,17 @@ unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy
 unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
 unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
 unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
-unstructured_ingest/processes/connectors/google_drive.py,sha256=
+unstructured_ingest/processes/connectors/google_drive.py,sha256=W6zjpuNS-mnLJtwTKAAPn0_4pcEc1bySO2u4V3fPXVo,35250
 unstructured_ingest/processes/connectors/jira.py,sha256=a7OuVi4RFfr22Tqgk60lwmtWTRBw2fI1m8KPqfA8Ffo,18504
 unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
 unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
 unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
 unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
 unstructured_ingest/processes/connectors/neo4j.py,sha256=ztxvI9KY8RF5kYUuMGSzzN5mz7Fu_4Ai9P7dqCpJLc0,20267
-unstructured_ingest/processes/connectors/onedrive.py,sha256=
+unstructured_ingest/processes/connectors/onedrive.py,sha256=k0bhQCCSIgmHAk3lQd4CMA3dc4fPAjegNlLxlDWGowc,19284
 unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
 unstructured_ingest/processes/connectors/pinecone.py,sha256=pSREUNsQqel6q1EFZsFWelg-uZgGubQY5m_6nVnBFKs,15090
-unstructured_ingest/processes/connectors/redisdb.py,sha256=
+unstructured_ingest/processes/connectors/redisdb.py,sha256=rTihbfv0Mlk1eo5Izn-JXRu5Ad5C-KD58nSqeKsaZJ8,8024
 unstructured_ingest/processes/connectors/salesforce.py,sha256=OaKEWCqZrirHqFJ650K5jSPwYlWefPOapas8Y-4D9oc,11661
 unstructured_ingest/processes/connectors/sharepoint.py,sha256=jI-erp4YUfHxPeUTcfHSPEG3w0wjSBYfAnMg1WT6lfw,4996
 unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDADLbJYq-_jvchzYrTdLO4,9224
@@ -92,7 +92,7 @@ unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-
 unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=8a9HTcRWA6IuswSD632b_uZSO6Dax_0rUYnflqktcek,226
 unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
 unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
-unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=
+unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=fZeXRozTUM3JeZlmsxhn_glqRhxr8CGG-8I8QRhRcP8,8232
 unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
 unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
 unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
@@ -109,7 +109,7 @@ unstructured_ingest/processes/connectors/fsspec/__init__.py,sha256=3HTdw4L4mdN4W
 unstructured_ingest/processes/connectors/fsspec/azure.py,sha256=31VNiG5YnXfhrFX7QJ2O1ubeWHxbe1sYVIztefbscAQ,7148
 unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4ZpI1fKTsJuIDfXuAzx_a4FzxG4,5873
 unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
-unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=
+unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=4K8Q2D_6_HCqTVM3HBJv3SNz9gjbQhk44nzeSheDpzA,14462
 unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
 unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
 unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
 unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
 unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
 unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
-unstructured_ingest-1.0.28.dist-info/METADATA,sha256=
-unstructured_ingest-1.0.28.dist-info/WHEEL,sha256=
-unstructured_ingest-1.0.28.dist-info/entry_points.txt,sha256=
-unstructured_ingest-1.0.28.dist-info/licenses/LICENSE.md,sha256=
-unstructured_ingest-1.0.28.dist-info/RECORD,,
+unstructured_ingest-1.0.32.dist-info/METADATA,sha256=CaoYV49uBnrTClcA3h67r-wol4XH4KaZTo0WMWITX7Q,8747
+unstructured_ingest-1.0.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+unstructured_ingest-1.0.32.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-1.0.32.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-1.0.32.dist-info/RECORD,,
{unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/WHEEL
RENAMED
File without changes

{unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/entry_points.txt
RENAMED
File without changes

{unstructured_ingest-1.0.28.dist-info → unstructured_ingest-1.0.32.dist-info}/licenses/LICENSE.md
RENAMED
File without changes