PyPI - unstructured-ingest - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

unstructured-ingest 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (25) hide show

unstructured_ingest/embed/octoai.py CHANGED Viewed

@@ -9,7 +9,6 @@ from unstructured_ingest.embed.interfaces import (
     EmbeddingConfig,
 )
 from unstructured_ingest.logger import logger
-from unstructured_ingest.utils.data_prep import batch_generator
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.errors import (
     ProviderError,
@@ -17,6 +16,7 @@ from unstructured_ingest.v2.errors import (
     RateLimitError,
     UserAuthError,
     UserError,
+    is_internal_error,
 )
 if TYPE_CHECKING:
@@ -29,6 +29,8 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
     base_url: str = Field(default="https://text.octoai.run/v1")
     def wrap_error(self, e: Exception) -> Exception:
+        if is_internal_error(e=e):
+            return e
         # https://platform.openai.com/docs/guides/error-codes/api-errors
         from openai import APIStatusError
@@ -80,28 +82,17 @@ class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    def embed_query(self, query: str):
-        try:
-            client = self.config.get_client()
-            response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
-        except Exception as e:
-            raise self.wrap_error(e=e)
+    def _embed_query(self, query: str):
+        client = self.get_client()
+        response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
         return response.data[0].embedding
-    def embed_documents(self, elements: list[dict]) -> list[dict]:
-        texts = [e.get("text", "") for e in elements]
-        embeddings = []
-        client = self.config.get_client()
-        try:
-            for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
-                response = client.embeddings.create(
-                    input=batch, model=self.config.embedder_model_name
-                )
-                embeddings.extend([data.embedding for data in response.data])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
-        return elements_with_embeddings
+    def get_client(self) -> "OpenAI":
+        return self.config.get_client()
+    def embed_batch(self, client: "OpenAI", batch: list[str]) -> list[list[float]]:
+        response = client.embeddings.create(input=batch, model=self.config.embedder_model_name)
+        return [data.embedding for data in response.data]
 @dataclass
@@ -111,27 +102,11 @@ class AsyncOctoAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    async def embed_query(self, query: str):
-        client = self.config.get_async_client()
-        try:
-            response = await client.embeddings.create(
-                input=query, model=self.config.embedder_model_name
-            )
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return response.data[0].embedding
+    def get_client(self) -> "AsyncOpenAI":
+        return self.config.get_async_client()
-    async def embed_documents(self, elements: list[dict]) -> list[dict]:
-        texts = [e.get("text", "") for e in elements]
-        client = self.config.get_async_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
-                response = await client.embeddings.create(
-                    input=batch, model=self.config.embedder_model_name
-                )
-                embeddings.extend([data.embedding for data in response.data])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
-        return elements_with_embeddings
+    async def embed_batch(self, client: "AsyncOpenAI", batch: list[str]) -> list[list[float]]:
+        response = await client.embeddings.create(
+            input=batch, model=self.config.embedder_model_name
+        )
+        return [data.embedding for data in response.data]

unstructured_ingest/embed/openai.py CHANGED Viewed

@@ -9,7 +9,6 @@ from unstructured_ingest.embed.interfaces import (
     EmbeddingConfig,
 )
 from unstructured_ingest.logger import logger
-from unstructured_ingest.utils.data_prep import batch_generator
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.errors import (
     ProviderError,
@@ -17,6 +16,7 @@ from unstructured_ingest.v2.errors import (
     RateLimitError,
     UserAuthError,
     UserError,
+    is_internal_error,
 )
 if TYPE_CHECKING:
@@ -28,6 +28,8 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
     embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
     def wrap_error(self, e: Exception) -> Exception:
+        if is_internal_error(e=e):
+            return e
         # https://platform.openai.com/docs/guides/error-codes/api-errors
         from openai import APIStatusError
@@ -71,29 +73,12 @@ class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    def embed_query(self, query: str) -> list[float]:
-        client = self.config.get_client()
-        try:
-            response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return response.data[0].embedding
-    def embed_documents(self, elements: list[dict]) -> list[dict]:
-        client = self.config.get_client()
-        texts = [e.get("text", "") for e in elements]
-        embeddings = []
-        try:
-            for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
-                response = client.embeddings.create(
-                    input=batch, model=self.config.embedder_model_name
-                )
-                embeddings.extend([data.embedding for data in response.data])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
-        return elements_with_embeddings
+    def get_client(self) -> "OpenAI":
+        return self.config.get_client()
+    def embed_batch(self, client: "OpenAI", batch: list[str]) -> list[list[float]]:
+        response = client.embeddings.create(input=batch, model=self.config.embedder_model_name)
+        return [data.embedding for data in response.data]
 @dataclass
@@ -103,27 +88,11 @@ class AsyncOpenAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    async def embed_query(self, query: str) -> list[float]:
-        client = self.config.get_async_client()
-        try:
-            response = await client.embeddings.create(
-                input=query, model=self.config.embedder_model_name
-            )
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return response.data[0].embedding
-    async def embed_documents(self, elements: list[dict]) -> list[dict]:
-        client = self.config.get_async_client()
-        texts = [e.get("text", "") for e in elements]
-        embeddings = []
-        try:
-            for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
-                response = await client.embeddings.create(
-                    input=batch, model=self.config.embedder_model_name
-                )
-                embeddings.extend([data.embedding for data in response.data])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
-        return elements_with_embeddings
+    def get_client(self) -> "AsyncOpenAI":
+        return self.config.get_async_client()
+    async def embed_batch(self, client: "AsyncOpenAI", batch: list[str]) -> list[list[float]]:
+        response = await client.embeddings.create(
+            input=batch, model=self.config.embedder_model_name
+        )
+        return [data.embedding for data in response.data]

unstructured_ingest/embed/togetherai.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from pydantic import Field, SecretStr
@@ -9,15 +9,11 @@ from unstructured_ingest.embed.interfaces import (
     EmbeddingConfig,
 )
 from unstructured_ingest.logger import logger
-from unstructured_ingest.utils.data_prep import batch_generator
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.errors import (
     RateLimitError as CustomRateLimitError,
 )
-from unstructured_ingest.v2.errors import (
-    UserAuthError,
-    UserError,
-)
+from unstructured_ingest.v2.errors import UserAuthError, UserError, is_internal_error
 if TYPE_CHECKING:
     from together import AsyncTogether, Together
@@ -30,6 +26,8 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
     )
     def wrap_error(self, e: Exception) -> Exception:
+        if is_internal_error(e=e):
+            return e
         # https://docs.together.ai/docs/error-codes
         from together.error import AuthenticationError, RateLimitError, TogetherException
@@ -63,27 +61,12 @@ class TogetherAIEmbeddingEncoder(BaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    def embed_query(self, query: str) -> list[float]:
-        return self._embed_documents(elements=[query])[0]
-    def embed_documents(self, elements: list[dict]) -> list[dict]:
-        embeddings = self._embed_documents([e.get("text", "") for e in elements])
-        return self._add_embeddings_to_elements(elements, embeddings)
-    def _embed_documents(self, elements: list[str]) -> list[list[float]]:
-        client = self.config.get_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(
-                elements, batch_size=self.config.batch_size or len(elements)
-            ):
-                outputs = client.embeddings.create(
-                    model=self.config.embedder_model_name, input=batch
-                )
-                embeddings.extend([outputs.data[i].embedding for i in range(len(batch))])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return embeddings
+    def get_client(self) -> "Together":
+        return self.config.get_client()
+    def embed_batch(self, client: "Together", batch: list[str]) -> list[list[float]]:
+        outputs = client.embeddings.create(model=self.config.embedder_model_name, input=batch)
+        return [outputs.data[i].embedding for i in range(len(batch))]
 @dataclass
@@ -93,25 +76,9 @@ class AsyncTogetherAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    async def embed_query(self, query: str) -> list[float]:
-        embedding = await self._embed_documents(elements=[query])
-        return embedding[0]
-    async def embed_documents(self, elements: list[dict]) -> list[dict]:
-        embeddings = await self._embed_documents([e.get("text", "") for e in elements])
-        return self._add_embeddings_to_elements(elements, embeddings)
-    async def _embed_documents(self, elements: list[str]) -> list[list[float]]:
-        client = self.config.get_async_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(
-                elements, batch_size=self.config.batch_size or len(elements)
-            ):
-                outputs = await client.embeddings.create(
-                    model=self.config.embedder_model_name, input=batch
-                )
-                embeddings.extend([outputs.data[i].embedding for i in range(len(batch))])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return embeddings
+    def get_client(self) -> "AsyncTogether":
+        return self.config.get_async_client()
+    async def embed_batch(self, client: Any, batch: list[str]) -> list[list[float]]:
+        outputs = await client.embeddings.create(model=self.config.embedder_model_name, input=batch)
+        return [outputs.data[i].embedding for i in range(len(batch))]

unstructured_ingest/embed/vertexai.py CHANGED Viewed

@@ -13,9 +13,8 @@ from unstructured_ingest.embed.interfaces import (
     BaseEmbeddingEncoder,
     EmbeddingConfig,
 )
-from unstructured_ingest.utils.data_prep import batch_generator
 from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.v2.errors import UserAuthError
+from unstructured_ingest.v2.errors import UserAuthError, is_internal_error
 if TYPE_CHECKING:
     from vertexai.language_models import TextEmbeddingModel
@@ -39,6 +38,8 @@ class VertexAIEmbeddingConfig(EmbeddingConfig):
     )
     def wrap_error(self, e: Exception) -> Exception:
+        if is_internal_error(e=e):
+            return e
         from google.auth.exceptions import GoogleAuthError
         if isinstance(e, GoogleAuthError):
@@ -71,31 +72,19 @@ class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    def embed_query(self, query):
-        return self._embed_documents(elements=[query])[0]
-    def embed_documents(self, elements: list[dict]) -> list[dict]:
-        embeddings = self._embed_documents([e.get("text", "") for e in elements])
-        elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
-        return elements_with_embeddings
+    def get_client(self) -> "TextEmbeddingModel":
+        return self.config.get_client()
     @requires_dependencies(
         ["vertexai"],
         extras="embed-vertexai",
     )
-    def _embed_documents(self, elements: list[str]) -> list[list[float]]:
+    def embed_batch(self, client: "TextEmbeddingModel", batch: list[str]) -> list[list[float]]:
         from vertexai.language_models import TextEmbeddingInput
-        inputs = [TextEmbeddingInput(text=element) for element in elements]
-        client = self.config.get_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(inputs, batch_size=self.config.batch_size or len(inputs)):
-                response = client.get_embeddings(batch)
-                embeddings.extend([e.values for e in response])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return embeddings
+        inputs = [TextEmbeddingInput(text=text) for text in batch]
+        response = client.get_embeddings(inputs)
+        return [e.values for e in response]
 @dataclass
@@ -105,29 +94,16 @@ class AsyncVertexAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    async def embed_query(self, query):
-        embedding = await self._embed_documents(elements=[query])
-        return embedding[0]
-    async def embed_documents(self, elements: list[dict]) -> list[dict]:
-        embeddings = await self._embed_documents([e.get("text", "") for e in elements])
-        elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
-        return elements_with_embeddings
+    def get_client(self) -> "TextEmbeddingModel":
+        return self.config.get_client()
     @requires_dependencies(
         ["vertexai"],
         extras="embed-vertexai",
     )
-    async def _embed_documents(self, elements: list[str]) -> list[list[float]]:
+    async def embed_batch(self, client: Any, batch: list[str]) -> list[list[float]]:
         from vertexai.language_models import TextEmbeddingInput
-        inputs = [TextEmbeddingInput(text=element) for element in elements]
-        client = self.config.get_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(inputs, batch_size=self.config.batch_size or len(inputs)):
-                response = await client.get_embeddings_async(batch)
-                embeddings.extend([e.values for e in response])
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return embeddings
+        inputs = [TextEmbeddingInput(text=text) for text in batch]
+        response = await client.get_embeddings_async(inputs)
+        return [e.values for e in response]

unstructured_ingest/embed/voyageai.py CHANGED Viewed

@@ -9,13 +9,8 @@ from unstructured_ingest.embed.interfaces import (
     EmbeddingConfig,
 )
 from unstructured_ingest.logger import logger
-from unstructured_ingest.utils.data_prep import batch_generator
 from unstructured_ingest.utils.dep_check import requires_dependencies
-from unstructured_ingest.v2.errors import (
-    ProviderError,
-    UserAuthError,
-    UserError,
-)
+from unstructured_ingest.v2.errors import ProviderError, UserAuthError, UserError, is_internal_error
 from unstructured_ingest.v2.errors import (
     RateLimitError as CustomRateLimitError,
 )
@@ -38,6 +33,8 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
     timeout_in_seconds: Optional[int] = None
     def wrap_error(self, e: Exception) -> Exception:
+        if is_internal_error(e=e):
+            return e
         # https://docs.voyageai.com/docs/error-codes
         from voyageai.error import AuthenticationError, RateLimitError, VoyageError
@@ -95,23 +92,12 @@ class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    def _embed_documents(self, elements: list[str]) -> list[list[float]]:
-        client = self.config.get_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(elements, batch_size=self.config.batch_size):
-                response = client.embed(texts=batch, model=self.config.embedder_model_name)
-                embeddings.extend(response.embeddings)
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return embeddings
-    def embed_documents(self, elements: list[dict]) -> list[dict]:
-        embeddings = self._embed_documents([e.get("text", "") for e in elements])
-        return self._add_embeddings_to_elements(elements, embeddings)
+    def get_client(self) -> "VoyageAIClient":
+        return self.config.get_client()
-    def embed_query(self, query: str) -> list[float]:
-        return self._embed_documents(elements=[query])[0]
+    def embed_batch(self, client: "VoyageAIClient", batch: list[str]) -> list[list[float]]:
+        response = client.embed(texts=batch, model=self.config.embedder_model_name)
+        return response.embeddings
 @dataclass
@@ -121,23 +107,11 @@ class AsyncVoyageAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
     def wrap_error(self, e: Exception) -> Exception:
         return self.config.wrap_error(e=e)
-    async def _embed_documents(self, elements: list[str]) -> list[list[float]]:
-        client = self.config.get_async_client()
-        embeddings = []
-        try:
-            for batch in batch_generator(
-                elements, batch_size=self.config.batch_size or len(elements)
-            ):
-                response = await client.embed(texts=batch, model=self.config.embedder_model_name)
-                embeddings.extend(response.embeddings)
-        except Exception as e:
-            raise self.wrap_error(e=e)
-        return embeddings
-    async def embed_documents(self, elements: list[dict]) -> list[dict]:
-        embeddings = await self._embed_documents([e.get("text", "") for e in elements])
-        return self._add_embeddings_to_elements(elements, embeddings)
-    async def embed_query(self, query: str) -> list[float]:
-        embedding = await self._embed_documents(elements=[query])
-        return embedding[0]
+    def get_client(self) -> "AsyncVoyageAIClient":
+        return self.config.get_async_client()
+    async def embed_batch(
+        self, client: "AsyncVoyageAIClient", batch: list[str]
+    ) -> list[list[float]]:
+        response = await client.embed(texts=batch, model=self.config.embedder_model_name)
+        return response.embeddings

unstructured_ingest/v2/errors.py CHANGED Viewed

@@ -16,3 +16,10 @@ class QuotaError(UserError):
 class ProviderError(Exception):
     pass
+recognized_errors = [UserError, UserAuthError, RateLimitError, QuotaError, ProviderError]
+def is_internal_error(e: Exception) -> bool:
+    return any(isinstance(e, recognized_error) for recognized_error in recognized_errors)

unstructured_ingest/v2/processes/connectors/google_drive.py CHANGED Viewed

@@ -132,12 +132,141 @@ class GoogleDriveIndexer(Indexer):
         ]
     )
+    @staticmethod
+    def verify_drive_api_enabled(client) -> None:
+        from googleapiclient.errors import HttpError
+        """
+        Makes a lightweight API call to verify that the Drive API is enabled.
+        If the API is not enabled, an HttpError should be raised.
+        """
+        try:
+            # A very minimal call: list 1 file from the drive.
+            client.list(spaces="drive", pageSize=1, fields="files(id)").execute()
+        except HttpError as e:
+            error_content = e.content.decode() if hasattr(e, "content") else ""
+            lower_error = error_content.lower()
+            if "drive api" in lower_error and (
+                "not enabled" in lower_error or "not been used" in lower_error
+            ):
+                raise SourceConnectionError(
+                    "Google Drive API is not enabled for your project. \
+                    Please enable it in the Google Cloud Console."
+                )
+            else:
+                raise SourceConnectionError("Google drive API unreachable for an unknown reason!")
+    @staticmethod
+    def count_files_recursively(files_client, folder_id: str, extensions: list[str] = None) -> int:
+        """
+        Count non-folder files recursively under the given folder.
+        If `extensions` is provided, only count files
+        whose `fileExtension` matches one of the values.
+        """
+        count = 0
+        stack = [folder_id]
+        while stack:
+            current_folder = stack.pop()
+            # Always list all items under the current folder.
+            query = f"'{current_folder}' in parents"
+            page_token = None
+            while True:
+                response = files_client.list(
+                    spaces="drive",
+                    q=query,
+                    fields="nextPageToken, files(id, mimeType, fileExtension)",
+                    pageToken=page_token,
+                    pageSize=1000,
+                ).execute()
+                for item in response.get("files", []):
+                    if item.get("mimeType") == "application/vnd.google-apps.folder":
+                        # Always traverse sub-folders regardless of extension filter.
+                        stack.append(item["id"])
+                    else:
+                        if extensions:
+                            # Use a case-insensitive comparison for the file extension.
+                            file_ext = (item.get("fileExtension") or "").lower()
+                            valid_exts = [e.lower() for e in extensions]
+                            if file_ext in valid_exts:
+                                count += 1
+                        else:
+                            count += 1
+                page_token = response.get("nextPageToken")
+                if not page_token:
+                    break
+        return count
     def precheck(self) -> None:
+        """
+        Enhanced precheck that verifies not only connectivity
+        but also that the provided drive_id is valid and accessible.
+        """
         try:
-            self.connection_config.get_client()
+            with self.connection_config.get_client() as client:
+                # First, verify that the Drive API is enabled.
+                self.verify_drive_api_enabled(client)
+                # Try to retrieve metadata for the drive id.
+                # This will catch errors such as an invalid drive id or insufficient permissions.
+                root_info = self.get_root_info(
+                    files_client=client, object_id=self.connection_config.drive_id
+                )
+                logger.info(
+                    f"Successfully retrieved drive root info: "
+                    f"{root_info.get('name', 'Unnamed')} (ID: {root_info.get('id')})"
+                )
+            # If the target is a folder, perform file count check.
+            if self.is_dir(root_info):
+                if self.index_config.recursive:
+                    file_count = self.count_files_recursively(
+                        client,
+                        self.connection_config.drive_id,
+                        extensions=self.index_config.extensions,
+                    )
+                    if file_count == 0:
+                        logger.warning(
+                            "Empty folder: no files found recursively in the folder. \
+                             Please verify that the folder contains files and \
+                             that the service account has proper permissions."
+                        )
+                        # raise SourceConnectionError(
+                        #     "Empty folder: no files found recursively in the folder. "
+                        #     "Please verify that the folder contains files and \
+                        #     that the service account has proper permissions."
+                        # )
+                    else:
+                        logger.info(f"Found {file_count} files recursively in the folder.")
+                else:
+                    # Non-recursive: check for at least one immediate non-folder child.
+                    response = client.list(
+                        spaces="drive",
+                        fields="files(id)",
+                        pageSize=1,
+                        q=f"'{self.connection_config.drive_id}' in parents",
+                    ).execute()
+                    if not response.get("files"):
+                        logger.warning(
+                            "Empty folder: no files found at the folder's root level. "
+                            "Please verify that the folder contains files and \
+                            that the service account has proper permissions."
+                        )
+                        # raise SourceConnectionError(
+                        #     "Empty folder: no files found at the folder's root level. "
+                        #     "Please verify that the folder contains files and \
+                        #     that the service account has proper permissions."
+                        # )
+                    else:
+                        logger.info("Found files at the folder's root level.")
+            else:
+                # If the target is a file, precheck passes.
+                logger.info("Drive ID corresponds to a file. Precheck passed.")
         except Exception as e:
-            logger.error(f"failed to validate connection: {e}", exc_info=True)
-            raise SourceConnectionError(f"failed to validate connection: {e}")
+            logger.error(
+                "Failed to validate Google Drive connection during precheck", exc_info=True
+            )
+            raise SourceConnectionError(f"Precheck failed: {e}")
     @staticmethod
     def is_dir(record: dict) -> bool:

unstructured-ingest 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

Potentially problematic release.

unstructured-ingest 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl