unstructured-ingest 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- test/integration/connectors/sql/test_vastdb.py +34 -0
- test/integration/connectors/test_google_drive.py +257 -0
- test/unit/v2/connectors/motherduck/__init__.py +0 -0
- test/unit/v2/connectors/motherduck/test_base.py +74 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/bedrock.py +13 -6
- unstructured_ingest/embed/huggingface.py +11 -4
- unstructured_ingest/embed/interfaces.py +2 -21
- unstructured_ingest/embed/mixedbreadai.py +13 -4
- unstructured_ingest/embed/octoai.py +13 -6
- unstructured_ingest/embed/openai.py +13 -6
- unstructured_ingest/embed/togetherai.py +13 -4
- unstructured_ingest/embed/vertexai.py +13 -6
- unstructured_ingest/embed/voyageai.py +13 -4
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +2 -0
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +5 -4
- unstructured_ingest/v2/processes/connectors/google_drive.py +144 -13
- unstructured_ingest/v2/processes/connectors/pinecone.py +1 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +53 -3
- unstructured_ingest/v2/processes/connectors/sql/sql.py +3 -47
- unstructured_ingest/v2/processes/connectors/sql/vastdb.py +4 -12
- {unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/METADATA +18 -18
- {unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/RECORD +27 -23
- {unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/top_level.txt +0 -0

unstructured_ingest/embed/vertexai.py
@@ -9,6 +9,7 @@ from pydantic import Field, Secret, ValidationError
 from pydantic.functional_validators import BeforeValidator
 
 from unstructured_ingest.embed.interfaces import (
+    EMBEDDINGS_KEY,
     AsyncBaseEmbeddingEncoder,
     BaseEmbeddingEncoder,
     EmbeddingConfig,
@@ -75,9 +76,12 @@ class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder):
         return self._embed_documents(elements=[query])[0]
 
     def embed_documents(self, elements: list[dict]) -> list[dict]:
-
-
-
+        elements = elements.copy()
+        elements_with_text = [e for e in elements if e.get("text")]
+        embeddings = self._embed_documents([e["text"] for e in elements_with_text])
+        for element, embedding in zip(elements_with_text, embeddings):
+            element[EMBEDDINGS_KEY] = embedding
+        return elements
 
     @requires_dependencies(
         ["vertexai"],
@@ -110,9 +114,12 @@ class AsyncVertexAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
         return embedding[0]
 
     async def embed_documents(self, elements: list[dict]) -> list[dict]:
-
-
-
+        elements = elements.copy()
+        elements_with_text = [e for e in elements if e.get("text")]
+        embeddings = await self._embed_documents([e["text"] for e in elements_with_text])
+        for element, embedding in zip(elements_with_text, embeddings):
+            element[EMBEDDINGS_KEY] = embedding
+        return elements
 
     @requires_dependencies(
         ["vertexai"],

unstructured_ingest/embed/voyageai.py
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Optional
 from pydantic import Field, SecretStr
 
 from unstructured_ingest.embed.interfaces import (
+    EMBEDDINGS_KEY,
     AsyncBaseEmbeddingEncoder,
     BaseEmbeddingEncoder,
     EmbeddingConfig,
@@ -107,8 +108,12 @@ class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder):
         return embeddings
 
     def embed_documents(self, elements: list[dict]) -> list[dict]:
-
-
+        elements = elements.copy()
+        elements_with_text = [e for e in elements if e.get("text")]
+        embeddings = self._embed_documents([e["text"] for e in elements_with_text])
+        for element, embedding in zip(elements_with_text, embeddings):
+            element[EMBEDDINGS_KEY] = embedding
+        return elements
 
     def embed_query(self, query: str) -> list[float]:
         return self._embed_documents(elements=[query])[0]
@@ -135,8 +140,12 @@ class AsyncVoyageAIEmbeddingEncoder(AsyncBaseEmbeddingEncoder):
         return embeddings
 
     async def embed_documents(self, elements: list[dict]) -> list[dict]:
-
-
+        elements = elements.copy()
+        elements_with_text = [e for e in elements if e.get("text")]
+        embeddings = await self._embed_documents([e["text"] for e in elements_with_text])
+        for element, embedding in zip(elements_with_text, embeddings):
+            element[EMBEDDINGS_KEY] = embedding
+        return elements
 
     async def embed_query(self, query: str) -> list[float]:
         embedding = await self._embed_documents(elements=[query])
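
Both the sync and async encoders above now share the same guard: elements with empty or missing `text` are passed through untouched, and only the rest are sent to the embedding backend, with the returned vectors written back under `EMBEDDINGS_KEY`. A minimal standalone sketch of that pattern; the stub backend below is illustrative only, and the literal value of `EMBEDDINGS_KEY` is assumed from context:

    EMBEDDINGS_KEY = "embeddings"  # assumed value; the real constant lives in unstructured_ingest.embed.interfaces

    def _embed_documents(texts: list[str]) -> list[list[float]]:
        # Stand-in for the real provider call (VertexAI, VoyageAI, ...).
        return [[float(len(t))] for t in texts]

    def embed_documents(elements: list[dict]) -> list[dict]:
        elements = elements.copy()
        # Elements with no text (e.g. page breaks) keep their dicts unchanged.
        elements_with_text = [e for e in elements if e.get("text")]
        embeddings = _embed_documents([e["text"] for e in elements_with_text])
        for element, embedding in zip(elements_with_text, embeddings):
            element[EMBEDDINGS_KEY] = embedding
        return elements

    print(embed_documents([{"text": "hello"}, {"text": ""}]))
    # [{'text': 'hello', 'embeddings': [5.0]}, {'text': ''}]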

unstructured_ingest/v2/processes/connectors/duckdb/base.py
@@ -81,6 +81,8 @@ class BaseDuckDBUploadStager(UploadStager):
         **kwargs: Any,
     ) -> Path:
         elements_contents = get_data(path=elements_filepath)
+        output_filename_suffix = Path(elements_filepath).suffix
+        output_filename = f"{Path(output_filename).stem}{output_filename_suffix}"
         output_path = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
 
         output = [
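
The two added lines make the staged output keep the same suffix as the incoming elements file. A quick illustration of the `pathlib` manipulation involved (file names are hypothetical):

    from pathlib import Path

    elements_filepath = "work/structured/report.ndjson"  # hypothetical input file
    output_filename = "report-chunked"                    # hypothetical base name

    output_filename_suffix = Path(elements_filepath).suffix
    output_filename = f"{Path(output_filename).stem}{output_filename_suffix}"

    print(output_filename)  # report-chunked.ndjson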

unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py
@@ -61,7 +61,7 @@ class MotherDuckConnectionConfig(ConnectionConfig):
                 "custom_user_agent": f"unstructured-io-ingest/{unstructured_io_ingest_version}"
             },
         ) as conn:
-            conn.sql(f
+            conn.sql(f'USE "{self.database}"')
             yield conn
 
     @contextmanager
@@ -102,11 +102,12 @@ class MotherDuckUploader(Uploader):
 
     def upload_dataframe(self, df: pd.DataFrame) -> None:
         logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ")
+        database = self.connection_config.database
+        db_schema = self.connection_config.db_schema
+        table = self.connection_config.table
 
         with self.connection_config.get_client() as conn:
-            conn.query(
-                f"INSERT INTO {self.connection_config.db_schema}.{self.connection_config.table} BY NAME SELECT * FROM df"  # noqa: E501
-            )
+            conn.query(f'INSERT INTO "{database}"."{db_schema}"."{table}" BY NAME SELECT * FROM df')
 
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         df = pd.DataFrame(data=data)
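
Both MotherDuck changes move to fully qualified, double-quoted identifiers, so database, schema, or table names that are mixed-case or contain special characters no longer break the `USE` or `INSERT` statements. A small sketch of the string being built (the names are hypothetical):

    database = "my-md-database"  # hypothetical MotherDuck database
    db_schema = "Main"           # hypothetical schema
    table = "elements"           # hypothetical table

    stmt = f'INSERT INTO "{database}"."{db_schema}"."{table}" BY NAME SELECT * FROM df'
    print(stmt)
    # INSERT INTO "my-md-database"."Main"."elements" BY NAME SELECT * FROM df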

unstructured_ingest/v2/processes/connectors/google_drive.py
@@ -132,12 +132,141 @@ class GoogleDriveIndexer(Indexer):
         ]
     )
 
+    @staticmethod
+    def verify_drive_api_enabled(client) -> None:
+        from googleapiclient.errors import HttpError
+
+        """
+        Makes a lightweight API call to verify that the Drive API is enabled.
+        If the API is not enabled, an HttpError should be raised.
+        """
+        try:
+            # A very minimal call: list 1 file from the drive.
+            client.list(spaces="drive", pageSize=1, fields="files(id)").execute()
+        except HttpError as e:
+            error_content = e.content.decode() if hasattr(e, "content") else ""
+            lower_error = error_content.lower()
+            if "drive api" in lower_error and (
+                "not enabled" in lower_error or "not been used" in lower_error
+            ):
+                raise SourceConnectionError(
+                    "Google Drive API is not enabled for your project. \
+                    Please enable it in the Google Cloud Console."
+                )
+            else:
+                raise SourceConnectionError("Google drive API unreachable for an unknown reason!")
+
+    @staticmethod
+    def count_files_recursively(files_client, folder_id: str, extensions: list[str] = None) -> int:
+        """
+        Count non-folder files recursively under the given folder.
+        If `extensions` is provided, only count files
+        whose `fileExtension` matches one of the values.
+        """
+        count = 0
+        stack = [folder_id]
+        while stack:
+            current_folder = stack.pop()
+            # Always list all items under the current folder.
+            query = f"'{current_folder}' in parents"
+            page_token = None
+            while True:
+                response = files_client.list(
+                    spaces="drive",
+                    q=query,
+                    fields="nextPageToken, files(id, mimeType, fileExtension)",
+                    pageToken=page_token,
+                    pageSize=1000,
+                ).execute()
+                for item in response.get("files", []):
+                    if item.get("mimeType") == "application/vnd.google-apps.folder":
+                        # Always traverse sub-folders regardless of extension filter.
+                        stack.append(item["id"])
+                    else:
+                        if extensions:
+                            # Use a case-insensitive comparison for the file extension.
+                            file_ext = (item.get("fileExtension") or "").lower()
+                            valid_exts = [e.lower() for e in extensions]
+                            if file_ext in valid_exts:
+                                count += 1
+                        else:
+                            count += 1
+                page_token = response.get("nextPageToken")
+                if not page_token:
+                    break
+        return count
+
     def precheck(self) -> None:
+        """
+        Enhanced precheck that verifies not only connectivity
+        but also that the provided drive_id is valid and accessible.
+        """
         try:
-            self.connection_config.get_client()
+            with self.connection_config.get_client() as client:
+                # First, verify that the Drive API is enabled.
+                self.verify_drive_api_enabled(client)
+
+                # Try to retrieve metadata for the drive id.
+                # This will catch errors such as an invalid drive id or insufficient permissions.
+                root_info = self.get_root_info(
+                    files_client=client, object_id=self.connection_config.drive_id
+                )
+                logger.info(
+                    f"Successfully retrieved drive root info: "
+                    f"{root_info.get('name', 'Unnamed')} (ID: {root_info.get('id')})"
+                )
+
+                # If the target is a folder, perform file count check.
+                if self.is_dir(root_info):
+                    if self.index_config.recursive:
+                        file_count = self.count_files_recursively(
+                            client,
+                            self.connection_config.drive_id,
+                            extensions=self.index_config.extensions,
+                        )
+                        if file_count == 0:
+                            logger.warning(
+                                "Empty folder: no files found recursively in the folder. \
+                                Please verify that the folder contains files and \
+                                that the service account has proper permissions."
+                            )
+                            # raise SourceConnectionError(
+                            #     "Empty folder: no files found recursively in the folder. "
+                            #     "Please verify that the folder contains files and \
+                            #     that the service account has proper permissions."
+                            # )
+                        else:
+                            logger.info(f"Found {file_count} files recursively in the folder.")
+                    else:
+                        # Non-recursive: check for at least one immediate non-folder child.
+                        response = client.list(
+                            spaces="drive",
+                            fields="files(id)",
+                            pageSize=1,
+                            q=f"'{self.connection_config.drive_id}' in parents",
+                        ).execute()
+                        if not response.get("files"):
+                            logger.warning(
+                                "Empty folder: no files found at the folder's root level. "
+                                "Please verify that the folder contains files and \
+                                that the service account has proper permissions."
+                            )
+                            # raise SourceConnectionError(
+                            #     "Empty folder: no files found at the folder's root level. "
+                            #     "Please verify that the folder contains files and \
+                            #     that the service account has proper permissions."
+                            # )
+                        else:
+                            logger.info("Found files at the folder's root level.")
+                else:
+                    # If the target is a file, precheck passes.
+                    logger.info("Drive ID corresponds to a file. Precheck passed.")
+
         except Exception as e:
-            logger.error(
-
+            logger.error(
+                "Failed to validate Google Drive connection during precheck", exc_info=True
+            )
+            raise SourceConnectionError(f"Precheck failed: {e}")
 
     @staticmethod
     def is_dir(record: dict) -> bool:
@@ -310,20 +439,22 @@ class GoogleDriveDownloader(Downloader):
         from googleapiclient.http import MediaIoBaseDownload
 
         logger.debug(f"fetching file: {file_data.source_identifiers.fullpath}")
-        mime_type = file_data.additional_metadata["mimeType"]
         record_id = file_data.identifier
+        mime_type = file_data.additional_metadata["mimeType"]
+        if not mime_type:
+            raise TypeError(
+                f"File not supported. Name: {file_data.source_identifiers.filename} "
+                f"ID: {record_id} "
+                f"MimeType: {mime_type}"
+            )
         with self.connection_config.get_client() as client:
-            if
+            if (
+                mime_type.startswith("application/vnd.google-apps")
+                and mime_type in GOOGLE_DRIVE_EXPORT_TYPES
+            ):
                 export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get(
-
+                    mime_type,  # type: ignore
                 )
-                if not export_mime:
-                    raise TypeError(
-                        f"File not supported. Name: {file_data.source_identifiers.filename} "
-                        f"ID: {record_id} "
-                        f"MimeType: {mime_type}"
-                    )
-
                 request = client.export_media(
                     fileId=record_id,
                     mimeType=export_mime,
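
The `client` used throughout the new indexer code behaves like a Drive v3 `files()` resource: it exposes `list(...)`, `export_media(...)` and friends. A hedged sketch of how such a resource is typically obtained with google-api-python-client and exercised the way the new precheck does; the credentials path and scope are placeholders, and the connector's own `get_client()` may construct it differently:

    from google.oauth2 import service_account
    from googleapiclient.discovery import build

    # Placeholder service-account key and read-only scope.
    creds = service_account.Credentials.from_service_account_file(
        "service-account.json",
        scopes=["https://www.googleapis.com/auth/drive.readonly"],
    )
    files_client = build("drive", "v3", credentials=creds).files()

    # The same style of lightweight call the precheck now issues.
    response = files_client.list(spaces="drive", pageSize=1, fields="files(id)").execute()
    print(response.get("files", []))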

unstructured_ingest/v2/processes/connectors/sql/snowflake.py
@@ -1,6 +1,7 @@
+import json
 from contextlib import contextmanager
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Generator, Optional
+from typing import TYPE_CHECKING, Any, Generator, Optional
 
 import numpy as np
 import pandas as pd
@@ -15,6 +16,7 @@ from unstructured_ingest.v2.processes.connector_registry import (
     SourceRegistryEntry,
 )
 from unstructured_ingest.v2.processes.connectors.sql.sql import (
+    _DATE_COLUMNS,
     SQLAccessConfig,
     SqlBatchFileData,
     SQLConnectionConfig,
@@ -26,6 +28,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
     SQLUploaderConfig,
     SQLUploadStager,
     SQLUploadStagerConfig,
+    parse_date_string,
 )
 
 if TYPE_CHECKING:
@@ -34,6 +37,17 @@ if TYPE_CHECKING:
 
 CONNECTOR_TYPE = "snowflake"
 
+_ARRAY_COLUMNS = (
+    "embeddings",
+    "languages",
+    "link_urls",
+    "link_texts",
+    "sent_from",
+    "sent_to",
+    "emphasized_text_contents",
+    "emphasized_text_tags",
+)
+
 
 class SnowflakeAccessConfig(SQLAccessConfig):
     password: Optional[str] = Field(default=None, description="DB password")
@@ -160,6 +174,42 @@ class SnowflakeUploader(SQLUploader):
     connector_type: str = CONNECTOR_TYPE
     values_delimiter: str = "?"
 
+    def prepare_data(
+        self, columns: list[str], data: tuple[tuple[Any, ...], ...]
+    ) -> list[tuple[Any, ...]]:
+        output = []
+        for row in data:
+            parsed = []
+            for column_name, value in zip(columns, row):
+                if column_name in _DATE_COLUMNS:
+                    if value is None or pd.isna(value):  # pandas is nan
+                        parsed.append(None)
+                    else:
+                        parsed.append(parse_date_string(value))
+                elif column_name in _ARRAY_COLUMNS:
+                    if not isinstance(value, list) and (
+                        value is None or pd.isna(value)
+                    ):  # pandas is nan
+                        parsed.append(None)
+                    else:
+                        parsed.append(json.dumps(value))
+                else:
+                    parsed.append(value)
+            output.append(tuple(parsed))
+        return output
+
+    def _parse_values(self, columns: list[str]) -> str:
+        return ",".join(
+            [
+                (
+                    f"PARSE_JSON({self.values_delimiter})"
+                    if col in _ARRAY_COLUMNS
+                    else self.values_delimiter
+                )
+                for col in columns
+            ]
+        )
+
     def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
         if self.can_delete():
             self.delete_by_record_id(file_data=file_data)
@@ -173,10 +223,10 @@ class SnowflakeUploader(SQLUploader):
         self._fit_to_schema(df=df)
 
         columns = list(df.columns)
-        stmt = "INSERT INTO {table_name} ({columns})
+        stmt = "INSERT INTO {table_name} ({columns}) SELECT {values}".format(
             table_name=self.upload_config.table_name,
             columns=",".join(columns),
-            values=
+            values=self._parse_values(columns),
         )
         logger.info(
             f"writing a total of {len(df)} elements via"
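
Taken together, `prepare_data` JSON-encodes list-valued metadata columns and `_parse_values` swaps their placeholders for `PARSE_JSON(?)`, so Snowflake receives real arrays instead of stringified Python lists. A small sketch of the statement and bound values the uploader ends up producing (table and column names are hypothetical, and `_ARRAY_COLUMNS` is trimmed to a few entries):

    import json

    _ARRAY_COLUMNS = ("embeddings", "languages", "link_urls", "link_texts")
    values_delimiter = "?"

    def parse_values(columns: list[str]) -> str:
        return ",".join(
            f"PARSE_JSON({values_delimiter})" if col in _ARRAY_COLUMNS else values_delimiter
            for col in columns
        )

    columns = ["id", "text", "embeddings"]        # hypothetical dataframe columns
    row = ("abc-123", "hello world", [0.1, 0.2])  # hypothetical row

    stmt = "INSERT INTO {table_name} ({columns}) SELECT {values}".format(
        table_name="ELEMENTS",  # hypothetical table
        columns=",".join(columns),
        values=parse_values(columns),
    )
    bound = tuple(json.dumps(v) if c in _ARRAY_COLUMNS else v for c, v in zip(columns, row))

    print(stmt)   # INSERT INTO ELEMENTS (id,text,embeddings) SELECT ?,?,PARSE_JSON(?)
    print(bound)  # ('abc-123', 'hello world', '[0.1, 0.2]')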

unstructured_ingest/v2/processes/connectors/sql/sql.py
@@ -38,48 +38,6 @@ from unstructured_ingest.v2.interfaces import (
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.utils import get_enhanced_element_id
 
-_COLUMNS = (
-    "id",
-    "element_id",
-    "text",
-    "embeddings",
-    "type",
-    "system",
-    "layout_width",
-    "layout_height",
-    "points",
-    "url",
-    "version",
-    "date_created",
-    "date_modified",
-    "date_processed",
-    "permissions_data",
-    "record_locator",
-    "category_depth",
-    "parent_id",
-    "attached_filename",
-    "filetype",
-    "last_modified",
-    "file_directory",
-    "filename",
-    "languages",
-    "page_number",
-    "links",
-    "page_name",
-    "link_urls",
-    "link_texts",
-    "sent_from",
-    "sent_to",
-    "subject",
-    "section",
-    "header_footer_type",
-    "emphasized_text_contents",
-    "emphasized_text_tags",
-    "text_as_html",
-    "regex_metadata",
-    "detection_class_prob",
-)
-
 _DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified")
 
 
@@ -270,10 +228,8 @@ class SQLUploadStager(UploadStager):
 
         data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)
 
-
-
-        element[RECORD_ID_LABEL] = file_data.identifier
-        return element
+        data[RECORD_ID_LABEL] = file_data.identifier
+        return data
 
     def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
         for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
@@ -375,7 +331,7 @@ class SQLUploader(Uploader):
         missing_columns = schema_fields - columns
 
         if columns_to_drop:
-            logger.
+            logger.info(
                 "Following columns will be dropped to match the table's schema: "
                 f"{', '.join(columns_to_drop)}"
             )

unstructured_ingest/v2/processes/connectors/sql/vastdb.py
@@ -19,7 +19,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
     SourceRegistryEntry,
 )
 from unstructured_ingest.v2.processes.connectors.sql.sql import (
-    _COLUMNS,
     SQLAccessConfig,
     SqlBatchFileData,
     SQLConnectionConfig,
@@ -149,13 +148,11 @@ class VastdbUploadStagerConfig(SQLUploadStagerConfig):
         default=None,
         description="Map of column names to rename, ex: {'old_name': 'new_name'}",
     )
-    additional_columns: Optional[list[str]] = Field(
-        default_factory=list, description="Additional columns to include in the upload"
-    )
 
 
+@dataclass
 class VastdbUploadStager(SQLUploadStager):
-    upload_stager_config: VastdbUploadStagerConfig
+    upload_stager_config: VastdbUploadStagerConfig = field(default_factory=VastdbUploadStagerConfig)
 
     def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         data = element_dict.copy()
@@ -168,13 +165,8 @@ class VastdbUploadStager(SQLUploadStager):
         data.update(coordinates)
 
         data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)
-
-
-        # but also allow for additional columns
-        approved_columns = set(_COLUMNS).union(self.upload_stager_config.additional_columns)
-        element = {k: v for k, v in data.items() if k in approved_columns}
-        element[RECORD_ID_LABEL] = file_data.identifier
-        return element
+        data[RECORD_ID_LABEL] = file_data.identifier
+        return data
 
     def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
         df = super().conform_dataframe(df=df)
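
With `VastdbUploadStager` now a `@dataclass`, its config attribute takes `field(default_factory=...)` rather than a bare annotation or instance default: a factory gives every stager its own config object, while a plain mutable default would be shared or rejected outright depending on the type. A generic illustration of the pattern (the classes here are stand-ins, not the connector's real ones):

    from dataclasses import dataclass, field

    @dataclass
    class StagerConfig:  # stand-in for VastdbUploadStagerConfig
        rename_columns: dict = field(default_factory=dict)

    @dataclass
    class Stager:  # stand-in for VastdbUploadStager
        upload_stager_config: StagerConfig = field(default_factory=StagerConfig)

    # Each instance gets its own config object rather than sharing one default.
    a, b = Stager(), Stager()
    print(a.upload_stager_config is b.upload_stager_config)  # False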

{unstructured_ingest-0.5.0.dist-info → unstructured_ingest-0.5.2.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unstructured-ingest
-Version: 0.5.0
+Version: 0.5.2
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -23,37 +23,37 @@ Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: pandas
-Requires-Dist: dataclasses-json
 Requires-Dist: pydantic>=2.7
-Requires-Dist:
-Requires-Dist: tqdm
+Requires-Dist: dataclasses-json
 Requires-Dist: python-dateutil
 Requires-Dist: opentelemetry-sdk
+Requires-Dist: click
+Requires-Dist: tqdm
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: adlfs; extra == "azure"
 Requires-Dist: fsspec; extra == "azure"
+Requires-Dist: adlfs; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Provides-Extra: bedrock
-Requires-Dist: aioboto3; extra == "bedrock"
 Requires-Dist: boto3; extra == "bedrock"
+Requires-Dist: aioboto3; extra == "bedrock"
 Provides-Extra: biomed
 Requires-Dist: requests; extra == "biomed"
 Requires-Dist: bs4; extra == "biomed"
 Provides-Extra: box
-Requires-Dist: boxfs; extra == "box"
 Requires-Dist: fsspec; extra == "box"
+Requires-Dist: boxfs; extra == "box"
 Provides-Extra: chroma
 Requires-Dist: chromadb; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: requests; extra == "confluence"
 Requires-Dist: atlassian-python-api; extra == "confluence"
+Requires-Dist: requests; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: csv
@@ -63,8 +63,8 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
 Provides-Extra: databricks-volumes
 Requires-Dist: databricks-sdk; extra == "databricks-volumes"
 Provides-Extra: delta-table
-Requires-Dist: deltalake; extra == "delta-table"
 Requires-Dist: boto3; extra == "delta-table"
+Requires-Dist: deltalake; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
 Provides-Extra: doc
@@ -83,8 +83,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: embed-octoai
-Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: openai; extra == "embed-octoai"
+Requires-Dist: tiktoken; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -92,9 +92,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
 Provides-Extra: gcs
-Requires-Dist: gcsfs; extra == "gcs"
-Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
+Requires-Dist: bs4; extra == "gcs"
+Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: requests; extra == "github"
@@ -126,10 +126,10 @@ Requires-Dist: networkx; extra == "neo4j"
 Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: neo4j; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist:
+Requires-Dist: htmlBuilder; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
-Requires-Dist:
+Requires-Dist: httpx; extra == "notion"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
 Provides-Extra: onedrive
@@ -137,8 +137,8 @@ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
 Requires-Dist: bs4; extra == "onedrive"
 Requires-Dist: msal; extra == "onedrive"
 Provides-Extra: openai
-Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: org
@@ -174,8 +174,8 @@ Requires-Dist: fsspec; extra == "s3"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
+Requires-Dist: paramiko; extra == "sftp"
 Provides-Extra: sharepoint
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Requires-Dist: msal; extra == "sharepoint"
@@ -192,11 +192,11 @@ Provides-Extra: tsv
 Requires-Dist: unstructured[tsv]; extra == "tsv"
 Provides-Extra: vastdb
 Requires-Dist: vastdb; extra == "vastdb"
-Requires-Dist: ibis; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
+Requires-Dist: ibis; extra == "vastdb"
 Provides-Extra: vectara
-Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: requests; extra == "vectara"
+Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
 Provides-Extra: weaviate
 Requires-Dist: weaviate-client; extra == "weaviate"