unstructured-ingest 0.5.14__py3-none-any.whl → 0.5.16__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.

Potentially problematic release: this version of unstructured-ingest might be problematic.

Files changed (39)
  1. test/integration/connectors/test_confluence.py +2 -2
  2. test/integration/connectors/test_zendesk.py +31 -53
  3. test/integration/connectors/utils/validation/source.py +5 -3
  4. test/unit/v2/connectors/test_confluence.py +35 -3
  5. unstructured_ingest/__version__.py +1 -1
  6. unstructured_ingest/embed/huggingface.py +3 -7
  7. unstructured_ingest/utils/data_prep.py +4 -2
  8. unstructured_ingest/v2/interfaces/file_data.py +1 -1
  9. unstructured_ingest/v2/interfaces/upload_stager.py +3 -6
  10. unstructured_ingest/v2/pipeline/pipeline.py +7 -0
  11. unstructured_ingest/v2/pipeline/steps/chunk.py +1 -1
  12. unstructured_ingest/v2/pipeline/steps/download.py +3 -3
  13. unstructured_ingest/v2/pipeline/steps/embed.py +1 -1
  14. unstructured_ingest/v2/pipeline/steps/index.py +4 -4
  15. unstructured_ingest/v2/pipeline/steps/partition.py +1 -1
  16. unstructured_ingest/v2/processes/connectors/confluence.py +20 -3
  17. unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +6 -0
  18. unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +6 -0
  19. unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +6 -0
  20. unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +6 -0
  21. unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +1 -1
  22. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +6 -0
  23. unstructured_ingest/v2/processes/connectors/fsspec/box.py +6 -0
  24. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +6 -0
  25. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +6 -0
  26. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +6 -0
  27. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +6 -0
  28. unstructured_ingest/v2/processes/connectors/local.py +8 -1
  29. unstructured_ingest/v2/processes/connectors/zendesk/client.py +221 -156
  30. unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +83 -274
  31. unstructured_ingest/v2/processes/embedder.py +3 -4
  32. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  33. unstructured_ingest/v2/processes/utils/blob_storage.py +31 -0
  34. {unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/METADATA +20 -20
  35. {unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/RECORD +39 -37
  36. {unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/LICENSE.md +0 -0
  37. {unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/WHEEL +0 -0
  38. {unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/entry_points.txt +0 -0
  39. {unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py

@@ -1,22 +1,17 @@
 from __future__ import annotations

-import datetime
 import hashlib
 from dataclasses import dataclass
 from pathlib import Path
 from time import time
-from typing import Any, AsyncGenerator, List, Literal
+from typing import Any, AsyncGenerator, Literal, Union

 from pydantic import BaseModel, Field, Secret

-from unstructured_ingest.utils.data_prep import batch_generator
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.utils.html import HtmlMixin
-from unstructured_ingest.v2.errors import UserAuthError
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
-    BatchFileData,
-    BatchItem,
     ConnectionConfig,
     Downloader,
     DownloaderConfig,
@@ -36,20 +31,11 @@ CONNECTOR_TYPE = "zendesk"


 class ZendeskAdditionalMetadata(BaseModel):
-    item_type: str
-    leading_id: str  # is the same as id just being verbose.
-    tail_id: str  # last id in the batch.
+    item_type: Literal["ticket", "article"]
+    content: Union[ZendeskTicket, ZendeskArticle]


-class ZendeskFileDataSourceMetadata(FileDataSourceMetadata):
-    """
-    inherits metadata object as tickets and articles
-    are treated in single batch, we need to denote indices ticket/article
-    as the source metadata.
-    """
-
-
-class ZendeskBatchFileData(BatchFileData):
+class ZendeskFileData(FileData):
     additional_metadata: ZendeskAdditionalMetadata


@@ -59,48 +45,21 @@ class ZendeskAccessConfig(AccessConfig):
     )


-class ZendeskBatchItemTicket(BatchItem):
-    subject: str
-    description: str
-    item_type: str = "tickets"  # placeholder for downloader
-
-
-class ZendeskBatchItemArticle(BatchItem):
-    title: str
-    author_id: str
-    title: str
-    content: str
-
-
 class ZendeskConnectionConfig(ConnectionConfig):
     subdomain: str = Field(description="Subdomain for zendesk site, <sub-domain>.company.com")
     email: str = Field(description="Email for zendesk site registered at the subdomain")
     access_config: Secret[ZendeskAccessConfig]

-    async def get_client_async(self) -> ZendeskClient:
-        """Provides an async manager for ZendeskClient."""
-        access_config = self.access_config.get_secret_value()
-
-        client = ZendeskClient(
-            email=self.email, subdomain=self.subdomain, token=access_config.api_token
-        )
-        return client
-
     def get_client(self) -> ZendeskClient:

         access_config = self.access_config.get_secret_value()

-        client = ZendeskClient(
+        return ZendeskClient(
             email=self.email, subdomain=self.subdomain, token=access_config.api_token
         )
-        return client


 class ZendeskIndexerConfig(IndexerConfig):
-    batch_size: int = Field(
-        default=2,
-        description="Number of tickets or articles.",
-    )
     item_type: Literal["tickets", "articles", "all"] = Field(
         default="tickets",
         description="Type of item from zendesk to parse, can only be `tickets` or `articles`.",
@@ -115,142 +74,76 @@ class ZendeskIndexer(Indexer):

     def precheck(self) -> None:
         """Validates connection to Zendesk API."""
-        try:
-            client = self.connection_config.get_client()
-            if not client.get_users():
-                subdomain_endpoint = f"{self.connection_config.subdomain}.zendesk.com"
-                raise UserAuthError(f"Users do not exist in subdomain {subdomain_endpoint}")
-        except UserAuthError as e:
-            logger.error(f"Source connection error: {e}", exc_info=True)
-            raise
-        except Exception as e:
-            logger.error(f"Failed to validate connection to Zendesk: {e}", exc_info=True)
-            raise UserAuthError(f"Failed to validate connection: {e}")
+        self.connection_config.get_client()

     def is_async(self) -> bool:
         return True

-    async def _list_articles_async(self) -> List[ZendeskArticle]:
-        client = await self.connection_config.get_client_async()
-        return await client.get_articles_async()
-
-    async def _list_tickets_async(self) -> List[ZendeskTicket]:
-        client = await self.connection_config.get_client_async()
-        return await client.get_tickets_async()
-
     def _generate_fullpath(self, identifier: str) -> Path:
         return Path(hashlib.sha256(identifier.encode("utf-8")).hexdigest()[:16] + ".txt")

-    async def handle_articles_async(
-        self, articles: List[ZendeskArticle], batch_size: int
-    ) -> AsyncGenerator[ZendeskBatchFileData, None]:
-        """Parses articles from a list and yields FileData objects asynchronously in batches."""
-        for article_batch in batch_generator(articles, batch_size=batch_size):
-
-            article_batch = sorted(article_batch)
-
-            additional_metadata = ZendeskAdditionalMetadata(
-                item_type="articles",
-                leading_id=str(article_batch[0].id),
-                tail_id=str(article_batch[-1].id),
-            )
-
-            metadata = ZendeskFileDataSourceMetadata(
-                date_processed=str(time()),
-                record_locator={
-                    "id": str(article_batch[0].id),
-                    "item_type": "articles",
-                },
-            )
-
-            batch_items: List[ZendeskBatchItemArticle] = [
-                ZendeskBatchItemArticle(
-                    identifier=str(article.id),
-                    author_id=str(article.author_id),
-                    title=str(article.title),
-                    content=str(article.content),
-                )
-                for article in article_batch
-            ]
-
-            full_path = self._generate_fullpath(str(article_batch[0].id))
-            full_path = Path(str(full_path).replace(".txt", ".html"))
-
-            source_identifiers = SourceIdentifiers(filename=full_path.name, fullpath=str(full_path))
-
-            batched_file_data = ZendeskBatchFileData(
-                identifier=str(article_batch[0].id),
-                connector_type=self.connector_type,
-                metadata=metadata,
-                batch_items=batch_items,
-                additional_metadata=additional_metadata,
-                source_identifiers=source_identifiers,
-            )
-
-            yield batched_file_data
-
-    async def handle_tickets_async(
-        self, tickets: List[ZendeskTicket], batch_size: int
-    ) -> AsyncGenerator[ZendeskBatchFileData, None]:
-        """Parses tickets from a list and yields FileData objects asynchronously in batches."""
-        for ticket_batch in batch_generator(tickets, batch_size=batch_size):
-
-            sorted_batch = sorted(ticket_batch)
-
-            additional_metadata = ZendeskAdditionalMetadata(
-                item_type="tickets",
-                leading_id=str(sorted_batch[0].id),
-                tail_id=str(sorted_batch[-1].id),
-            )
-
-            metadata = ZendeskFileDataSourceMetadata(
-                date_processed=str(time()),
-                record_locator={
-                    "id": str(sorted_batch[0].id),
-                    "item_type": "tickets",
-                },
-            )
-
-            batch_items: List[ZendeskBatchItemTicket] = [
-                ZendeskBatchItemTicket(
+    async def get_tickets(self) -> AsyncGenerator[ZendeskFileData, None]:
+        async with self.connection_config.get_client() as client:
+            async for ticket in client.get_tickets():
+                yield ZendeskFileData(
                     identifier=str(ticket.id),
-                    subject=str(ticket.subject),
-                    description=str(ticket.description),
+                    connector_type=self.connector_type,
+                    source_identifiers=SourceIdentifiers(
+                        filename=f"{ticket.id}.txt", fullpath=f"tickets/{ticket.id}.txt"
+                    ),
+                    additional_metadata=ZendeskAdditionalMetadata(
+                        item_type="ticket", content=ticket
+                    ),
+                    metadata=FileDataSourceMetadata(
+                        url=str(ticket.url) if ticket.url else None,
+                        date_created=ticket.created_at.isoformat() if ticket.created_at else None,
+                        date_modified=ticket.updated_at.isoformat() if ticket.updated_at else None,
+                        date_processed=str(time()),
+                    ),
                 )
-                for ticket in sorted_batch
-            ]
-
-            full_path = self._generate_fullpath(str(sorted_batch[0].id))
-            source_identifiers = SourceIdentifiers(filename=full_path.name, fullpath=str(full_path))
-
-            batched_file_data = ZendeskBatchFileData(
-                connector_type=self.connector_type,
-                metadata=metadata,
-                batch_items=batch_items,
-                additional_metadata=additional_metadata,
-                source_identifiers=source_identifiers,
-            )

-            yield batched_file_data
+    async def get_articles(self) -> AsyncGenerator[ZendeskFileData, None]:
+        async with self.connection_config.get_client() as client:
+            async for article in client.get_articles():
+                yield ZendeskFileData(
+                    identifier=str(article.id),
+                    connector_type=self.connector_type,
+                    source_identifiers=SourceIdentifiers(
+                        filename=f"{article.id}.html", fullpath=f"articles/{article.id}.html"
+                    ),
+                    additional_metadata=ZendeskAdditionalMetadata(
+                        item_type="article", content=article
+                    ),
+                    metadata=FileDataSourceMetadata(
+                        url=str(article.url) if article.url else None,
+                        date_created=article.created_at.isoformat() if article.created_at else None,
+                        date_modified=(
+                            article.updated_at.isoformat() if article.updated_at else None
+                        ),
+                        date_processed=str(time()),
+                    ),
+                )

-    async def run_async(self, **kwargs: Any) -> AsyncGenerator[FileData, None]:
+    async def run_async(self, **kwargs: Any) -> AsyncGenerator[ZendeskFileData, None]:
         """Determines item type and processes accordingly asynchronously."""
         item_type = self.index_config.item_type
-        batch_size = self.index_config.batch_size

         if item_type == "articles":
-            articles = await self._list_articles_async()
-            async for file_data in self.handle_articles_async(
-                articles, batch_size
-            ):  # Using async version
-                yield file_data
+            async for article_file_data in self.get_articles():
+                yield article_file_data

         elif item_type == "tickets":
-            tickets = await self._list_tickets_async()
-            async for file_data in self.handle_tickets_async(
-                tickets, batch_size
-            ):  # Using async version
-                yield file_data
+            async for ticket_file_data in self.get_tickets():
+                yield ticket_file_data
+
+        elif item_type == "all":
+            async for article_file_data in self.get_articles():
+                yield article_file_data
+            async for ticket_file_data in self.get_tickets():
+                yield ticket_file_data
+
+        else:
+            raise ValueError(f"Item type {item_type} is not supported by the indexer")


 class ZendeskDownloaderConfig(DownloaderConfig, HtmlMixin):
@@ -289,130 +182,46 @@ class ZendeskDownloader(Downloader):
             session=session,
         )

-    @requires_dependencies(["bs4", "aiofiles"], extras="zendesk")
-    async def handle_articles_async(
-        self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
-    ):
-        """
-        Processes the article information, downloads the attachments for each article,
-        and updates the content accordingly.
-        """
+    @requires_dependencies(["aiofiles", "bs4"], extras="zendesk")
+    async def download_article(self, article: ZendeskArticle, download_path: Path) -> None:
        import aiofiles
        import bs4

-        # Determine the download path
-        download_path = self.get_download_path(batch_file_data)
-
-        if download_path is None:
-            raise ValueError("Download path could not be determined")
-
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-
-        async with aiofiles.open(download_path, "a", encoding="utf8") as f:
-            for article in batch_file_data.batch_items:
-                html_data_str = article.content
-                soup = bs4.BeautifulSoup(html_data_str, "html.parser")
-
-                if self.download_config.extract_images:
-                    # Get article attachments asynchronously
-                    image_data_decoded: List = await client.get_article_attachments_async(
-                        article_id=article.identifier
-                    )
-                    img_tags = soup.find_all("img")
-
-                    # Ensure we don't exceed the available images
-                    for img_tag, img_data in zip(img_tags, image_data_decoded):
-                        img_tag["src"] = img_data.get("encoded_content", "")
-
+        article_html = article.as_html()
+        soup = bs4.BeautifulSoup(article_html, "html.parser")
+        async with aiofiles.open(download_path, "w", encoding="utf8") as f:
            await f.write(soup.prettify())

-        return super().generate_download_response(
-            file_data=batch_file_data, download_path=download_path
-        )
-
     @requires_dependencies(["aiofiles"], extras="zendesk")
-    async def handle_tickets_async(
-        self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
-    ) -> DownloadResponse:
-        """
-        Processes a batch of tickets asynchronously, writing their details and comments to a file.
-        """
+    async def download_ticket(self, ticket: ZendeskTicket, download_path: Path) -> None:
        import aiofiles

-        # Determine the download path
-        download_path = self.get_download_path(batch_file_data)
-        if download_path is None:
-            raise ValueError("Download path could not be determined")
-
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Process each ticket in the batch
-        async with aiofiles.open(download_path, "a", encoding="utf8") as f:
-            for batch_item in batch_file_data.batch_items:
-                ticket_identifier = batch_item.identifier
-                first_date = None
-                comments: List[dict] = []
-
-                # Fetch comments asynchronously
-                comments_list = await client.get_comments_async(ticket_id=int(ticket_identifier))
-
-                for comment in comments_list:  # Iterate over the resolved list
-                    date_created = (
-                        comment.metadata["created_at"].isoformat()
-                        if isinstance(comment.metadata["created_at"], datetime.datetime)
-                        else str(comment.metadata["created_at"])
-                    )
-
-                    if first_date is None:
-                        first_date = date_created
-
-                    comments.append(
-                        {
-                            "comment_id": comment.id,
-                            "author_id": comment.author_id,
-                            "body": comment.body,
-                            "date_created": date_created,
-                        }
-                    )
-
-                # Write ticket details to file
-                content = (
-                    "\nticket\n"
-                    f"{batch_item.identifier}\n"
-                    f"{batch_file_data.metadata.record_locator.get('subject', '')}\n"
-                    f"{batch_file_data.metadata.record_locator.get('description', '')}\n"
-                    f"{first_date}\n"
-                )
-
-                # Append comments
+        async with aiofiles.open(download_path, "w", encoding="utf8") as f:
+            await f.write(ticket.as_text())
+            async with self.connection_config.get_client() as client:
+                comments = [comment async for comment in client.get_comments(ticket_id=ticket.id)]
                for comment in comments:
-                    content += (
-                        "comment\n"
-                        f"{comment.get('comment_id', '')}\n"
-                        f"{comment.get('author_id', '')}\n"
-                        f"{comment.get('body', '')}\n"
-                        f"{comment.get('date_created', '')}\n"
-                    )
-
-                await f.write(content)
+                    await f.write(comment.as_text())

-        return super().generate_download_response(
-            file_data=batch_file_data, download_path=download_path
-        )
+    async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:

-    async def run_async(self, file_data: ZendeskBatchFileData, **kwargs: Any) -> DownloadResponse:
+        zendesk_filedata = ZendeskFileData.cast(file_data=file_data)

-        zendesk_filedata: FileData = FileData.cast(file_data=file_data)
-
-        client = await self.connection_config.get_client_async()
-        item_type = zendesk_filedata.metadata.record_locator["item_type"]
+        item_type = zendesk_filedata.additional_metadata.item_type
+        download_path = self.get_download_path(file_data=zendesk_filedata)
+        download_path.parent.mkdir(parents=True, exist_ok=True)

-        if item_type == "articles":
-            return await self.handle_articles_async(client, file_data)
-        elif item_type == "tickets":
-            return await self.handle_tickets_async(client, file_data)
+        if item_type == "article":
+            article = ZendeskArticle.model_validate(zendesk_filedata.additional_metadata.content)
+            await self.download_article(article=article, download_path=download_path)
+        elif item_type == "ticket":
+            ticket = ZendeskTicket.model_validate(zendesk_filedata.additional_metadata.content)
+            await self.download_ticket(ticket=ticket, download_path=download_path)
        else:
            raise RuntimeError(f"Item type {item_type} cannot be handled by the downloader")
+        return super().generate_download_response(
+            file_data=zendesk_filedata, download_path=download_path
+        )


 # create entry
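
The net effect of this rewrite is that the Zendesk indexer now emits one FileData per ticket or article, carrying the item itself in additional_metadata.content, instead of batches keyed by leading/tail ids. A minimal sketch of driving the new interface, assuming the class and field names shown in the diff above (the subdomain, email, and token values are placeholders, and the dataclass construction of ZendeskIndexer is an assumption about the surrounding code not shown here):

    import asyncio

    from unstructured_ingest.v2.processes.connectors.zendesk.zendesk import (
        ZendeskAccessConfig,
        ZendeskConnectionConfig,
        ZendeskIndexer,
        ZendeskIndexerConfig,
    )


    async def main() -> None:
        indexer = ZendeskIndexer(
            connection_config=ZendeskConnectionConfig(
                subdomain="example",  # placeholder subdomain
                email="user@example.com",  # placeholder email
                access_config=ZendeskAccessConfig(api_token="secret"),  # placeholder token
            ),
            index_config=ZendeskIndexerConfig(item_type="all"),
        )
        # Each yielded ZendeskFileData wraps a single ticket or article in
        # additional_metadata.content, rather than a batch of ids.
        async for file_data in indexer.run_async():
            print(file_data.additional_metadata.item_type, file_data.identifier)


    asyncio.run(main())
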
unstructured_ingest/v2/processes/embedder.py

@@ -1,4 +1,3 @@
-import json
 from abc import ABC
 from dataclasses import dataclass
 from pathlib import Path
@@ -6,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Literal, Optional

 from pydantic import BaseModel, Field, SecretStr

+from unstructured_ingest.utils.data_prep import get_data
 from unstructured_ingest.v2.interfaces.process import BaseProcess

 if TYPE_CHECKING:
@@ -192,9 +192,8 @@ class Embedder(BaseProcess, ABC):
     def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
         # TODO update base embedder classes to support async
         embedder = self.config.get_embedder()
-        with elements_filepath.open("r") as elements_file:
-            elements = json.load(elements_file)
+        elements = get_data(path=elements_filepath)
         if not elements:
-            return [e.to_dict() for e in elements]
+            return []
         embedded_elements = embedder.embed_documents(elements=elements)
         return embedded_elements
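
This change swaps a hard-coded json.load for the get_data helper from data_prep (which also changed in this release, +4 -2), and short-circuits empty inputs with an empty list instead of the old no-op comprehension. A hedged sketch of the new read path; the assumption that get_data dispatches on file suffix (e.g. .json vs .ndjson) and the file path below are illustrative, not confirmed by this diff:

    from pathlib import Path

    from unstructured_ingest.utils.data_prep import get_data

    # Hypothetical elements file produced by an earlier pipeline step.
    elements = get_data(path=Path("work/partitioned/doc.ndjson"))
    if not elements:
        print("nothing to embed")  # 0.5.16 returns [] here instead of iterating
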
unstructured_ingest/v2/processes/utils/__init__.py

File without changes
unstructured_ingest/v2/processes/utils/blob_storage.py (new file)

@@ -0,0 +1,31 @@
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from unstructured_ingest.utils.data_prep import get_data, write_data
+from unstructured_ingest.v2.interfaces import FileData, UploadStager, UploadStagerConfig
+
+
+class BlobStoreUploadStagerConfig(UploadStagerConfig):
+    pass
+
+
+@dataclass
+class BlobStoreUploadStager(UploadStager):
+    upload_stager_config: BlobStoreUploadStagerConfig = field(
+        default_factory=BlobStoreUploadStagerConfig
+    )
+
+    def run(
+        self,
+        elements_filepath: Path,
+        file_data: FileData,
+        output_dir: Path,
+        output_filename: str,
+        **kwargs: Any,
+    ) -> Path:
+        output_file = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
+        # Always save as json
+        data = get_data(elements_filepath)
+        write_data(path=output_file.with_suffix(".json"), data=data)
+        return output_file.with_suffix(".json")
{unstructured_ingest-0.5.14.dist-info → unstructured_ingest-0.5.16.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.5.14
+Version: 0.5.16
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -24,11 +24,11 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: pandas
 Requires-Dist: opentelemetry-sdk
-Requires-Dist: python-dateutil
-Requires-Dist: pydantic>=2.7
-Requires-Dist: click
 Requires-Dist: tqdm
 Requires-Dist: dataclasses_json
+Requires-Dist: pydantic>=2.7
+Requires-Dist: python-dateutil
+Requires-Dist: click
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
 Provides-Extra: csv
@@ -86,21 +86,21 @@ Requires-Dist: atlassian-python-api; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: delta-table
-Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: deltalake; extra == "delta-table"
+Requires-Dist: boto3; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
 Provides-Extra: dropbox
-Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Requires-Dist: fsspec; extra == "dropbox"
+Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
 Provides-Extra: elasticsearch
 Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
 Provides-Extra: gcs
-Requires-Dist: gcsfs; extra == "gcs"
-Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
+Requires-Dist: bs4; extra == "gcs"
+Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: requests; extra == "github"
 Requires-Dist: pygithub>1.58.0; extra == "github"
@@ -109,8 +109,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
 Provides-Extra: kafka
@@ -125,17 +125,17 @@ Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: neo4j
 Requires-Dist: neo4j-rust-ext; extra == "neo4j"
-Requires-Dist: networkx; extra == "neo4j"
 Requires-Dist: cymple; extra == "neo4j"
+Requires-Dist: networkx; extra == "neo4j"
 Provides-Extra: notion
 Requires-Dist: httpx; extra == "notion"
-Requires-Dist: backoff; extra == "notion"
-Requires-Dist: notion-client; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
+Requires-Dist: notion-client; extra == "notion"
+Requires-Dist: backoff; extra == "notion"
 Provides-Extra: onedrive
 Requires-Dist: msal; extra == "onedrive"
-Requires-Dist: bs4; extra == "onedrive"
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: bs4; extra == "onedrive"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: outlook
@@ -160,8 +160,8 @@ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
+Requires-Dist: paramiko; extra == "sftp"
 Provides-Extra: slack
 Requires-Dist: slack_sdk[optional]; extra == "slack"
 Provides-Extra: snowflake
@@ -178,22 +178,22 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: vectara
-Requires-Dist: httpx; extra == "vectara"
-Requires-Dist: requests; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
+Requires-Dist: requests; extra == "vectara"
+Requires-Dist: httpx; extra == "vectara"
 Provides-Extra: vastdb
 Requires-Dist: ibis; extra == "vastdb"
-Requires-Dist: pyarrow; extra == "vastdb"
 Requires-Dist: vastdb; extra == "vastdb"
+Requires-Dist: pyarrow; extra == "vastdb"
 Provides-Extra: zendesk
 Requires-Dist: httpx; extra == "zendesk"
-Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: bs4; extra == "zendesk"
+Requires-Dist: aiofiles; extra == "zendesk"
 Provides-Extra: embed-huggingface
 Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
-Requires-Dist: openai; extra == "embed-octoai"
 Requires-Dist: tiktoken; extra == "embed-octoai"
+Requires-Dist: openai; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -201,8 +201,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: openai
-Requires-Dist: openai; extra == "openai"
 Requires-Dist: tiktoken; extra == "openai"
+Requires-Dist: openai; extra == "openai"
 Provides-Extra: bedrock
 Requires-Dist: aioboto3; extra == "bedrock"
 Requires-Dist: boto3; extra == "bedrock"