unstructured-ingest 0.0.2.dev0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/cli/base/cmd.py +10 -0
- unstructured_ingest/v2/cli/base/src.py +2 -0
- unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +1 -9
- unstructured_ingest/v2/cli/cmds/local.py +0 -8
- unstructured_ingest/v2/cli/configs/__init__.py +8 -1
- unstructured_ingest/v2/cli/configs/filter.py +28 -0
- unstructured_ingest/v2/interfaces/__init__.py +2 -1
- unstructured_ingest/v2/interfaces/downloader.py +9 -3
- unstructured_ingest/v2/interfaces/file_data.py +6 -1
- unstructured_ingest/v2/interfaces/process.py +3 -4
- unstructured_ingest/v2/pipeline/interfaces.py +3 -5
- unstructured_ingest/v2/pipeline/pipeline.py +72 -2
- unstructured_ingest/v2/pipeline/steps/download.py +77 -13
- unstructured_ingest/v2/pipeline/steps/filter.py +40 -0
- unstructured_ingest/v2/processes/connectors/astra.py +8 -0
- unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +8 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +8 -6
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +9 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch.py +23 -9
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +8 -0
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +8 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +8 -0
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +30 -28
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +8 -0
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +21 -5
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +8 -0
- unstructured_ingest/v2/processes/connectors/google_drive.py +13 -9
- unstructured_ingest/v2/processes/connectors/local.py +15 -15
- unstructured_ingest/v2/processes/connectors/mongodb.py +10 -4
- unstructured_ingest/v2/processes/connectors/onedrive.py +14 -2
- unstructured_ingest/v2/processes/connectors/opensearch.py +33 -5
- unstructured_ingest/v2/processes/connectors/pinecone.py +6 -3
- unstructured_ingest/v2/processes/connectors/salesforce.py +10 -8
- unstructured_ingest/v2/processes/connectors/sharepoint.py +14 -8
- unstructured_ingest/v2/processes/connectors/sql.py +24 -9
- unstructured_ingest/v2/processes/connectors/weaviate.py +13 -5
- unstructured_ingest/v2/processes/filter.py +54 -0
- unstructured_ingest-0.0.3.dist-info/METADATA +175 -0
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.3.dist-info}/RECORD +43 -40
- unstructured_ingest-0.0.2.dev0.dist-info/METADATA +0 -321
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.3.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.3.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.2.dev0.dist-info → unstructured_ingest-0.0.3.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/opensearch.py

@@ -100,9 +100,15 @@ class OpenSearchConnectionConfig(ConnectionConfig):
         return OpenSearch(**self.get_client_kwargs())


+@dataclass
+class OpensearchIndexerConfig(ElasticsearchIndexerConfig):
+    pass
+
+
 @dataclass
 class OpenSearchIndexer(ElasticsearchIndexer):
     connection_config: OpenSearchConnectionConfig
+    index_config: OpensearchIndexerConfig
     client: "OpenSearch" = field(init=False)

     @requires_dependencies(["opensearchpy"], extras="opensearch")

@@ -112,9 +118,15 @@ class OpenSearchIndexer(ElasticsearchIndexer):
         return scan


+@dataclass
+class OpensearchDownloaderConfig(ElasticsearchDownloaderConfig):
+    pass
+
+
 @dataclass
 class OpenSearchDownloader(ElasticsearchDownloader):
     connection_config: OpenSearchConnectionConfig
+    download_config: OpensearchDownloaderConfig
     connector_type: str = CONNECTOR_TYPE

     @requires_dependencies(["opensearchpy"], extras="opensearch")

@@ -125,9 +137,15 @@ class OpenSearchDownloader(ElasticsearchDownloader):
         return AsyncOpenSearch, async_scan


+@dataclass
+class OpensearchUploaderConfig(ElasticsearchUploaderConfig):
+    pass
+
+
 @dataclass
 class OpenSearchUploader(ElasticsearchUploader):
     connection_config: OpenSearchConnectionConfig
+    upload_config: OpensearchUploaderConfig
     connector_type: str = CONNECTOR_TYPE

     @requires_dependencies(["opensearchpy"], extras="opensearch")

@@ -137,19 +155,29 @@ class OpenSearchUploader(ElasticsearchUploader):
         return parallel_bulk


+@dataclass
+class OpensearchUploadStagerConfig(ElasticsearchUploadStagerConfig):
+    pass
+
+
+@dataclass
+class OpensearchUploadStager(ElasticsearchUploadStager):
+    upload_stager_config: OpensearchUploadStagerConfig
+
+
 opensearch_source_entry = SourceRegistryEntry(
     connection_config=OpenSearchConnectionConfig,
     indexer=OpenSearchIndexer,
-    indexer_config=
+    indexer_config=OpensearchIndexerConfig,
     downloader=OpenSearchDownloader,
-    downloader_config=
+    downloader_config=OpensearchDownloaderConfig,
 )


 opensearch_destination_entry = DestinationRegistryEntry(
     connection_config=OpenSearchConnectionConfig,
-    upload_stager_config=
-    upload_stager=
-    uploader_config=
+    upload_stager_config=OpensearchUploadStagerConfig,
+    upload_stager=OpensearchUploadStager,
+    uploader_config=OpensearchUploaderConfig,
     uploader=OpenSearchUploader,
 )
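The OpenSearch changes above all follow one pattern: add empty `Opensearch*Config` dataclasses that subclass the Elasticsearch equivalents, pin them onto the connector classes, and reference them from the registry entries so each connector advertises its own config types. A minimal, self-contained sketch of that pattern (the `dict` connection config, the `index_name` field, and the default values are illustrative stand-ins, not the library's real fields):

```python
from dataclasses import dataclass, field


# Stand-ins for the shared Elasticsearch base classes.
@dataclass
class ElasticsearchIndexerConfig:
    index_name: str = "ingest-test"


@dataclass
class ElasticsearchIndexer:
    connection_config: dict
    index_config: ElasticsearchIndexerConfig = field(default_factory=ElasticsearchIndexerConfig)


# Connector-specific aliases, mirroring what the diff adds for OpenSearch:
# empty subclasses let the registry expose a distinct config type per
# connector while reusing the parent behaviour unchanged.
@dataclass
class OpensearchIndexerConfig(ElasticsearchIndexerConfig):
    pass


@dataclass
class OpenSearchIndexer(ElasticsearchIndexer):
    index_config: OpensearchIndexerConfig = field(default_factory=OpensearchIndexerConfig)


if __name__ == "__main__":
    indexer = OpenSearchIndexer(connection_config={"hosts": ["http://localhost:9200"]})
    print(type(indexer.index_config).__name__)  # OpensearchIndexerConfig
```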
unstructured_ingest/v2/processes/connectors/pinecone.py

@@ -123,9 +123,12 @@ class PineconeUploader(Uploader):
     connection_config: PineconeConnectionConfig
     connector_type: str = CONNECTOR_TYPE

-
-
-
+    def precheck(self):
+        try:
+            self.connection_config.get_index()
+        except Exception as e:
+            logger.error(f"failed to validate connection: {e}", exc_info=True)
+            raise DestinationConnectionError(f"failed to validate connection: {e}")

     @requires_dependencies(["pinecone"], extras="pinecone")
     def upsert_batch(self, batch):
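The new `precheck` gives the pipeline a cheap way to fail fast before any documents are processed: try to reach the destination once and re-raise any failure as a typed connection error. A self-contained sketch of that contract, with a stand-in exception class and a stand-in `get_index` callable (the real ones live in `unstructured_ingest.error` and on the connection config):

```python
import logging

logger = logging.getLogger(__name__)


class DestinationConnectionError(Exception):
    """Stand-in for unstructured_ingest.error.DestinationConnectionError."""


class PineconeUploaderSketch:
    """Illustrates the precheck contract: validate once, raise a typed error."""

    def __init__(self, get_index):
        # get_index stands in for self.connection_config.get_index()
        self._get_index = get_index

    def precheck(self) -> None:
        try:
            self._get_index()
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise DestinationConnectionError(f"failed to validate connection: {e}") from e


def failing_get_index():
    raise RuntimeError("bad API key")


if __name__ == "__main__":
    try:
        PineconeUploaderSketch(get_index=failing_get_index).precheck()
    except DestinationConnectionError as err:
        print(err)
```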
unstructured_ingest/v2/processes/connectors/salesforce.py

@@ -18,10 +18,9 @@ from textwrap import dedent
 from typing import TYPE_CHECKING, Any, Generator, Type

 from dateutil import parser
-from unstructured.documents.elements import DataSourceMetadata

 from unstructured_ingest.enhanced_dataclass import enhanced_field
-from unstructured_ingest.error import SourceConnectionNetworkError
+from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,

@@ -30,6 +29,7 @@ from unstructured_ingest.v2.interfaces import (
     DownloaderConfig,
     DownloadResponse,
     FileData,
+    FileDataSourceMetadata,
     Indexer,
     IndexerConfig,
     SourceIdentifiers,

@@ -132,6 +132,13 @@ class SalesforceIndexer(Indexer):
         if record_type not in ACCEPTED_CATEGORIES:
             raise ValueError(f"{record_type} not currently an accepted Salesforce category")

+    def precheck(self) -> None:
+        try:
+            self.connection_config.get_client()
+        except Exception as e:
+            logger.error(f"failed to validate connection: {e}", exc_info=True)
+            raise SourceConnectionError(f"failed to validate connection: {e}")
+
     def get_file_extension(self, record_type) -> str:
         if record_type == "EmailMessage":
             extension = ".eml"

@@ -172,7 +179,7 @@ class SalesforceIndexer(Indexer):
                 filename=record_with_extension,
                 fullpath=f"{record['attributes']['type']}/{record_with_extension}",
             ),
-            metadata=
+            metadata=FileDataSourceMetadata(
                 url=record["attributes"]["url"],
                 version=str(parser.parse(record["SystemModstamp"]).timestamp()),
                 date_created=str(parser.parse(record["CreatedDate"]).timestamp()),

@@ -207,11 +214,6 @@ class SalesforceDownloader(Downloader):
     )
     connector_type: str = CONNECTOR_TYPE

-    def get_download_path(self, file_data: FileData) -> Path:
-        rel_path = file_data.source_identifiers.relative_path
-        rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
-        return self.download_dir / Path(rel_path)
-
     def _xml_for_record(self, record: OrderedDict) -> str:
         """Creates partitionable xml file from a record"""
         import xml.etree.ElementTree as ET
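Besides the `precheck` hook, the Salesforce indexer now builds its metadata with `FileDataSourceMetadata` from `unstructured_ingest.v2.interfaces` instead of the old `DataSourceMetadata` import from `unstructured`. The sketch below shows how the timestamp fields are derived with `dateutil`; the dataclass is a simplified stand-in for the real interface and the sample record values are invented:

```python
from dataclasses import dataclass
from typing import Optional

from dateutil import parser


# Simplified stand-in for unstructured_ingest.v2.interfaces.FileDataSourceMetadata;
# only the fields used in the hunk above are modelled.
@dataclass
class FileDataSourceMetadataSketch:
    url: Optional[str] = None
    version: Optional[str] = None
    date_created: Optional[str] = None


def metadata_from_salesforce_record(record: dict) -> FileDataSourceMetadataSketch:
    # Salesforce returns ISO-8601 timestamps; the connector stores them as
    # stringified epoch seconds, as the diff above shows.
    return FileDataSourceMetadataSketch(
        url=record["attributes"]["url"],
        version=str(parser.parse(record["SystemModstamp"]).timestamp()),
        date_created=str(parser.parse(record["CreatedDate"]).timestamp()),
    )


if __name__ == "__main__":
    record = {
        "attributes": {"url": "/services/data/v57.0/sobjects/Account/001xx000003DGb2AAG"},
        "SystemModstamp": "2024-05-01T12:30:00.000+0000",
        "CreatedDate": "2024-04-01T09:00:00.000+0000",
    }
    print(metadata_from_salesforce_record(record))
```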
unstructured_ingest/v2/processes/connectors/sharepoint.py

@@ -6,10 +6,8 @@ from time import time
 from typing import TYPE_CHECKING, Any, Generator, Optional
 from urllib.parse import quote

-from unstructured.documents.elements import DataSourceMetadata
-
 from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field
-from unstructured_ingest.error import SourceConnectionNetworkError
+from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,

@@ -18,6 +16,7 @@ from unstructured_ingest.v2.interfaces import (
     DownloaderConfig,
     DownloadResponse,
     FileData,
+    FileDataSourceMetadata,
     Indexer,
     IndexerConfig,
     SourceIdentifiers,

@@ -134,6 +133,14 @@ class SharepointIndexer(Indexer):
     connection_config: SharepointConnectionConfig
     index_config: SharepointIndexerConfig = field(default_factory=lambda: SharepointIndexerConfig())

+    def precheck(self) -> None:
+        try:
+            site_client = self.connection_config.get_client()
+            site_client.site_pages.pages.get().execute_query()
+        except Exception as e:
+            logger.error(f"failed to validate connection: {e}", exc_info=True)
+            raise SourceConnectionError(f"failed to validate connection: {e}")
+
     def list_files(self, folder: "Folder", recursive: bool = False) -> list["File"]:
         if not recursive:
             folder.expand(["Files"]).get().execute_query()

@@ -187,7 +194,7 @@ class SharepointIndexer(Indexer):
                 fullpath=file_path,
                 rel_path=file_path.replace(self.index_config.path, ""),
             ),
-            metadata=
+            metadata=FileDataSourceMetadata(
                 url=url,
                 version=version,
                 date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None,

@@ -222,7 +229,7 @@ class SharepointIndexer(Indexer):
                 fullpath=fullpath,
                 rel_path=rel_path,
             ),
-            metadata=
+            metadata=FileDataSourceMetadata(
                 url=absolute_url,
                 version=f"{file.major_version}.{file.minor_version}",
                 date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None,

@@ -340,10 +347,9 @@ class SharepointDownloader(Downloader):
     connector_type: str = CONNECTOR_TYPE

     def get_download_path(self, file_data: FileData) -> Path:
+        download_path = super().get_download_path(file_data=file_data)
+
         content_type = file_data.additional_metadata.get("sharepoint_content_type")
-        rel_path = file_data.source_identifiers.fullpath
-        rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
-        download_path = self.download_dir / Path(rel_path)
         if content_type == SharepointContentType.SITEPAGE.value:
             # Update output extension to html if site page
             download_path = download_path.with_suffix(".html")
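The SharePoint downloader no longer rebuilds the download path by hand; it delegates to the base `Downloader.get_download_path` and only swaps the suffix for site pages (the Salesforce downloader drops its override entirely for the same reason). A simplified, self-contained sketch of that delegation is below; the plain `rel_path` / `is_site_page` arguments replace the real `FileData` object and are assumptions made to keep the example standalone:

```python
from pathlib import Path


class BaseDownloaderSketch:
    """Stand-in for the shared Downloader base; the path logic now lives here."""

    def __init__(self, download_dir: Path):
        self.download_dir = download_dir

    def get_download_path(self, rel_path: str) -> Path:
        # Normalisation that the connectors previously duplicated.
        rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
        return self.download_dir / Path(rel_path)


class SharepointDownloaderSketch(BaseDownloaderSketch):
    def get_download_path(self, rel_path: str, is_site_page: bool = False) -> Path:
        # Delegate to the base class, then adjust only what is connector-specific.
        download_path = super().get_download_path(rel_path)
        if is_site_page:
            download_path = download_path.with_suffix(".html")
        return download_path


if __name__ == "__main__":
    downloader = SharepointDownloaderSketch(download_dir=Path("/tmp/ingest"))
    print(downloader.get_download_path("/sites/marketing/Home.aspx", is_site_page=True))
```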
unstructured_ingest/v2/processes/connectors/sql.py

@@ -4,13 +4,14 @@ import uuid
 from dataclasses import dataclass, field
 from datetime import date, datetime
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union

 import numpy as np
 import pandas as pd
 from dateutil import parser

 from unstructured_ingest.enhanced_dataclass import enhanced_field
+from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,

@@ -25,6 +26,11 @@ from unstructured_ingest.v2.interfaces import (
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry

+if TYPE_CHECKING:
+    from sqlite3 import Connection as SqliteConnection
+
+    from psycopg2.extensions import connection as PostgresConnection
+
 CONNECTOR_TYPE = "sql"
 ELEMENTS_TABLE_NAME = "elements"


@@ -41,7 +47,7 @@ class DatabaseType(str, enum.Enum):


 @dataclass
-class
+class SQLConnectionConfig(ConnectionConfig):
     db_type: DatabaseType = (
         # required default value here because of parent class
         DatabaseType.SQLITE

@@ -134,7 +140,7 @@ class SQLUploadStager(UploadStager):
         **kwargs: Any,
     ) -> Path:
         with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
+            elements_contents: list[dict] = json.load(elements_file)
         output_path = Path(output_dir) / Path(f"{output_filename}.json")
         output_path.parent.mkdir(parents=True, exist_ok=True)


@@ -151,7 +157,7 @@ class SQLUploadStager(UploadStager):
             data["id"] = str(uuid.uuid4())

             # remove extraneous, not supported columns
-
+            data = {k: v for k, v in data.items() if k in _COLUMNS}

             output.append(data)


@@ -185,23 +191,32 @@ class SQLUploaderConfig(UploaderConfig):
 class SQLUploader(Uploader):
     connector_type: str = CONNECTOR_TYPE
     upload_config: SQLUploaderConfig
-    connection_config:
+    connection_config: SQLConnectionConfig
+
+    def precheck(self) -> None:
+        try:
+            cursor = self.connection().cursor()
+            cursor.execute("SELECT 1;")
+            cursor.close()
+        except Exception as e:
+            logger.error(f"failed to validate connection: {e}", exc_info=True)
+            raise DestinationConnectionError(f"failed to validate connection: {e}")

     @property
-    def connection(self):
+    def connection(self) -> Callable[[], Union["SqliteConnection", "PostgresConnection"]]:
         if self.connection_config.db_type == DatabaseType.POSTGRESQL:
             return self._make_psycopg_connection
         elif self.connection_config.db_type == DatabaseType.SQLITE:
             return self._make_sqlite_connection
         raise ValueError(f"Unsupported database {self.connection_config.db_type} connection.")

-    def _make_sqlite_connection(self):
+    def _make_sqlite_connection(self) -> "SqliteConnection":
         from sqlite3 import connect

         return connect(database=self.connection_config.database)

     @requires_dependencies(["psycopg2"], extras="postgres")
-    def _make_psycopg_connection(self):
+    def _make_psycopg_connection(self) -> "PostgresConnection":
         from psycopg2 import connect

         return connect(

@@ -261,7 +276,7 @@ class SQLUploader(Uploader):


 sql_destination_entry = DestinationRegistryEntry(
-    connection_config=
+    connection_config=SQLConnectionConfig,
     uploader=SQLUploader,
     uploader_config=SQLUploaderConfig,
     upload_stager=SQLUploadStager,
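Two details in the SQL hunks are worth calling out: `connection` is a property that returns a connection factory rather than an open connection (so `precheck` has to call `self.connection()`), and the precheck itself is just a `SELECT 1;` round trip. The sketch below reproduces both using only the standard library's sqlite3; the psycopg2 branch is omitted, and the class is a stand-in rather than the package's real `SQLUploader`:

```python
import enum
import logging
import sqlite3
from typing import Callable

logger = logging.getLogger(__name__)


class DatabaseType(str, enum.Enum):
    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"


class SQLUploaderSketch:
    """Connection-factory dispatch plus the SELECT 1 precheck shown in the diff."""

    def __init__(self, db_type: DatabaseType, database: str):
        self.db_type = db_type
        self.database = database

    @property
    def connection(self) -> Callable[[], sqlite3.Connection]:
        # The property returns a factory, not an open connection; callers invoke it.
        if self.db_type == DatabaseType.SQLITE:
            return self._make_sqlite_connection
        raise ValueError(f"Unsupported database {self.db_type} connection.")

    def _make_sqlite_connection(self) -> sqlite3.Connection:
        return sqlite3.connect(database=self.database)

    def precheck(self) -> None:
        try:
            cursor = self.connection().cursor()
            cursor.execute("SELECT 1;")
            cursor.close()
        except Exception as e:
            logger.error(f"failed to validate connection: {e}", exc_info=True)
            raise


if __name__ == "__main__":
    SQLUploaderSketch(DatabaseType.SQLITE, ":memory:").precheck()
    print("sqlite connection validated")
```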
unstructured_ingest/v2/processes/connectors/weaviate.py

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
 from dateutil import parser

 from unstructured_ingest.enhanced_dataclass import enhanced_field
+from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,

@@ -156,15 +157,21 @@ class WeaviateUploaderConfig(UploaderConfig):
 class WeaviateUploader(Uploader):
     upload_config: WeaviateUploaderConfig
     connection_config: WeaviateConnectionConfig
-    client: Optional["Client"] = field(init=False)
     connector_type: str = CONNECTOR_TYPE

     @requires_dependencies(["weaviate"], extras="weaviate")
-    def
+    def get_client(self) -> "Client":
         from weaviate import Client

         auth = self._resolve_auth_method()
-
+        return Client(url=self.connection_config.host_url, auth_client_secret=auth)
+
+    def precheck(self) -> None:
+        try:
+            self.get_client()
+        except Exception as e:
+            logger.error(f"Failed to validate connection {e}", exc_info=True)
+            raise DestinationConnectionError(f"failed to validate connection: {e}")

     @requires_dependencies(["weaviate"], extras="weaviate")
     def _resolve_auth_method(self):

@@ -215,8 +222,9 @@ class WeaviateUploader(Uploader):
             f"at {self.connection_config.host_url}",
         )

-
-
+        client = self.get_client()
+        client.batch.configure(batch_size=self.upload_config.batch_size)
+        with client.batch as b:
             for e in elements_dict:
                 vector = e.pop("embeddings", None)
                 b.add_data_object(
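The Weaviate uploader stops caching a `client` field and instead builds the client on demand via `get_client()`, then configures and drains a batch inside a context manager. A runnable example would need a live Weaviate instance, so the sketch below models only that control flow with fake client and batch classes; everything named `Fake*` is an assumption made for the sketch, not part of `weaviate-client`:

```python
class FakeBatch:
    """Stand-in for weaviate.Client.batch: a context manager that flushes on exit."""

    def __init__(self):
        self.batch_size = None
        self.objects = []

    def configure(self, batch_size: int) -> None:
        self.batch_size = batch_size

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        print(f"flushed {len(self.objects)} objects in batches of {self.batch_size}")

    def add_data_object(self, data_object: dict, vector=None) -> None:
        self.objects.append((data_object, vector))


class FakeClient:
    """Stand-in for weaviate.Client; only the attributes used above are modelled."""

    def __init__(self, url: str):
        self.url = url
        self.batch = FakeBatch()


class WeaviateUploaderSketch:
    def __init__(self, host_url: str, batch_size: int):
        self.host_url = host_url
        self.batch_size = batch_size

    def get_client(self) -> FakeClient:
        # The diff drops the cached `client` field; each call builds a fresh client.
        return FakeClient(url=self.host_url)

    def run(self, elements: list) -> None:
        client = self.get_client()
        client.batch.configure(batch_size=self.batch_size)
        with client.batch as b:
            for e in elements:
                vector = e.pop("embeddings", None)
                b.add_data_object(data_object=e, vector=vector)


if __name__ == "__main__":
    WeaviateUploaderSketch("http://localhost:8080", batch_size=10).run(
        [{"text": "hello", "embeddings": [0.1, 0.2]}, {"text": "world"}]
    )
```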
unstructured_ingest/v2/processes/filter.py (new file)

@@ -0,0 +1,54 @@
+import fnmatch
+from abc import ABC
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
+from unstructured_ingest.v2.interfaces import FileData
+from unstructured_ingest.v2.interfaces.process import BaseProcess
+from unstructured_ingest.v2.logger import logger
+
+
+@dataclass
+class FiltererConfig(EnhancedDataClassJsonMixin):
+    file_glob: Optional[list[str]] = None
+    max_file_size: Optional[int] = None
+
+
+@dataclass
+class Filterer(BaseProcess, ABC):
+    config: FiltererConfig = field(default_factory=lambda: FiltererConfig())
+    filters: list[Callable[[FileData], bool]] = field(init=False, default_factory=list)
+
+    def __post_init__(self):
+        # Populate the filters based on values in config
+        if self.config.file_glob is not None:
+            self.filters.append(self.glob_filter)
+        if self.config.max_file_size:
+            self.filters.append(self.file_size_filter)
+
+    def is_async(self) -> bool:
+        return False
+
+    def file_size_filter(self, file_data: FileData) -> bool:
+        if filesize_bytes := file_data.metadata.filesize_bytes:
+            return filesize_bytes <= self.config.max_file_size
+        return True
+
+    def glob_filter(self, file_data: FileData) -> bool:
+        patterns = self.config.file_glob
+        path = file_data.source_identifiers.fullpath
+        for pattern in patterns:
+            if fnmatch.filter([path], pattern):
+                return True
+        logger.debug(f"The file {path!r} is discarded as it does not match any given glob.")
+        return False
+
+    def run(self, file_data: FileData, **kwargs: Any) -> Optional[FileData]:
+        for filter in self.filters:
+            if not filter(file_data):
+                logger.debug(
+                    f"filtered out file data due to {filter.__name__}: {file_data.identifier}"
+                )
+                return None
+        return file_data
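The new `Filterer` applies every configured predicate to each `FileData` and drops the record on the first failure; note that the `fnmatch` globs match against the full path and that a missing `filesize_bytes` never filters a file out. The standalone helper below restates that logic so it can be run without the package installed; `passes_filters` is a hypothetical function for illustration, not part of `unstructured-ingest`:

```python
import fnmatch
from typing import Optional


def passes_filters(
    fullpath: str,
    filesize_bytes: Optional[int],
    file_glob: Optional[list] = None,
    max_file_size: Optional[int] = None,
) -> bool:
    """Keep a file only if it matches some glob and is not larger than
    max_file_size; missing metadata never filters a file out."""
    if file_glob is not None and not any(
        fnmatch.filter([fullpath], pattern) for pattern in file_glob
    ):
        return False
    if max_file_size and filesize_bytes and filesize_bytes > max_file_size:
        return False
    return True


if __name__ == "__main__":
    print(passes_filters("docs/report.pdf", 10_000, file_glob=["*.pdf"], max_file_size=1_000_000))  # True
    print(passes_filters("docs/report.pdf", None, file_glob=["*.docx"]))  # False: no glob matches
    print(passes_filters("docs/report.pdf", 5_000_000, max_file_size=1_000_000))  # False: too large
```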
unstructured_ingest-0.0.3.dist-info/METADATA (new file)

@@ -0,0 +1,175 @@
+Metadata-Version: 2.1
+Name: unstructured-ingest
+Version: 0.0.3
+Summary: A library that prepares raw documents for downstream ML tasks.
+Home-page: https://github.com/Unstructured-IO/unstructured-ingest
+Author: Unstructured Technologies
+Author-email: devops@unstructuredai.io
+License: Apache-2.0
+Keywords: NLP PDF HTML CV XML parsing preprocessing
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Education
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9.0,<3.13
+Description-Content-Type: text/markdown
+Requires-Dist: unstructured
+Requires-Dist: python-dateutil
+Requires-Dist: pandas
+Provides-Extra: airtable
+Requires-Dist: pyairtable ; extra == 'airtable'
+Provides-Extra: astra
+Requires-Dist: astrapy ; extra == 'astra'
+Provides-Extra: azure
+Requires-Dist: fsspec ; extra == 'azure'
+Requires-Dist: adlfs ; extra == 'azure'
+Provides-Extra: azure-cognitive-search
+Requires-Dist: azure-search-documents ; extra == 'azure-cognitive-search'
+Provides-Extra: bedrock
+Requires-Dist: boto3 ; extra == 'bedrock'
+Requires-Dist: langchain-community ; extra == 'bedrock'
+Provides-Extra: biomed
+Requires-Dist: bs4 ; extra == 'biomed'
+Provides-Extra: box
+Requires-Dist: fsspec ; extra == 'box'
+Requires-Dist: boxfs ; extra == 'box'
+Provides-Extra: chroma
+Requires-Dist: typer <=0.9.0 ; extra == 'chroma'
+Requires-Dist: importlib-metadata >=7.1.0 ; extra == 'chroma'
+Requires-Dist: chromadb ; extra == 'chroma'
+Provides-Extra: clarifai
+Requires-Dist: clarifai ; extra == 'clarifai'
+Provides-Extra: confluence
+Requires-Dist: atlassian-python-api ; extra == 'confluence'
+Provides-Extra: csv
+Requires-Dist: unstructured[tsv] ; extra == 'csv'
+Provides-Extra: databricks-volumes
+Requires-Dist: databricks-sdk ; extra == 'databricks-volumes'
+Provides-Extra: delta-table
+Requires-Dist: fsspec ; extra == 'delta-table'
+Requires-Dist: deltalake ; extra == 'delta-table'
+Provides-Extra: discord
+Requires-Dist: discord-py ; extra == 'discord'
+Provides-Extra: doc
+Requires-Dist: unstructured[docx] ; extra == 'doc'
+Provides-Extra: docx
+Requires-Dist: unstructured[docx] ; extra == 'docx'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+Requires-Dist: fsspec ; extra == 'dropbox'
+Provides-Extra: elasticsearch
+Requires-Dist: elasticsearch[async] ; extra == 'elasticsearch'
+Provides-Extra: embed-huggingface
+Requires-Dist: sentence-transformers ; extra == 'embed-huggingface'
+Requires-Dist: langchain-community ; extra == 'embed-huggingface'
+Requires-Dist: huggingface ; extra == 'embed-huggingface'
+Provides-Extra: embed-octoai
+Requires-Dist: tiktoken ; extra == 'embed-octoai'
+Requires-Dist: openai ; extra == 'embed-octoai'
+Provides-Extra: embed-vertexai
+Requires-Dist: langchain ; extra == 'embed-vertexai'
+Requires-Dist: langchain-community ; extra == 'embed-vertexai'
+Requires-Dist: langchain-google-vertexai ; extra == 'embed-vertexai'
+Provides-Extra: embed-voyageai
+Requires-Dist: langchain ; extra == 'embed-voyageai'
+Requires-Dist: langchain-voyageai ; extra == 'embed-voyageai'
+Provides-Extra: epub
+Requires-Dist: unstructured[epub] ; extra == 'epub'
+Provides-Extra: gcs
+Requires-Dist: fsspec ; extra == 'gcs'
+Requires-Dist: bs4 ; extra == 'gcs'
+Requires-Dist: gcsfs ; extra == 'gcs'
+Provides-Extra: github
+Requires-Dist: pygithub >1.58.0 ; extra == 'github'
+Provides-Extra: gitlab
+Requires-Dist: python-gitlab ; extra == 'gitlab'
+Provides-Extra: google-drive
+Requires-Dist: google-api-python-client ; extra == 'google-drive'
+Provides-Extra: hubspot
+Requires-Dist: urllib3 ; extra == 'hubspot'
+Requires-Dist: hubspot-api-client ; extra == 'hubspot'
+Provides-Extra: jira
+Requires-Dist: atlassian-python-api ; extra == 'jira'
+Provides-Extra: kafka
+Requires-Dist: confluent-kafka ; extra == 'kafka'
+Provides-Extra: md
+Requires-Dist: unstructured[md] ; extra == 'md'
+Provides-Extra: milvus
+Requires-Dist: pymilvus ; extra == 'milvus'
+Provides-Extra: mongodb
+Requires-Dist: pymongo ; extra == 'mongodb'
+Provides-Extra: msg
+Requires-Dist: unstructured[msg] ; extra == 'msg'
+Provides-Extra: notion
+Requires-Dist: notion-client ; extra == 'notion'
+Requires-Dist: htmlBuilder ; extra == 'notion'
+Provides-Extra: odt
+Requires-Dist: unstructured[odt] ; extra == 'odt'
+Provides-Extra: onedrive
+Requires-Dist: bs4 ; extra == 'onedrive'
+Requires-Dist: msal ; extra == 'onedrive'
+Requires-Dist: Office365-REST-Python-Client ; extra == 'onedrive'
+Provides-Extra: openai
+Requires-Dist: tiktoken ; extra == 'openai'
+Requires-Dist: langchain-community ; extra == 'openai'
+Requires-Dist: openai ; extra == 'openai'
+Provides-Extra: opensearch
+Requires-Dist: opensearch-py ; extra == 'opensearch'
+Provides-Extra: org
+Requires-Dist: unstructured[org] ; extra == 'org'
+Provides-Extra: outlook
+Requires-Dist: msal ; extra == 'outlook'
+Requires-Dist: Office365-REST-Python-Client ; extra == 'outlook'
+Provides-Extra: pdf
+Requires-Dist: unstructured[pdf] ; extra == 'pdf'
+Provides-Extra: pinecone
+Requires-Dist: pinecone-client >=3.7.1 ; extra == 'pinecone'
+Provides-Extra: postgres
+Requires-Dist: psycopg2-binary ; extra == 'postgres'
+Provides-Extra: ppt
+Requires-Dist: unstructured[pptx] ; extra == 'ppt'
+Provides-Extra: pptx
+Requires-Dist: unstructured[pptx] ; extra == 'pptx'
+Provides-Extra: qdrant
+Requires-Dist: qdrant-client ; extra == 'qdrant'
+Provides-Extra: reddit
+Requires-Dist: praw ; extra == 'reddit'
+Provides-Extra: rst
+Requires-Dist: unstructured[rst] ; extra == 'rst'
+Provides-Extra: rtf
+Requires-Dist: unstructured[rtf] ; extra == 'rtf'
+Provides-Extra: s3
+Requires-Dist: fsspec ; extra == 's3'
+Requires-Dist: s3fs ; extra == 's3'
+Provides-Extra: salesforce
+Requires-Dist: simple-salesforce ; extra == 'salesforce'
+Provides-Extra: sftp
+Requires-Dist: fsspec ; extra == 'sftp'
+Requires-Dist: paramiko ; extra == 'sftp'
+Provides-Extra: sharepoint
+Requires-Dist: msal ; extra == 'sharepoint'
+Requires-Dist: Office365-REST-Python-Client ; extra == 'sharepoint'
+Provides-Extra: singlestore
+Requires-Dist: singlestoredb ; extra == 'singlestore'
+Provides-Extra: slack
+Requires-Dist: slack-sdk ; extra == 'slack'
+Provides-Extra: tsv
+Requires-Dist: unstructured[tsv] ; extra == 'tsv'
+Provides-Extra: weaviate
+Requires-Dist: weaviate-client ; extra == 'weaviate'
+Provides-Extra: wikipedia
+Requires-Dist: wikipedia ; extra == 'wikipedia'
+Provides-Extra: xlsx
+Requires-Dist: unstructured[xlsx] ; extra == 'xlsx'
+
+# Unstructured Ingest
+
+For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.