PyPI - unstructured-ingest - Versions diffs - 0.7.2__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

unstructured-ingest 0.7.2__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (187) hide show

unstructured_ingest/__version__.py +1 -1
unstructured_ingest/cli/README.md +28 -0
unstructured_ingest/embed/mixedbreadai.py +0 -1
unstructured_ingest/interfaces/upload_stager.py +2 -2
unstructured_ingest/interfaces/uploader.py +3 -3
unstructured_ingest/main.py +0 -0
unstructured_ingest/pipeline/interfaces.py +1 -1
unstructured_ingest/pipeline/pipeline.py +1 -1
unstructured_ingest/processes/chunker.py +4 -0
unstructured_ingest/processes/connectors/airtable.py +4 -2
unstructured_ingest/processes/connectors/astradb.py +2 -2
unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
unstructured_ingest/processes/connectors/confluence.py +0 -1
unstructured_ingest/processes/connectors/databricks/volumes_aws.py +1 -1
unstructured_ingest/processes/connectors/databricks/volumes_azure.py +2 -2
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +1 -1
unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -2
unstructured_ingest/processes/connectors/delta_table.py +1 -0
unstructured_ingest/processes/connectors/duckdb/base.py +2 -2
unstructured_ingest/processes/connectors/duckdb/duckdb.py +3 -3
unstructured_ingest/processes/connectors/duckdb/motherduck.py +3 -3
unstructured_ingest/processes/connectors/fsspec/s3.py +5 -3
unstructured_ingest/processes/connectors/gitlab.py +1 -2
unstructured_ingest/processes/connectors/google_drive.py +0 -2
unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -7
unstructured_ingest/processes/connectors/kdbai.py +1 -0
unstructured_ingest/processes/connectors/outlook.py +1 -2
unstructured_ingest/processes/connectors/pinecone.py +0 -1
unstructured_ingest/processes/connectors/redisdb.py +28 -24
unstructured_ingest/processes/connectors/salesforce.py +1 -1
unstructured_ingest/processes/connectors/slack.py +1 -2
unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +5 -0
unstructured_ingest/processes/connectors/sql/postgres.py +7 -1
unstructured_ingest/processes/connectors/sql/singlestore.py +11 -6
unstructured_ingest/processes/connectors/sql/snowflake.py +5 -0
unstructured_ingest/processes/connectors/sql/sql.py +3 -4
unstructured_ingest/processes/connectors/sql/sqlite.py +5 -0
unstructured_ingest/processes/connectors/sql/vastdb.py +7 -3
unstructured_ingest/processes/connectors/vectara.py +0 -2
unstructured_ingest/processes/connectors/zendesk/zendesk.py +0 -2
unstructured_ingest/processes/embedder.py +2 -2
unstructured_ingest/processes/filter.py +1 -1
unstructured_ingest/processes/partitioner.py +4 -0
unstructured_ingest/processes/utils/blob_storage.py +2 -2
unstructured_ingest/unstructured_api.py +13 -8
unstructured_ingest/utils/data_prep.py +8 -32
unstructured_ingest-1.0.1.dist-info/METADATA +226 -0
{unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info}/RECORD +50 -184
{unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info}/WHEEL +1 -2
examples/__init__.py +0 -0
examples/airtable.py +0 -44
examples/azure_cognitive_search.py +0 -55
examples/chroma.py +0 -54
examples/couchbase.py +0 -55
examples/databricks_volumes_dest.py +0 -55
examples/databricks_volumes_source.py +0 -53
examples/delta_table.py +0 -45
examples/discord_example.py +0 -36
examples/elasticsearch.py +0 -49
examples/google_drive.py +0 -45
examples/kdbai.py +0 -54
examples/local.py +0 -36
examples/milvus.py +0 -44
examples/mongodb.py +0 -53
examples/opensearch.py +0 -50
examples/pinecone.py +0 -57
examples/s3.py +0 -38
examples/salesforce.py +0 -44
examples/sharepoint.py +0 -47
examples/singlestore.py +0 -49
examples/sql.py +0 -90
examples/vectara.py +0 -54
examples/weaviate.py +0 -44
test/__init__.py +0 -0
test/integration/__init__.py +0 -0
test/integration/chunkers/__init__.py +0 -0
test/integration/chunkers/test_chunkers.py +0 -31
test/integration/connectors/__init__.py +0 -0
test/integration/connectors/conftest.py +0 -38
test/integration/connectors/databricks/__init__.py +0 -0
test/integration/connectors/databricks/test_volumes_native.py +0 -273
test/integration/connectors/discord/__init__.py +0 -0
test/integration/connectors/discord/test_discord.py +0 -90
test/integration/connectors/duckdb/__init__.py +0 -0
test/integration/connectors/duckdb/conftest.py +0 -14
test/integration/connectors/duckdb/test_duckdb.py +0 -90
test/integration/connectors/duckdb/test_motherduck.py +0 -95
test/integration/connectors/elasticsearch/__init__.py +0 -0
test/integration/connectors/elasticsearch/conftest.py +0 -34
test/integration/connectors/elasticsearch/test_elasticsearch.py +0 -331
test/integration/connectors/elasticsearch/test_opensearch.py +0 -326
test/integration/connectors/sql/__init__.py +0 -0
test/integration/connectors/sql/test_databricks_delta_tables.py +0 -170
test/integration/connectors/sql/test_postgres.py +0 -201
test/integration/connectors/sql/test_singlestore.py +0 -182
test/integration/connectors/sql/test_snowflake.py +0 -244
test/integration/connectors/sql/test_sqlite.py +0 -168
test/integration/connectors/sql/test_vastdb.py +0 -34
test/integration/connectors/test_astradb.py +0 -287
test/integration/connectors/test_azure_ai_search.py +0 -254
test/integration/connectors/test_chroma.py +0 -136
test/integration/connectors/test_confluence.py +0 -111
test/integration/connectors/test_delta_table.py +0 -183
test/integration/connectors/test_dropbox.py +0 -151
test/integration/connectors/test_github.py +0 -49
test/integration/connectors/test_google_drive.py +0 -257
test/integration/connectors/test_jira.py +0 -67
test/integration/connectors/test_lancedb.py +0 -247
test/integration/connectors/test_milvus.py +0 -208
test/integration/connectors/test_mongodb.py +0 -335
test/integration/connectors/test_neo4j.py +0 -244
test/integration/connectors/test_notion.py +0 -152
test/integration/connectors/test_onedrive.py +0 -163
test/integration/connectors/test_pinecone.py +0 -387
test/integration/connectors/test_qdrant.py +0 -216
test/integration/connectors/test_redis.py +0 -143
test/integration/connectors/test_s3.py +0 -184
test/integration/connectors/test_sharepoint.py +0 -222
test/integration/connectors/test_vectara.py +0 -282
test/integration/connectors/test_zendesk.py +0 -120
test/integration/connectors/utils/__init__.py +0 -0
test/integration/connectors/utils/constants.py +0 -13
test/integration/connectors/utils/docker.py +0 -151
test/integration/connectors/utils/docker_compose.py +0 -59
test/integration/connectors/utils/validation/__init__.py +0 -0
test/integration/connectors/utils/validation/destination.py +0 -77
test/integration/connectors/utils/validation/equality.py +0 -76
test/integration/connectors/utils/validation/source.py +0 -331
test/integration/connectors/utils/validation/utils.py +0 -36
test/integration/connectors/weaviate/__init__.py +0 -0
test/integration/connectors/weaviate/conftest.py +0 -15
test/integration/connectors/weaviate/test_cloud.py +0 -39
test/integration/connectors/weaviate/test_local.py +0 -152
test/integration/embedders/__init__.py +0 -0
test/integration/embedders/conftest.py +0 -13
test/integration/embedders/test_azure_openai.py +0 -57
test/integration/embedders/test_bedrock.py +0 -103
test/integration/embedders/test_huggingface.py +0 -24
test/integration/embedders/test_mixedbread.py +0 -71
test/integration/embedders/test_octoai.py +0 -75
test/integration/embedders/test_openai.py +0 -74
test/integration/embedders/test_togetherai.py +0 -71
test/integration/embedders/test_vertexai.py +0 -63
test/integration/embedders/test_voyageai.py +0 -79
test/integration/embedders/utils.py +0 -66
test/integration/partitioners/__init__.py +0 -0
test/integration/partitioners/test_partitioner.py +0 -76
test/integration/utils.py +0 -15
test/unit/__init__.py +0 -0
test/unit/chunkers/__init__.py +0 -0
test/unit/chunkers/test_chunkers.py +0 -49
test/unit/connectors/__init__.py +0 -0
test/unit/connectors/ibm_watsonx/__init__.py +0 -0
test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +0 -459
test/unit/connectors/motherduck/__init__.py +0 -0
test/unit/connectors/motherduck/test_base.py +0 -73
test/unit/connectors/sql/__init__.py +0 -0
test/unit/connectors/sql/test_sql.py +0 -152
test/unit/connectors/test_confluence.py +0 -71
test/unit/connectors/test_jira.py +0 -401
test/unit/embed/__init__.py +0 -0
test/unit/embed/test_mixedbreadai.py +0 -42
test/unit/embed/test_octoai.py +0 -27
test/unit/embed/test_openai.py +0 -28
test/unit/embed/test_vertexai.py +0 -25
test/unit/embed/test_voyageai.py +0 -24
test/unit/embedders/__init__.py +0 -0
test/unit/embedders/test_bedrock.py +0 -36
test/unit/embedders/test_huggingface.py +0 -48
test/unit/embedders/test_mixedbread.py +0 -37
test/unit/embedders/test_octoai.py +0 -35
test/unit/embedders/test_openai.py +0 -35
test/unit/embedders/test_togetherai.py +0 -37
test/unit/embedders/test_vertexai.py +0 -37
test/unit/embedders/test_voyageai.py +0 -38
test/unit/partitioners/__init__.py +0 -0
test/unit/partitioners/test_partitioner.py +0 -63
test/unit/test_error.py +0 -27
test/unit/test_html.py +0 -112
test/unit/test_interfaces.py +0 -26
test/unit/test_utils.py +0 -220
test/unit/utils/__init__.py +0 -0
test/unit/utils/data_generator.py +0 -32
unstructured_ingest-0.7.2.dist-info/METADATA +0 -383
unstructured_ingest-0.7.2.dist-info/top_level.txt +0 -3
{unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info}/entry_points.txt +0 -0
{unstructured_ingest-0.7.2.dist-info → unstructured_ingest-1.0.1.dist-info/licenses}/LICENSE.md +0 -0

unstructured_ingest/processes/connectors/sql/sql.py CHANGED Viewed

@@ -36,9 +36,9 @@ from unstructured_ingest.interfaces import (
 from unstructured_ingest.logger import logger
 from unstructured_ingest.utils.constants import RECORD_ID_LABEL
 from unstructured_ingest.utils.data_prep import (
-    get_data,
     get_data_df,
     get_enhanced_element_id,
+    get_json_data,
     split_dataframe,
     write_data,
 )
@@ -122,8 +122,7 @@ class SQLIndexer(Indexer, ABC):
         id_batches: list[frozenset[str]] = [
             frozenset(
                 ids[
-                    i
-                    * self.index_config.batch_size : (i + 1)  # noqa
+                    i * self.index_config.batch_size : (i + 1)  # noqa
                     * self.index_config.batch_size
                 ]
             )
@@ -272,7 +271,7 @@ class SQLUploadStager(UploadStager):
     ) -> Path:
         import pandas as pd
-        elements_contents = get_data(path=elements_filepath)
+        elements_contents = get_json_data(path=elements_filepath)
         df = pd.DataFrame(
             data=[

unstructured_ingest/processes/connectors/sql/sqlite.py CHANGED Viewed

@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, Generator
 from pydantic import Field, Secret, model_validator
+from unstructured_ingest.data_types.file_data import FileData
 from unstructured_ingest.logger import logger
 from unstructured_ingest.processes.connector_registry import (
     DestinationRegistryEntry,
@@ -133,6 +134,10 @@ class SQLiteUploader(SQLUploader):
     connection_config: SQLiteConnectionConfig
     connector_type: str = CONNECTOR_TYPE
+    @requires_dependencies(["pandas"])
+    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+        super().run(path=path, file_data=file_data, **kwargs)
     @requires_dependencies(["pandas"])
     def prepare_data(
         self, columns: list[str], data: tuple[tuple[Any, ...], ...]

unstructured_ingest/processes/connectors/sql/vastdb.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from contextlib import contextmanager
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional
 from pydantic import Field, Secret
@@ -68,9 +69,8 @@ class VastdbConnectionConfig(SQLConnectionConfig):
     @contextmanager
     def get_cursor(self) -> "VastdbTransaction":
-        with self.get_connection() as connection:
-            with connection.transaction() as transaction:
-                yield transaction
+        with self.get_connection() as connection, connection.transaction() as transaction:
+            yield transaction
     @contextmanager
     def get_table(self, table_name: str) -> "VastdbTable":
@@ -190,6 +190,10 @@ class VastdbUploader(SQLUploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")
+    @requires_dependencies(["pandas"], extras="vastdb")
+    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+        super().run(path=path, file_data=file_data, **kwargs)
     @requires_dependencies(["pyarrow", "pandas"], extras="vastdb")
     def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
         import numpy as np

unstructured_ingest/processes/connectors/vectara.py CHANGED Viewed

@@ -108,7 +108,6 @@ class VectaraUploaderConfig(UploaderConfig):
 @dataclass
 class VectaraUploader(Uploader):
     connector_type: str = CONNECTOR_TYPE
     upload_config: VectaraUploaderConfig
     connection_config: VectaraConnectionConfig
@@ -336,7 +335,6 @@ class VectaraUploader(Uploader):
         file_data: FileData,
         **kwargs: Any,
     ) -> None:
         logger.info(f"inserting / updating {len(data)} documents to Vectara ")
         await asyncio.gather(*(self._index_document(vdoc) for vdoc in data))

unstructured_ingest/processes/connectors/zendesk/zendesk.py CHANGED Viewed

@@ -53,7 +53,6 @@ class ZendeskConnectionConfig(ConnectionConfig):
     access_config: Secret[ZendeskAccessConfig]
     def get_client(self) -> ZendeskClient:
         access_config = self.access_config.get_secret_value()
         return ZendeskClient(
@@ -206,7 +205,6 @@ class ZendeskDownloader(Downloader):
                     await f.write(comment.as_text())
     async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
         zendesk_filedata = ZendeskFileData.cast(file_data=file_data)
         item_type = zendesk_filedata.additional_metadata.item_type

unstructured_ingest/processes/embedder.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Literal, Optional
 from pydantic import BaseModel, Field, SecretStr
 from unstructured_ingest.interfaces.process import BaseProcess
-from unstructured_ingest.utils.data_prep import get_data
+from unstructured_ingest.utils.data_prep import get_json_data
 if TYPE_CHECKING:
     from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder
@@ -192,7 +192,7 @@ class Embedder(BaseProcess, ABC):
     def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
         # TODO update base embedder classes to support async
         embedder = self.config.get_embedder()
-        elements = get_data(path=elements_filepath)
+        elements = get_json_data(path=elements_filepath)
         if not elements:
             return []
         embedded_elements = embedder.embed_documents(elements=elements)

unstructured_ingest/processes/filter.py CHANGED Viewed

@@ -13,7 +13,7 @@ from unstructured_ingest.logger import logger
 class FiltererConfig(BaseModel):
     file_glob: Optional[list[str]] = Field(
         default=None,
-        description="file globs to limit which data_types of " "files are accepted",
+        description="file globs to limit which data_types of files are accepted",
         examples=["*.pdf", "*.html"],
     )
     max_file_size: Optional[int] = Field(

unstructured_ingest/processes/partitioner.py CHANGED Viewed

@@ -68,6 +68,9 @@ class PartitionerConfig(BaseModel):
         description="Use a remote API to partition the files."
         " Otherwise, use the function from partition.auto",
     )
+    api_timeout_ms: Optional[int] = Field(
+        default=None, description="Timeout in milliseconds for all api call during partitioning."
+    )
     api_key: Optional[SecretStr] = Field(
         default=None, description="API Key for partition endpoint."
     )
@@ -188,6 +191,7 @@ class Partitioner(BaseProcess, ABC):
             api_key=self.config.api_key.get_secret_value(),
             filename=filename,
             api_parameters=self.config.to_partition_kwargs(),
+            timeout_ms=self.config.api_timeout_ms,
         )
         # Append the data source metadata the auto partition does for you

unstructured_ingest/processes/utils/blob_storage.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import Any
 from unstructured_ingest.data_types.file_data import FileData
 from unstructured_ingest.interfaces import UploadStager, UploadStagerConfig
-from unstructured_ingest.utils.data_prep import get_data, write_data
+from unstructured_ingest.utils.data_prep import get_json_data, write_data
 class BlobStoreUploadStagerConfig(UploadStagerConfig):
@@ -27,6 +27,6 @@ class BlobStoreUploadStager(UploadStager):
     ) -> Path:
         output_file = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
         # Always save as json
-        data = get_data(elements_filepath)
+        data = get_json_data(elements_filepath)
         write_data(path=output_file.with_suffix(".json"), data=data)
         return output_file.with_suffix(".json")

unstructured_ingest/unstructured_api.py CHANGED Viewed

@@ -80,7 +80,11 @@ def wrap_error(e: Exception) -> Exception:
 async def call_api_async(
-    server_url: Optional[str], api_key: Optional[str], filename: Path, api_parameters: dict
+    server_url: Optional[str],
+    api_key: Optional[str],
+    filename: Path,
+    api_parameters: dict,
+    timeout_ms: Optional[int] = None,
 ) -> list[dict]:
     """Call the Unstructured API using unstructured-client.
@@ -94,13 +98,10 @@ async def call_api_async(
     """
     from unstructured_client import UnstructuredClient
-    client = UnstructuredClient(
-        server_url=server_url,
-        api_key_auth=api_key,
-    )
+    client = UnstructuredClient(server_url=server_url, api_key_auth=api_key)
     partition_request = create_partition_request(filename=filename, parameters_dict=api_parameters)
     try:
-        res = await client.general.partition_async(request=partition_request)
+        res = await client.general.partition_async(request=partition_request, timeout_ms=timeout_ms)
     except Exception as e:
         raise wrap_error(e)
@@ -108,7 +109,11 @@ async def call_api_async(
 def call_api(
-    server_url: Optional[str], api_key: Optional[str], filename: Path, api_parameters: dict
+    server_url: Optional[str],
+    api_key: Optional[str],
+    filename: Path,
+    api_parameters: dict,
+    timeout_ms: Optional[int] = None,
 ) -> list[dict]:
     """Call the Unstructured API using unstructured-client.
@@ -128,7 +133,7 @@ def call_api(
     )
     partition_request = create_partition_request(filename=filename, parameters_dict=api_parameters)
     try:
-        res = client.general.partition(request=partition_request)
+        res = client.general.partition(request=partition_request, timeout_ms=timeout_ms)
     except Exception as e:
         raise wrap_error(e)

unstructured_ingest/utils/data_prep.py CHANGED Viewed

@@ -2,7 +2,7 @@ import itertools
 import json
 from datetime import datetime
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generator, Iterable, Optional, Sequence, TypeVar, Union, cast
+from typing import TYPE_CHECKING, Any, Generator, Iterable, Optional, Sequence, TypeVar, cast
 from uuid import NAMESPACE_DNS, uuid5
 from unstructured_ingest.data_types.file_data import FileData
@@ -171,15 +171,13 @@ def write_data(path: Path, data: list[dict], indent: Optional[int] = 2) -> None:
             raise IOError("Unsupported file type: {path}")
-def get_data(path: Union[Path, str]) -> list[dict]:
-    if isinstance(path, str):
-        path = Path(path)
-    try:
-        return get_data_by_suffix(path=path)
-    except Exception as e:
-        logger.warning(f"failed to read {path} by extension: {e}")
-    # Fall back
+def get_json_data(path: Path) -> list[dict]:
     with path.open() as f:
+        # Attempt by prefix
+        if path.suffix == ".json":
+            return json.load(f)
+        elif path.suffix == ".ndjson":
+            return ndjson.load(f)
         try:
             return json.load(f)
         except Exception as e:
@@ -188,29 +186,7 @@ def get_data(path: Union[Path, str]) -> list[dict]:
             return ndjson.load(f)
         except Exception as e:
             logger.warning(f"failed to read {path} as ndjson: {e}")
-        import pandas as pd
-        try:
-            df = pd.read_csv(path)
-            return df.to_dict(orient="records")
-        except Exception as e:
-            logger.warning(f"failed to read {path} as csv: {e}")
-        try:
-            df = pd.read_parquet(path)
-            return df.to_dict(orient="records")
-        except Exception as e:
-            logger.warning(f"failed to read {path} as parquet: {e}")
-def get_json_data(path: Path) -> list[dict]:
-    with path.open() as f:
-        if path.suffix == ".json":
-            return json.load(f)
-        elif path.suffix == ".ndjson":
-            return ndjson.load(f)
-        else:
-            raise ValueError(f"Unsupported file type: {path}")
+    raise ValueError(f"Unsupported json file: {path}")
 @requires_dependencies(["pandas"])

unstructured_ingest-1.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,226 @@
+Metadata-Version: 2.4
+Name: unstructured_ingest
+Version: 1.0.1
+Summary: Local ETL data pipeline to get data RAG ready
+Author-email: Unstructured Technologies <devops@unstructuredai.io>
+License-Expression: Apache-2.0
+License-File: LICENSE.md
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Education
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: <3.13,>=3.9
+Requires-Dist: click
+Requires-Dist: dataclasses-json
+Requires-Dist: opentelemetry-sdk
+Requires-Dist: pydantic>=2.7
+Requires-Dist: python-dateutil
+Requires-Dist: tqdm
+Provides-Extra: airtable
+Requires-Dist: pandas; extra == 'airtable'
+Requires-Dist: pyairtable; extra == 'airtable'
+Provides-Extra: astradb
+Requires-Dist: astrapy; extra == 'astradb'
+Provides-Extra: azure
+Requires-Dist: adlfs; extra == 'azure'
+Requires-Dist: fsspec; extra == 'azure'
+Provides-Extra: azure-ai-search
+Requires-Dist: azure-search-documents; extra == 'azure-ai-search'
+Provides-Extra: bedrock
+Requires-Dist: aioboto3; extra == 'bedrock'
+Requires-Dist: boto3; extra == 'bedrock'
+Provides-Extra: biomed
+Requires-Dist: bs4; extra == 'biomed'
+Requires-Dist: requests; extra == 'biomed'
+Provides-Extra: box
+Requires-Dist: boxfs; extra == 'box'
+Requires-Dist: fsspec; extra == 'box'
+Provides-Extra: chroma
+Requires-Dist: chromadb; extra == 'chroma'
+Provides-Extra: clarifai
+Requires-Dist: clarifai; extra == 'clarifai'
+Provides-Extra: confluence
+Requires-Dist: atlassian-python-api; extra == 'confluence'
+Requires-Dist: requests; extra == 'confluence'
+Provides-Extra: couchbase
+Requires-Dist: couchbase; extra == 'couchbase'
+Provides-Extra: databricks-delta-tables
+Requires-Dist: databricks-sql-connector; extra == 'databricks-delta-tables'
+Requires-Dist: pandas; extra == 'databricks-delta-tables'
+Provides-Extra: databricks-volumes
+Requires-Dist: databricks-sdk; extra == 'databricks-volumes'
+Provides-Extra: delta-table
+Requires-Dist: boto3; extra == 'delta-table'
+Requires-Dist: deltalake; extra == 'delta-table'
+Requires-Dist: pandas; extra == 'delta-table'
+Provides-Extra: discord
+Requires-Dist: discord-py; extra == 'discord'
+Provides-Extra: doc
+Requires-Dist: unstructured[doc]; extra == 'doc'
+Provides-Extra: docx
+Requires-Dist: unstructured[docx]; extra == 'docx'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs; extra == 'dropbox'
+Requires-Dist: fsspec; extra == 'dropbox'
+Provides-Extra: duckdb
+Requires-Dist: duckdb; extra == 'duckdb'
+Requires-Dist: pandas; extra == 'duckdb'
+Provides-Extra: elasticsearch
+Requires-Dist: elasticsearch[async]; extra == 'elasticsearch'
+Provides-Extra: epub
+Requires-Dist: unstructured[epub]; extra == 'epub'
+Provides-Extra: gcs
+Requires-Dist: bs4; extra == 'gcs'
+Requires-Dist: fsspec; extra == 'gcs'
+Requires-Dist: gcsfs; extra == 'gcs'
+Provides-Extra: github
+Requires-Dist: pygithub>1.58.0; extra == 'github'
+Requires-Dist: requests; extra == 'github'
+Provides-Extra: gitlab
+Requires-Dist: python-gitlab; extra == 'gitlab'
+Provides-Extra: google-drive
+Requires-Dist: google-api-python-client; extra == 'google-drive'
+Provides-Extra: hubspot
+Requires-Dist: hubspot-api-client; extra == 'hubspot'
+Requires-Dist: urllib3; extra == 'hubspot'
+Provides-Extra: huggingface
+Requires-Dist: sentence-transformers; extra == 'huggingface'
+Provides-Extra: ibm-watsonx-s3
+Requires-Dist: httpx; extra == 'ibm-watsonx-s3'
+Requires-Dist: pandas; extra == 'ibm-watsonx-s3'
+Requires-Dist: pyarrow; extra == 'ibm-watsonx-s3'
+Requires-Dist: pyiceberg; extra == 'ibm-watsonx-s3'
+Requires-Dist: tenacity; extra == 'ibm-watsonx-s3'
+Provides-Extra: image
+Requires-Dist: unstructured[image]; extra == 'image'
+Provides-Extra: jira
+Requires-Dist: atlassian-python-api; extra == 'jira'
+Provides-Extra: kafka
+Requires-Dist: confluent-kafka; extra == 'kafka'
+Provides-Extra: kdbai
+Requires-Dist: kdbai-client>=1.4.0; extra == 'kdbai'
+Requires-Dist: pandas; extra == 'kdbai'
+Provides-Extra: lancedb
+Requires-Dist: lancedb; extra == 'lancedb'
+Provides-Extra: md
+Requires-Dist: unstructured[md]; extra == 'md'
+Provides-Extra: milvus
+Requires-Dist: pymilvus; extra == 'milvus'
+Provides-Extra: mixedbreadai
+Requires-Dist: mixedbread-ai; extra == 'mixedbreadai'
+Provides-Extra: mongodb
+Requires-Dist: pymongo; extra == 'mongodb'
+Provides-Extra: msg
+Requires-Dist: unstructured[msg]; extra == 'msg'
+Provides-Extra: neo4j
+Requires-Dist: cymple; extra == 'neo4j'
+Requires-Dist: neo4j-rust-ext; extra == 'neo4j'
+Requires-Dist: networkx; extra == 'neo4j'
+Provides-Extra: notion
+Requires-Dist: backoff; extra == 'notion'
+Requires-Dist: htmlbuilder; extra == 'notion'
+Requires-Dist: httpx; extra == 'notion'
+Requires-Dist: notion-client; extra == 'notion'
+Provides-Extra: octoai
+Requires-Dist: openai; extra == 'octoai'
+Requires-Dist: tiktoken; extra == 'octoai'
+Provides-Extra: odt
+Requires-Dist: unstructured[odt]; extra == 'odt'
+Provides-Extra: onedrive
+Requires-Dist: msal; extra == 'onedrive'
+Requires-Dist: office365-rest-python-client; extra == 'onedrive'
+Requires-Dist: requests; extra == 'onedrive'
+Provides-Extra: openai
+Requires-Dist: openai; extra == 'openai'
+Requires-Dist: tiktoken; extra == 'openai'
+Provides-Extra: opensearch
+Requires-Dist: opensearch-py; extra == 'opensearch'
+Provides-Extra: org
+Requires-Dist: unstructured[org]; extra == 'org'
+Provides-Extra: outlook
+Requires-Dist: msal; extra == 'outlook'
+Requires-Dist: office365-rest-python-client; extra == 'outlook'
+Provides-Extra: pdf
+Requires-Dist: unstructured[pdf]; extra == 'pdf'
+Provides-Extra: pinecone
+Requires-Dist: pinecone; extra == 'pinecone'
+Provides-Extra: postgres
+Requires-Dist: pandas; extra == 'postgres'
+Requires-Dist: psycopg2-binary; extra == 'postgres'
+Provides-Extra: ppt
+Requires-Dist: unstructured[ppt]; extra == 'ppt'
+Provides-Extra: pptx
+Requires-Dist: unstructured[pptx]; extra == 'pptx'
+Provides-Extra: qdrant
+Requires-Dist: qdrant-client; extra == 'qdrant'
+Provides-Extra: reddit
+Requires-Dist: praw; extra == 'reddit'
+Provides-Extra: redis
+Requires-Dist: redis; extra == 'redis'
+Provides-Extra: remote
+Requires-Dist: unstructured-client>=0.30.0; extra == 'remote'
+Provides-Extra: rst
+Requires-Dist: unstructured[rst]; extra == 'rst'
+Provides-Extra: rtf
+Requires-Dist: unstructured[rtf]; extra == 'rtf'
+Provides-Extra: s3
+Requires-Dist: fsspec; extra == 's3'
+Requires-Dist: s3fs; extra == 's3'
+Provides-Extra: salesforce
+Requires-Dist: simple-salesforce; extra == 'salesforce'
+Provides-Extra: sftp
+Requires-Dist: fsspec; extra == 'sftp'
+Requires-Dist: paramiko; extra == 'sftp'
+Provides-Extra: sharepoint
+Requires-Dist: msal; extra == 'sharepoint'
+Requires-Dist: office365-rest-python-client; extra == 'sharepoint'
+Requires-Dist: requests; extra == 'sharepoint'
+Provides-Extra: singlestore
+Requires-Dist: pandas; extra == 'singlestore'
+Requires-Dist: singlestoredb; extra == 'singlestore'
+Provides-Extra: slack
+Requires-Dist: slack-sdk[optional]; extra == 'slack'
+Provides-Extra: snowflake
+Requires-Dist: pandas; extra == 'snowflake'
+Requires-Dist: psycopg2-binary; extra == 'snowflake'
+Requires-Dist: snowflake-connector-python; extra == 'snowflake'
+Provides-Extra: togetherai
+Requires-Dist: together; extra == 'togetherai'
+Provides-Extra: tsv
+Requires-Dist: unstructured[tsv]; extra == 'tsv'
+Provides-Extra: vastdb
+Requires-Dist: ibis; extra == 'vastdb'
+Requires-Dist: pandas; extra == 'vastdb'
+Requires-Dist: pyarrow; extra == 'vastdb'
+Requires-Dist: vastdb; extra == 'vastdb'
+Provides-Extra: vectara
+Requires-Dist: aiofiles; extra == 'vectara'
+Requires-Dist: httpx; extra == 'vectara'
+Requires-Dist: requests; extra == 'vectara'
+Provides-Extra: vertexai
+Requires-Dist: vertexai; extra == 'vertexai'
+Provides-Extra: voyageai
+Requires-Dist: voyageai; extra == 'voyageai'
+Provides-Extra: weaviate
+Requires-Dist: weaviate-client; extra == 'weaviate'
+Provides-Extra: wikipedia
+Requires-Dist: wikipedia; extra == 'wikipedia'
+Provides-Extra: xlsx
+Requires-Dist: unstructured[xlsx]; extra == 'xlsx'
+Provides-Extra: zendesk
+Requires-Dist: aiofiles; extra == 'zendesk'
+Requires-Dist: bs4; extra == 'zendesk'
+Requires-Dist: httpx; extra == 'zendesk'
+Description-Content-Type: text/markdown
+# Unstructured Ingest
+For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.

unstructured-ingest 0.7.2__py3-none-any.whl → 1.0.1__py3-none-any.whl

Potentially problematic release.

unstructured-ingest 0.7.2__py3-none-any.whl → 1.0.1__py3-none-any.whl