unstructured-ingest 0.5.20__py3-none-any.whl → 0.5.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest has been flagged as possibly problematic.
- test/integration/connectors/test_astradb.py +8 -2
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/interfaces.py +7 -3
- unstructured_ingest/utils/data_prep.py +17 -5
- unstructured_ingest/utils/table.py +11 -4
- unstructured_ingest/v2/processes/connectors/delta_table.py +8 -3
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +4 -3
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +5 -2
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +5 -2
- unstructured_ingest/v2/processes/connectors/kdbai.py +6 -3
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +10 -2
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +5 -3
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -1
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +7 -3
- unstructured_ingest/v2/processes/connectors/sql/sql.py +22 -9
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -1
- unstructured_ingest/v2/processes/connectors/sql/vastdb.py +5 -7
- {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/METADATA +175 -24
- {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/RECORD +23 -23
- {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/top_level.txt +0 -0
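The dominant change across the connector and utility modules in this release is that module-level `import pandas as pd` / `import numpy as np` statements are replaced with function-local imports guarded by the `requires_dependencies` decorator from `unstructured_ingest.utils.dep_check` (whose own RECORD hash is unchanged), so importing a connector no longer requires pandas or numpy to be installed. Below is a minimal, self-contained sketch of that pattern; the decorator body is a simplified stand-in, not the library's actual implementation, and `run_data` plus the error message are illustrative only.

from functools import wraps
from importlib import import_module
from typing import Any, Callable, Optional


def requires_dependencies(deps: list[str], extras: Optional[str] = None) -> Callable:
    # Simplified stand-in for unstructured_ingest.utils.dep_check.requires_dependencies:
    # check optional imports when the function is called, not when the module is imported.
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            for dep in deps:
                try:
                    import_module(dep)
                except ImportError as e:
                    hint = f' -- try `pip install "unstructured-ingest[{extras}]"`' if extras else ""
                    raise ImportError(f"{func.__name__} requires the {dep} package{hint}") from e
            return func(*args, **kwargs)

        return wrapper

    return decorator


@requires_dependencies(["pandas"], extras="duckdb")
def run_data(data: list[dict]) -> None:
    # pandas is imported lazily inside the function body, mirroring the
    # connector changes below; importing this module alone needs no pandas.
    import pandas as pd

    df = pd.DataFrame(data=data)
    print(f"would upload {len(df)} rows")


if __name__ == "__main__":
    run_data([{"text": "hello"}, {"text": "world"}])

The per-file hunks follow.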
test/integration/connectors/test_astradb.py
@@ -1,3 +1,4 @@
+import contextlib
 import json
 import os
 from dataclasses import dataclass
@@ -231,6 +232,13 @@ def test_astra_create_destination():
     )
     collection_name = "system_created-123"
     formatted_collection_name = "system_created_123"
+
+    client = AstraDBClient()
+    db = client.get_database(api_endpoint=env_data.api_endpoint, token=env_data.token)
+    with contextlib.suppress(Exception):
+        # drop collection before trying to create it
+        db.drop_collection(formatted_collection_name)
+
     created = uploader.create_destination(destination_name=collection_name, vector_length=3072)
     assert created
     assert uploader.upload_config.collection_name == formatted_collection_name
@@ -239,8 +247,6 @@ def test_astra_create_destination():
     assert not created

     # cleanup
-    client = AstraDBClient()
-    db = client.get_database(api_endpoint=env_data.api_endpoint, token=env_data.token)
     db.drop_collection(formatted_collection_name)

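The test change above makes test_astra_create_destination idempotent: any collection left over from an earlier run is dropped before the create call, with errors suppressed. A minimal sketch of that cleanup idiom (the `db` handle and collection name are placeholders, not the test's fixtures):

import contextlib


def reset_collection(db, collection_name: str) -> None:
    # Dropping a collection that does not exist raises; suppress the error so
    # the pre-test cleanup can never fail the test itself.
    with contextlib.suppress(Exception):
        db.drop_collection(collection_name)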
unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "0.5.20"
+__version__ = "0.5.21"  # pragma: no cover
unstructured_ingest/embed/interfaces.py
@@ -2,10 +2,10 @@ from abc import ABC
 from dataclasses import dataclass
 from typing import Any, Optional

-import numpy as np
 from pydantic import BaseModel, Field

 from unstructured_ingest.utils.data_prep import batch_generator
+from unstructured_ingest.utils.dep_check import requires_dependencies

 EMBEDDINGS_KEY = "embeddings"

@@ -32,7 +32,6 @@ class BaseEncoder(ABC):

 @dataclass
 class BaseEmbeddingEncoder(BaseEncoder, ABC):
-
     def initialize(self):
         """Initializes the embedding encoder class. Should also validate the instance
         is properly configured: e.g., embed a single a element"""
@@ -46,8 +45,11 @@ class BaseEmbeddingEncoder(BaseEncoder, ABC):
         return self.embed_query(query="Q")

     @property
+    @requires_dependencies(["numpy"])
     def is_unit_vector(self) -> bool:
         """Denotes if the embedding vector is a unit vector."""
+        import numpy as np
+
         exemplary_embedding = self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0, rtol=1e-03)

@@ -86,7 +88,6 @@ class BaseEmbeddingEncoder(BaseEncoder, ABC):

 @dataclass
 class AsyncBaseEmbeddingEncoder(BaseEncoder, ABC):
-
     async def initialize(self):
         """Initializes the embedding encoder class. Should also validate the instance
         is properly configured: e.g., embed a single a element"""
@@ -100,8 +101,11 @@ class AsyncBaseEmbeddingEncoder(BaseEncoder, ABC):
         return await self.embed_query(query="Q")

     @property
+    @requires_dependencies(["numpy"])
     async def is_unit_vector(self) -> bool:
         """Denotes if the embedding vector is a unit vector."""
+        import numpy as np
+
         exemplary_embedding = await self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0, rtol=1e-03)

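Both is_unit_vector properties keep the same check and only defer the numpy import: the L2 norm of an exemplary embedding should be approximately 1.0 for a unit-normalized embedding model. A standalone illustration of that check:

import numpy as np


def is_unit_vector(embedding: list[float]) -> bool:
    # Same comparison the encoders use: norm of the embedding close to 1.0.
    return bool(np.isclose(np.linalg.norm(embedding), 1.0, rtol=1e-03))


print(is_unit_vector([0.6, 0.8]))  # True: sqrt(0.36 + 0.64) == 1.0
print(is_unit_vector([1.0, 1.0]))  # False: norm is about 1.414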
unstructured_ingest/utils/data_prep.py
@@ -2,20 +2,22 @@ import itertools
 import json
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Generator, Iterable, Optional, Sequence, TypeVar, Union, cast
-
-import pandas as pd
+from typing import TYPE_CHECKING, Any, Generator, Iterable, Optional, Sequence, TypeVar, Union, cast

 from unstructured_ingest.utils import ndjson
+from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.logger import logger

+if TYPE_CHECKING:
+    from pandas import DataFrame
+
 DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d+%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z")

 T = TypeVar("T")
 IterableT = Iterable[T]


-def split_dataframe(df:
+def split_dataframe(df: "DataFrame", chunk_size: int = 100) -> Generator["DataFrame", None, None]:
     num_chunks = len(df) // chunk_size + 1
     for i in range(num_chunks):
         yield df[i * chunk_size : (i + 1) * chunk_size]
@@ -144,9 +146,13 @@ def get_data_by_suffix(path: Path) -> list[dict]:
         elif path.suffix == ".ndjson":
             return ndjson.load(f)
         elif path.suffix == ".csv":
+            import pandas as pd
+
             df = pd.read_csv(path)
             return df.to_dict(orient="records")
         elif path.suffix == ".parquet":
+            import pandas as pd
+
             df = pd.read_parquet(path)
             return df.to_dict(orient="records")
         else:
@@ -180,6 +186,9 @@ def get_data(path: Union[Path, str]) -> list[dict]:
             return ndjson.load(f)
     except Exception as e:
         logger.warning(f"failed to read {path} as ndjson: {e}")
+
+    import pandas as pd
+
     try:
         df = pd.read_csv(path)
         return df.to_dict(orient="records")
@@ -202,7 +211,10 @@ def get_json_data(path: Path) -> list[dict]:
     raise ValueError(f"Unsupported file type: {path}")


-
+@requires_dependencies(["pandas"])
+def get_data_df(path: Path) -> "DataFrame":
+    import pandas as pd
+
     with path.open() as f:
         if path.suffix == ".json":
             data = json.load(f)
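data_prep.py (and most connectors below) now keep DataFrame only as a typing-time import: `if TYPE_CHECKING: from pandas import DataFrame` plus quoted annotations, so signatures stay typed while pandas is never imported at runtime just to load the module. A small self-contained sketch of the idiom, modeled on split_dataframe (renamed here so it is not mistaken for the library function):

from typing import TYPE_CHECKING, Generator

if TYPE_CHECKING:
    # Only evaluated by static type checkers; no pandas import at runtime.
    from pandas import DataFrame


def split_frame(df: "DataFrame", chunk_size: int = 100) -> Generator["DataFrame", None, None]:
    # Quoted annotations keep the signature informative while pandas stays optional.
    num_chunks = len(df) // chunk_size + 1
    for i in range(num_chunks):
        yield df[i * chunk_size : (i + 1) * chunk_size]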
unstructured_ingest/utils/table.py
@@ -1,11 +1,16 @@
-from typing import Any
-
-import pandas as pd
+from typing import TYPE_CHECKING, Any

 from unstructured_ingest.utils.data_prep import flatten_dict
+from unstructured_ingest.utils.dep_check import requires_dependencies
+
+if TYPE_CHECKING:
+    from pandas import DataFrame


+@requires_dependencies(["pandas"])
 def get_default_pandas_dtypes() -> dict[str, Any]:
+    import pandas as pd
+
     return {
         "text": pd.StringDtype(),  # type: ignore
         "type": pd.StringDtype(),  # type: ignore
@@ -57,7 +62,9 @@ def get_default_pandas_dtypes() -> dict[str, Any]:
 def convert_to_pandas_dataframe(
     elements_dict: list[dict[str, Any]],
     drop_empty_cols: bool = False,
-) ->
+) -> "DataFrame":
+    import pandas as pd
+
     # Flatten metadata if it hasn't already been flattened
     for d in elements_dict:
         if metadata := d.pop("metadata", None):
unstructured_ingest/v2/processes/connectors/delta_table.py
@@ -3,10 +3,9 @@ import traceback
 from dataclasses import dataclass, field
 from multiprocessing import Process, Queue
 from pathlib import Path
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
 from urllib.parse import urlparse

-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.error import DestinationConnectionError
@@ -27,6 +26,9 @@ from unstructured_ingest.v2.processes.connector_registry import DestinationRegis

 CONNECTOR_TYPE = "delta_table"

+if TYPE_CHECKING:
+    from pandas import DataFrame
+

 @requires_dependencies(["deltalake"], extras="delta-table")
 def write_deltalake_with_error_handling(queue, **kwargs):
@@ -136,7 +138,7 @@ class DeltaTableUploader(Uploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    def upload_dataframe(self, df:
+    def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
         updated_upload_path = os.path.join(
             self.connection_config.table_uri, file_data.source_identifiers.relative_path
         )
@@ -172,7 +174,10 @@ class DeltaTableUploader(Uploader):
             logger.error(f"Exception occurred in write_deltalake: {error_message}")
             raise RuntimeError(f"Error in write_deltalake: {error_message}")

+    @requires_dependencies(["pandas"], extras="delta-table")
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        import pandas as pd
+
         df = pd.DataFrame(data=data)
         self.upload_dataframe(df=df, file_data=file_data)

unstructured_ingest/v2/processes/connectors/duckdb/base.py
@@ -2,9 +2,8 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any

-import pandas as pd
-
 from unstructured_ingest.utils.data_prep import get_data, write_data
+from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import FileData, UploadStager
 from unstructured_ingest.v2.utils import get_enhanced_element_id

@@ -55,7 +54,6 @@ _COLUMNS = (

 @dataclass
 class BaseDuckDBUploadStager(UploadStager):
-
     def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         data = element_dict.copy()
         metadata: dict[str, Any] = data.pop("metadata", {})
@@ -72,6 +70,7 @@ class BaseDuckDBUploadStager(UploadStager):
         data = {k: v for k, v in data.items() if k in _COLUMNS}
         return data

+    @requires_dependencies(["pandas"], extras="duckdb")
     def run(
         self,
         elements_filepath: Path,
@@ -80,6 +79,8 @@ class BaseDuckDBUploadStager(UploadStager):
         output_filename: str,
         **kwargs: Any,
     ) -> Path:
+        import pandas as pd
+
         elements_contents = get_data(path=elements_filepath)
         output_filename_suffix = Path(elements_filepath).suffix
         output_filename = f"{Path(output_filename).stem}{output_filename_suffix}"
unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py
@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional

-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.error import DestinationConnectionError
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUp

 if TYPE_CHECKING:
     from duckdb import DuckDBPyConnection as DuckDBConnection
+    from pandas import DataFrame

 CONNECTOR_TYPE = "duckdb"

@@ -101,7 +101,7 @@ class DuckDBUploader(Uploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    def upload_dataframe(self, df:
+    def upload_dataframe(self, df: "DataFrame") -> None:
         logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ")

         with self.connection_config.get_client() as conn:
@@ -109,7 +109,10 @@ class DuckDBUploader(Uploader):
                 f"INSERT INTO {self.connection_config.db_schema}.{self.connection_config.table} BY NAME SELECT * FROM df"  # noqa: E501
             )

+    @requires_dependencies(["pandas"], extras="duckdb")
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        import pandas as pd
+
         df = pd.DataFrame(data=data)
         self.upload_dataframe(df=df)

unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py
@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional

-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.__version__ import __version__ as unstructured_io_ingest_version
@@ -24,6 +23,7 @@ from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUp

 if TYPE_CHECKING:
     from duckdb import DuckDBPyConnection as MotherDuckConnection
+    from pandas import DataFrame

 CONNECTOR_TYPE = "motherduck"

@@ -100,7 +100,7 @@ class MotherDuckUploader(Uploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    def upload_dataframe(self, df:
+    def upload_dataframe(self, df: "DataFrame") -> None:
         logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ")
         database = self.connection_config.database
         db_schema = self.connection_config.db_schema
@@ -109,7 +109,10 @@ class MotherDuckUploader(Uploader):
         with self.connection_config.get_client() as conn:
             conn.query(f'INSERT INTO "{database}"."{db_schema}"."{table}" BY NAME SELECT * FROM df')

+    @requires_dependencies(["pandas"], extras="duckdb")
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        import pandas as pd
+
         df = pd.DataFrame(data=data)
         self.upload_dataframe(df=df)

unstructured_ingest/v2/processes/connectors/kdbai.py
@@ -3,7 +3,6 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional

-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.error import DestinationConnectionError
@@ -26,6 +25,7 @@ from unstructured_ingest.v2.utils import get_enhanced_element_id

 if TYPE_CHECKING:
     from kdbai_client import Database, Session, Table
+    from pandas import DataFrame

 CONNECTOR_TYPE = "kdbai"

@@ -118,11 +118,11 @@ class KdbaiUploader(Uploader):
             table = db.table(self.upload_config.table_name)
             yield table

-    def upsert_batch(self, batch:
+    def upsert_batch(self, batch: "DataFrame"):
         with self.get_table() as table:
             table.insert(batch)

-    def process_dataframe(self, df:
+    def process_dataframe(self, df: "DataFrame"):
         logger.debug(
             f"uploading {len(df)} entries to {self.connection_config.endpoint} "
             f"db {self.upload_config.database_name} in table {self.upload_config.table_name}"
@@ -130,7 +130,10 @@ class KdbaiUploader(Uploader):
         for batch_df in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
             self.upsert_batch(batch=batch_df)

+    @requires_dependencies(["pandas"], extras="kdbai")
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        import pandas as pd
+
         df = pd.DataFrame(data=data)
         self.process_dataframe(df=df)

unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py
@@ -8,7 +8,6 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, AsyncGenerator, Optional

-import pandas as pd
 from pydantic import Field

 from unstructured_ingest.error import DestinationConnectionError
@@ -26,6 +25,7 @@ CONNECTOR_TYPE = "lancedb"
 if TYPE_CHECKING:
     from lancedb import AsyncConnection
     from lancedb.table import AsyncTable
+    from pandas import DataFrame


 class LanceDBConnectionConfig(ConnectionConfig, ABC):
@@ -69,6 +69,7 @@ class LanceDBUploadStager(UploadStager):
         default_factory=LanceDBUploadStagerConfig
     )

+    @requires_dependencies(["pandas"], extras="lancedb")
     def run(
         self,
         elements_filepath: Path,
@@ -77,6 +78,8 @@ class LanceDBUploadStager(UploadStager):
         output_filename: str,
         **kwargs: Any,
     ) -> Path:
+        import pandas as pd
+
         with open(elements_filepath) as elements_file:
             elements_contents: list[dict] = json.load(elements_file)

@@ -129,7 +132,10 @@ class LanceDBUploader(Uploader):
         finally:
             table.close()

+    @requires_dependencies(["pandas"], extras="lancedb")
     async def run_async(self, path, file_data, **kwargs):
+        import pandas as pd
+
         df = pd.read_feather(path)
         async with self.get_table() as table:
             schema = await table.schema()
@@ -144,7 +150,9 @@ class LanceDBUploader(Uploader):
             await table.delete(f'{RECORD_ID_LABEL} = "{file_data.identifier}"')
             await table.add(data=df)

-    def _fit_to_schema(self, df:
+    def _fit_to_schema(self, df: "DataFrame", schema) -> "DataFrame":
+        import pandas as pd
+
         columns = set(df.columns)
         schema_fields = set(schema.names)
         columns_to_drop = columns - schema_fields
unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py
@@ -3,8 +3,6 @@ from contextlib import contextmanager
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Generator, Optional

-import numpy as np
-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.utils.data_prep import split_dataframe
@@ -27,6 +25,7 @@ if TYPE_CHECKING:
     from databricks.sdk.core import oauth_service_principal
     from databricks.sql.client import Connection as DeltaTableConnection
     from databricks.sql.client import Cursor as DeltaTableCursor
+    from pandas import DataFrame

 CONNECTOR_TYPE = "databricks_delta_tables"

@@ -180,7 +179,10 @@ class DatabricksDeltaTablesUploader(SQLUploader):
         )
         return statement

-
+    @requires_dependencies(["pandas"], extras="databricks-delta-tables")
+    def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+        import numpy as np
+
         if self.can_delete():
             self.delete_by_record_id(file_data=file_data)
         else:
unstructured_ingest/v2/processes/connectors/sql/singlestore.py
@@ -3,9 +3,9 @@ from contextlib import contextmanager
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Generator, Optional

-import pandas as pd
 from pydantic import Field, Secret

+from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
@@ -46,6 +46,7 @@ class SingleStoreConnectionConfig(SQLConnectionConfig):
     database: Optional[str] = Field(default=None, description="SingleStore database")

     @contextmanager
+    @requires_dependencies(["singlestoredb"], extras="singlestore")
     def get_connection(self) -> Generator["SingleStoreConnection", None, None]:
         import singlestoredb as s2

@@ -130,9 +131,12 @@ class SingleStoreUploader(SQLUploader):
     values_delimiter: str = "%s"
     connector_type: str = CONNECTOR_TYPE

+    @requires_dependencies(["pandas"], extras="singlestore")
     def prepare_data(
         self, columns: list[str], data: tuple[tuple[Any, ...], ...]
     ) -> list[tuple[Any, ...]]:
+        import pandas as pd
+
         output = []
         for row in data:
             parsed = []
unstructured_ingest/v2/processes/connectors/sql/snowflake.py
@@ -3,8 +3,6 @@ from contextlib import contextmanager
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Generator, Optional

-import numpy as np
-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.utils.data_prep import split_dataframe
@@ -32,6 +30,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
 )

 if TYPE_CHECKING:
+    from pandas import DataFrame
     from snowflake.connector import SnowflakeConnection
     from snowflake.connector.cursor import SnowflakeCursor

@@ -174,9 +173,12 @@ class SnowflakeUploader(SQLUploader):
     connector_type: str = CONNECTOR_TYPE
     values_delimiter: str = "?"

+    @requires_dependencies(["pandas"], extras="snowflake")
     def prepare_data(
         self, columns: list[str], data: tuple[tuple[Any, ...], ...]
     ) -> list[tuple[Any, ...]]:
+        import pandas as pd
+
         output = []
         for row in data:
             parsed = []
@@ -210,7 +212,9 @@ class SnowflakeUploader(SQLUploader):
             ]
         )

-    def upload_dataframe(self, df:
+    def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+        import numpy as np
+
         if self.can_delete():
             self.delete_by_record_id(file_data=file_data)
         else:
unstructured_ingest/v2/processes/connectors/sql/sql.py
@@ -6,10 +6,8 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from time import time
-from typing import Any, Generator, Union
+from typing import TYPE_CHECKING, Any, Generator, Union

-import numpy as np
-import pandas as pd
 from dateutil import parser
 from pydantic import BaseModel, Field, Secret

@@ -38,6 +36,9 @@ from unstructured_ingest.v2.interfaces import (
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.utils import get_enhanced_element_id

+if TYPE_CHECKING:
+    from pandas import DataFrame
+
 _DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified")


@@ -154,13 +155,15 @@ class SQLDownloader(Downloader, ABC):
     def query_db(self, file_data: SqlBatchFileData) -> tuple[list[tuple], list[str]]:
         pass

-    def sql_to_df(self, rows: list[tuple], columns: list[str]) -> list[
+    def sql_to_df(self, rows: list[tuple], columns: list[str]) -> list["DataFrame"]:
+        import pandas as pd
+
         data = [dict(zip(columns, row)) for row in rows]
         df = pd.DataFrame(data)
         dfs = [pd.DataFrame([row.values], columns=df.columns) for index, row in df.iterrows()]
         return dfs

-    def get_data(self, file_data: SqlBatchFileData) -> list[
+    def get_data(self, file_data: SqlBatchFileData) -> list["DataFrame"]:
         rows, columns = self.query_db(file_data=file_data)
         return self.sql_to_df(rows=rows, columns=columns)

@@ -174,7 +177,7 @@ class SQLDownloader(Downloader, ABC):
         return f

     def generate_download_response(
-        self, result:
+        self, result: "DataFrame", file_data: SqlBatchFileData
     ) -> DownloadResponse:
         id_column = file_data.additional_metadata.id_column
         table_name = file_data.additional_metadata.table_name
@@ -231,7 +234,7 @@ class SQLUploadStager(UploadStager):
         data[RECORD_ID_LABEL] = file_data.identifier
         return data

-    def conform_dataframe(self, df:
+    def conform_dataframe(self, df: "DataFrame") -> "DataFrame":
         for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
             df[column] = df[column].apply(parse_date_string).apply(lambda date: date.timestamp())
         for column in filter(
@@ -259,6 +262,8 @@ class SQLUploadStager(UploadStager):
         output_filename: str,
         **kwargs: Any,
     ) -> Path:
+        import pandas as pd
+
         elements_contents = get_data(path=elements_filepath)

         df = pd.DataFrame(
@@ -309,6 +314,8 @@ class SQLUploader(Uploader):
     def prepare_data(
         self, columns: list[str], data: tuple[tuple[Any, ...], ...]
     ) -> list[tuple[Any, ...]]:
+        import pandas as pd
+
         output = []
         for row in data:
             parsed = []
@@ -323,7 +330,9 @@ class SQLUploader(Uploader):
             output.append(tuple(parsed))
         return output

-    def _fit_to_schema(self, df:
+    def _fit_to_schema(self, df: "DataFrame", add_missing_columns: bool = True) -> "DataFrame":
+        import pandas as pd
+
         table_columns = self.get_table_columns()
         columns = set(df.columns)
         schema_fields = set(table_columns)
@@ -348,7 +357,9 @@ class SQLUploader(Uploader):
             df[column] = pd.Series()
         return df

-    def upload_dataframe(self, df:
+    def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+        import numpy as np
+
         if self.can_delete():
             self.delete_by_record_id(file_data=file_data)
         else:
@@ -409,6 +420,8 @@ class SQLUploader(Uploader):
         logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")

     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        import pandas as pd
+
         df = pd.DataFrame(data)
         self.upload_dataframe(df=df, file_data=file_data)

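_fit_to_schema and upload_dataframe in sql.py now pull pandas and numpy in only when called. Judging from the visible hunk lines (`columns - schema_fields`, `df[column] = pd.Series()`), the schema-fitting step drops columns the destination table lacks and adds empty ones it expects; the sketch below is a hypothetical paraphrase of that behavior, not the library's exact code.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from pandas import DataFrame


def fit_to_schema(df: "DataFrame", table_columns: list[str]) -> "DataFrame":
    import pandas as pd

    columns = set(df.columns)
    schema_fields = set(table_columns)
    # Drop fields the destination table does not define...
    df = df.drop(columns=list(columns - schema_fields))
    # ...and add (empty) columns the table expects but the data lacks.
    for missing in schema_fields - columns:
        df[missing] = pd.Series()
    return df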
unstructured_ingest/v2/processes/connectors/sql/sqlite.py
@@ -4,9 +4,9 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator

-import pandas as pd
 from pydantic import Field, Secret, model_validator

+from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import (
     DestinationRegistryEntry,
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
     from sqlite3 import Connection as SqliteConnection
     from sqlite3 import Cursor as SqliteCursor

+
 CONNECTOR_TYPE = "sqlite"


@@ -132,9 +133,12 @@ class SQLiteUploader(SQLUploader):
     connection_config: SQLiteConnectionConfig
     connector_type: str = CONNECTOR_TYPE

+    @requires_dependencies(["pandas"])
     def prepare_data(
         self, columns: list[str], data: tuple[tuple[Any, ...], ...]
     ) -> list[tuple[Any, ...]]:
+        import pandas as pd
+
         output = []
         for row in data:
             parsed = []
unstructured_ingest/v2/processes/connectors/sql/vastdb.py
@@ -2,8 +2,6 @@ from contextlib import contextmanager
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Optional

-import numpy as np
-import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.error import DestinationConnectionError
@@ -34,6 +32,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
 from unstructured_ingest.v2.utils import get_enhanced_element_id

 if TYPE_CHECKING:
+    from pandas import DataFrame
     from vastdb import connect as VastdbConnect
     from vastdb import transaction as VastdbTransaction
     from vastdb.table import Table as VastdbTable
@@ -128,7 +127,6 @@ class VastdbDownloader(SQLDownloader):
         ids = tuple([item.identifier for item in file_data.batch_items])

         with self.connection_config.get_table(table_name) as table:
-
             predicate = _[id_column].isin(ids)

             if self.download_config.fields:
@@ -168,7 +166,7 @@ class VastdbUploadStager(SQLUploadStager):
         data[RECORD_ID_LABEL] = file_data.identifier
         return data

-    def conform_dataframe(self, df:
+    def conform_dataframe(self, df: "DataFrame") -> "DataFrame":
         df = super().conform_dataframe(df=df)
         if self.upload_stager_config.rename_columns_map:
             df.rename(columns=self.upload_stager_config.rename_columns_map, inplace=True)
@@ -193,8 +191,9 @@ class VastdbUploader(SQLUploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    @requires_dependencies(["pyarrow"], extras="vastdb")
-    def upload_dataframe(self, df:
+    @requires_dependencies(["pyarrow", "pandas"], extras="vastdb")
+    def upload_dataframe(self, df: "DataFrame", file_data: FileData) -> None:
+        import numpy as np
         import pyarrow as pa

         if self.can_delete():
@@ -216,7 +215,6 @@ class VastdbUploader(SQLUploader):
         )

         for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
-
             with self.connection_config.get_table(self.upload_config.table_name) as table:
                 pa_table = pa.Table.from_pandas(rows)
                 table.insert(pa_table)
{unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.5.20
+Version: 0.5.21
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,197 +22,348 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: python-dateutil
 Requires-Dist: click
-Requires-Dist:
+Requires-Dist: dataclasses_json
 Requires-Dist: pydantic>=2.7
-Requires-Dist:
+Requires-Dist: python-dateutil
+Requires-Dist: opentelemetry-sdk
 Requires-Dist: tqdm
-Requires-Dist:
+Requires-Dist: numpy
+Requires-Dist: pandas
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
+Requires-Dist: numpy; extra == "remote"
+Requires-Dist: pandas; extra == "remote"
 Provides-Extra: csv
 Requires-Dist: unstructured[tsv]; extra == "csv"
+Requires-Dist: numpy; extra == "csv"
+Requires-Dist: pandas; extra == "csv"
 Provides-Extra: doc
 Requires-Dist: unstructured[docx]; extra == "doc"
+Requires-Dist: numpy; extra == "doc"
+Requires-Dist: pandas; extra == "doc"
 Provides-Extra: docx
 Requires-Dist: unstructured[docx]; extra == "docx"
+Requires-Dist: numpy; extra == "docx"
+Requires-Dist: pandas; extra == "docx"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
+Requires-Dist: numpy; extra == "epub"
+Requires-Dist: pandas; extra == "epub"
 Provides-Extra: md
 Requires-Dist: unstructured[md]; extra == "md"
+Requires-Dist: numpy; extra == "md"
+Requires-Dist: pandas; extra == "md"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
+Requires-Dist: numpy; extra == "msg"
+Requires-Dist: pandas; extra == "msg"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
+Requires-Dist: numpy; extra == "odt"
+Requires-Dist: pandas; extra == "odt"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
+Requires-Dist: numpy; extra == "org"
+Requires-Dist: pandas; extra == "org"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
+Requires-Dist: numpy; extra == "pdf"
+Requires-Dist: pandas; extra == "pdf"
 Provides-Extra: ppt
 Requires-Dist: unstructured[pptx]; extra == "ppt"
+Requires-Dist: numpy; extra == "ppt"
+Requires-Dist: pandas; extra == "ppt"
 Provides-Extra: pptx
 Requires-Dist: unstructured[pptx]; extra == "pptx"
+Requires-Dist: numpy; extra == "pptx"
+Requires-Dist: pandas; extra == "pptx"
 Provides-Extra: rtf
 Requires-Dist: unstructured[rtf]; extra == "rtf"
+Requires-Dist: numpy; extra == "rtf"
+Requires-Dist: pandas; extra == "rtf"
 Provides-Extra: rst
 Requires-Dist: unstructured[rst]; extra == "rst"
+Requires-Dist: numpy; extra == "rst"
+Requires-Dist: pandas; extra == "rst"
 Provides-Extra: tsv
 Requires-Dist: unstructured[tsv]; extra == "tsv"
+Requires-Dist: numpy; extra == "tsv"
+Requires-Dist: pandas; extra == "tsv"
 Provides-Extra: xlsx
 Requires-Dist: unstructured[xlsx]; extra == "xlsx"
+Requires-Dist: numpy; extra == "xlsx"
+Requires-Dist: pandas; extra == "xlsx"
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
+Requires-Dist: numpy; extra == "airtable"
+Requires-Dist: pandas; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
+Requires-Dist: numpy; extra == "astradb"
+Requires-Dist: pandas; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: adlfs; extra == "azure"
 Requires-Dist: fsspec; extra == "azure"
+Requires-Dist: adlfs; extra == "azure"
+Requires-Dist: numpy; extra == "azure"
+Requires-Dist: pandas; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
+Requires-Dist: numpy; extra == "azure-ai-search"
+Requires-Dist: pandas; extra == "azure-ai-search"
 Provides-Extra: biomed
 Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: requests; extra == "biomed"
+Requires-Dist: numpy; extra == "biomed"
+Requires-Dist: pandas; extra == "biomed"
 Provides-Extra: box
-Requires-Dist: boxfs; extra == "box"
 Requires-Dist: fsspec; extra == "box"
+Requires-Dist: boxfs; extra == "box"
+Requires-Dist: numpy; extra == "box"
+Requires-Dist: pandas; extra == "box"
 Provides-Extra: chroma
 Requires-Dist: chromadb; extra == "chroma"
+Requires-Dist: numpy; extra == "chroma"
+Requires-Dist: pandas; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
+Requires-Dist: numpy; extra == "clarifai"
+Requires-Dist: pandas; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: requests; extra == "confluence"
 Requires-Dist: atlassian-python-api; extra == "confluence"
+Requires-Dist: requests; extra == "confluence"
+Requires-Dist: numpy; extra == "confluence"
+Requires-Dist: pandas; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
+Requires-Dist: numpy; extra == "couchbase"
+Requires-Dist: pandas; extra == "couchbase"
 Provides-Extra: delta-table
 Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: deltalake; extra == "delta-table"
+Requires-Dist: numpy; extra == "delta-table"
+Requires-Dist: pandas; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
+Requires-Dist: numpy; extra == "discord"
+Requires-Dist: pandas; extra == "discord"
 Provides-Extra: dropbox
 Requires-Dist: fsspec; extra == "dropbox"
 Requires-Dist: dropboxdrivefs; extra == "dropbox"
+Requires-Dist: numpy; extra == "dropbox"
+Requires-Dist: pandas; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
+Requires-Dist: numpy; extra == "duckdb"
+Requires-Dist: pandas; extra == "duckdb"
 Provides-Extra: elasticsearch
 Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
+Requires-Dist: numpy; extra == "elasticsearch"
+Requires-Dist: pandas; extra == "elasticsearch"
 Provides-Extra: gcs
 Requires-Dist: bs4; extra == "gcs"
-Requires-Dist: gcsfs; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
+Requires-Dist: gcsfs; extra == "gcs"
+Requires-Dist: numpy; extra == "gcs"
+Requires-Dist: pandas; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: requests; extra == "github"
+Requires-Dist: numpy; extra == "github"
+Requires-Dist: pandas; extra == "github"
 Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == "gitlab"
+Requires-Dist: numpy; extra == "gitlab"
+Requires-Dist: pandas; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
+Requires-Dist: numpy; extra == "google-drive"
+Requires-Dist: pandas; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
+Requires-Dist: numpy; extra == "hubspot"
+Requires-Dist: pandas; extra == "hubspot"
 Provides-Extra: ibm-watsonx-s3
-Requires-Dist:
+Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
 Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
+Requires-Dist: httpx; extra == "ibm-watsonx-s3"
 Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
-Requires-Dist:
+Requires-Dist: numpy; extra == "ibm-watsonx-s3"
+Requires-Dist: pandas; extra == "ibm-watsonx-s3"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
+Requires-Dist: numpy; extra == "jira"
+Requires-Dist: pandas; extra == "jira"
 Provides-Extra: kafka
 Requires-Dist: confluent-kafka; extra == "kafka"
+Requires-Dist: numpy; extra == "kafka"
+Requires-Dist: pandas; extra == "kafka"
 Provides-Extra: kdbai
 Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
+Requires-Dist: numpy; extra == "kdbai"
+Requires-Dist: pandas; extra == "kdbai"
 Provides-Extra: lancedb
 Requires-Dist: lancedb; extra == "lancedb"
+Requires-Dist: numpy; extra == "lancedb"
+Requires-Dist: pandas; extra == "lancedb"
 Provides-Extra: milvus
 Requires-Dist: pymilvus; extra == "milvus"
+Requires-Dist: numpy; extra == "milvus"
+Requires-Dist: pandas; extra == "milvus"
 Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
+Requires-Dist: numpy; extra == "mongodb"
+Requires-Dist: pandas; extra == "mongodb"
 Provides-Extra: neo4j
 Requires-Dist: networkx; extra == "neo4j"
-Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: neo4j-rust-ext; extra == "neo4j"
+Requires-Dist: cymple; extra == "neo4j"
+Requires-Dist: numpy; extra == "neo4j"
+Requires-Dist: pandas; extra == "neo4j"
 Provides-Extra: notion
+Requires-Dist: httpx; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
-Requires-Dist: httpx; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
+Requires-Dist: numpy; extra == "notion"
+Requires-Dist: pandas; extra == "notion"
 Provides-Extra: onedrive
 Requires-Dist: bs4; extra == "onedrive"
-Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
 Requires-Dist: msal; extra == "onedrive"
+Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: numpy; extra == "onedrive"
+Requires-Dist: pandas; extra == "onedrive"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
+Requires-Dist: numpy; extra == "opensearch"
+Requires-Dist: pandas; extra == "opensearch"
 Provides-Extra: outlook
-Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
 Requires-Dist: msal; extra == "outlook"
+Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
+Requires-Dist: numpy; extra == "outlook"
+Requires-Dist: pandas; extra == "outlook"
 Provides-Extra: pinecone
 Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
+Requires-Dist: numpy; extra == "pinecone"
+Requires-Dist: pandas; extra == "pinecone"
 Provides-Extra: postgres
 Requires-Dist: psycopg2-binary; extra == "postgres"
+Requires-Dist: numpy; extra == "postgres"
+Requires-Dist: pandas; extra == "postgres"
 Provides-Extra: qdrant
 Requires-Dist: qdrant-client; extra == "qdrant"
+Requires-Dist: numpy; extra == "qdrant"
+Requires-Dist: pandas; extra == "qdrant"
 Provides-Extra: reddit
 Requires-Dist: praw; extra == "reddit"
+Requires-Dist: numpy; extra == "reddit"
+Requires-Dist: pandas; extra == "reddit"
 Provides-Extra: redis
 Requires-Dist: redis; extra == "redis"
+Requires-Dist: numpy; extra == "redis"
+Requires-Dist: pandas; extra == "redis"
 Provides-Extra: s3
-Requires-Dist: s3fs; extra == "s3"
 Requires-Dist: fsspec; extra == "s3"
+Requires-Dist: s3fs; extra == "s3"
+Requires-Dist: numpy; extra == "s3"
+Requires-Dist: pandas; extra == "s3"
 Provides-Extra: sharepoint
-Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Requires-Dist: msal; extra == "sharepoint"
+Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+Requires-Dist: numpy; extra == "sharepoint"
+Requires-Dist: pandas; extra == "sharepoint"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
+Requires-Dist: numpy; extra == "salesforce"
+Requires-Dist: pandas; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
+Requires-Dist: paramiko; extra == "sftp"
+Requires-Dist: numpy; extra == "sftp"
+Requires-Dist: pandas; extra == "sftp"
 Provides-Extra: slack
 Requires-Dist: slack_sdk[optional]; extra == "slack"
+Requires-Dist: numpy; extra == "slack"
+Requires-Dist: pandas; extra == "slack"
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python; extra == "snowflake"
 Requires-Dist: psycopg2-binary; extra == "snowflake"
+Requires-Dist: snowflake-connector-python; extra == "snowflake"
+Requires-Dist: numpy; extra == "snowflake"
+Requires-Dist: pandas; extra == "snowflake"
 Provides-Extra: wikipedia
 Requires-Dist: wikipedia; extra == "wikipedia"
+Requires-Dist: numpy; extra == "wikipedia"
+Requires-Dist: pandas; extra == "wikipedia"
 Provides-Extra: weaviate
 Requires-Dist: weaviate-client; extra == "weaviate"
+Requires-Dist: numpy; extra == "weaviate"
+Requires-Dist: pandas; extra == "weaviate"
 Provides-Extra: databricks-volumes
 Requires-Dist: databricks-sdk; extra == "databricks-volumes"
+Requires-Dist: numpy; extra == "databricks-volumes"
+Requires-Dist: pandas; extra == "databricks-volumes"
 Provides-Extra: databricks-delta-tables
 Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
+Requires-Dist: numpy; extra == "databricks-delta-tables"
+Requires-Dist: pandas; extra == "databricks-delta-tables"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
+Requires-Dist: numpy; extra == "singlestore"
+Requires-Dist: pandas; extra == "singlestore"
 Provides-Extra: vectara
 Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
 Requires-Dist: requests; extra == "vectara"
+Requires-Dist: numpy; extra == "vectara"
+Requires-Dist: pandas; extra == "vectara"
 Provides-Extra: vastdb
+Requires-Dist: ibis; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
 Requires-Dist: vastdb; extra == "vastdb"
-Requires-Dist:
+Requires-Dist: numpy; extra == "vastdb"
+Requires-Dist: pandas; extra == "vastdb"
 Provides-Extra: zendesk
 Requires-Dist: bs4; extra == "zendesk"
-Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: httpx; extra == "zendesk"
+Requires-Dist: aiofiles; extra == "zendesk"
+Requires-Dist: numpy; extra == "zendesk"
+Requires-Dist: pandas; extra == "zendesk"
 Provides-Extra: embed-huggingface
 Requires-Dist: sentence-transformers; extra == "embed-huggingface"
+Requires-Dist: numpy; extra == "embed-huggingface"
+Requires-Dist: pandas; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
-Requires-Dist: openai; extra == "embed-octoai"
 Requires-Dist: tiktoken; extra == "embed-octoai"
+Requires-Dist: openai; extra == "embed-octoai"
+Requires-Dist: numpy; extra == "embed-octoai"
+Requires-Dist: pandas; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
+Requires-Dist: numpy; extra == "embed-vertexai"
+Requires-Dist: pandas; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
 Requires-Dist: voyageai; extra == "embed-voyageai"
+Requires-Dist: numpy; extra == "embed-voyageai"
+Requires-Dist: pandas; extra == "embed-voyageai"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
+Requires-Dist: numpy; extra == "embed-mixedbreadai"
+Requires-Dist: pandas; extra == "embed-mixedbreadai"
 Provides-Extra: openai
-Requires-Dist: openai; extra == "openai"
 Requires-Dist: tiktoken; extra == "openai"
+Requires-Dist: openai; extra == "openai"
+Requires-Dist: numpy; extra == "openai"
+Requires-Dist: pandas; extra == "openai"
 Provides-Extra: bedrock
 Requires-Dist: aioboto3; extra == "bedrock"
 Requires-Dist: boto3; extra == "bedrock"
+Requires-Dist: numpy; extra == "bedrock"
+Requires-Dist: pandas; extra == "bedrock"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
+Requires-Dist: numpy; extra == "togetherai"
+Requires-Dist: pandas; extra == "togetherai"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -5,7 +5,7 @@ test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
5
5
|
test/integration/chunkers/test_chunkers.py,sha256=USkltQN_mVVCxI0FkJsrS1gnLXlVr-fvsc0tPaK2sWI,1062
|
|
6
6
|
test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
test/integration/connectors/conftest.py,sha256=vYs4WDlCuieAwwErkJxCk4a1lGvr3qpeiAm-YaDznSo,1018
|
|
8
|
-
test/integration/connectors/test_astradb.py,sha256=
|
|
8
|
+
test/integration/connectors/test_astradb.py,sha256=hQyxvnbvN1UN-oDOBkXyniAs6GLb0rstQOoLT4LcBNI,9921
|
|
9
9
|
test/integration/connectors/test_azure_ai_search.py,sha256=MxFwk84vI_HT4taQTGrNpJ8ewGPqHSGrx626j8hC_Pw,9695
|
|
10
10
|
test/integration/connectors/test_chroma.py,sha256=1uGHbZXkXKGb8wl3p7c9G-L1MViUe283Hw5u3dg8OgI,4532
|
|
11
11
|
test/integration/connectors/test_confluence.py,sha256=W93znOusdvFXta8q0dqQ1rKhLafRVIqrfaFqk2FY-fo,3590
|
|
@@ -113,7 +113,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
113
113
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
114
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
115
115
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
116
|
-
unstructured_ingest/__version__.py,sha256=
|
|
116
|
+
unstructured_ingest/__version__.py,sha256=b5BrQJjlBZoPiM_J1cJDbJABGvcwaDFb_Bvwb0AHN10,43
|
|
117
117
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
118
118
|
unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
|
|
119
119
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -284,7 +284,7 @@ unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
284
284
|
unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM_0T69fcObs_I,1707
|
|
285
285
|
unstructured_ingest/embed/bedrock.py,sha256=tZumLLXafSr1zIFVjckapRoiiY-7u65GPuWmwsdhY0I,7726
|
|
286
286
|
unstructured_ingest/embed/huggingface.py,sha256=-ZD17O_H_UnK80fqig6y6wNKJckjx0HuAkY5vgPvk8M,2259
|
|
287
|
-
unstructured_ingest/embed/interfaces.py,sha256=
|
|
287
|
+
unstructured_ingest/embed/interfaces.py,sha256=SdB3t8eMPB8CbXzOYBpgwjzTvyb4T19L61Sr6Jy3_rw,5099
|
|
288
288
|
unstructured_ingest/embed/mixedbreadai.py,sha256=-Y0J27G9CL1t3ZTIeNjTjRviErSMAzJRf2zgDgMHUmg,4499
|
|
289
289
|
unstructured_ingest/embed/octoai.py,sha256=hNLEskDEP-2qWExUgVz2Eyw3KTIFwdUE9elbJ5qp4Ao,3855
|
|
290
290
|
unstructured_ingest/embed/openai.py,sha256=EindGUouvP8wolOBNbWQhAkaI6WGyPN4Hh2xyKuR6L8,3372
|
|
@@ -372,13 +372,13 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
 unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
 unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
-unstructured_ingest/utils/data_prep.py,sha256
+unstructured_ingest/utils/data_prep.py,sha256=-hhGbWm1Sev57t9z20JJLW0vS6kdhArCbb_xmIlKGaY,7826
 unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
 unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
 unstructured_ingest/utils/html.py,sha256=DGRDMqGbwH8RiF94Qh6NiqVkbbjZfe1h26dIehC-X7M,6340
 unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
 unstructured_ingest/utils/string_and_date_utils.py,sha256=54tzuqmhPN0uWnPLrzAWAsDGU9s6mQE_KSVywMDwTBk,2522
-unstructured_ingest/utils/table.py,sha256=
+unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
 unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
 unstructured_ingest/v2/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
 unstructured_ingest/v2/errors.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
@@ -435,12 +435,12 @@ unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6
 unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
 unstructured_ingest/v2/processes/connectors/confluence.py,sha256=gSs4-AxL0gfeWdJfP7JfCrQSQNLoJRkvHquKK9RJvpQ,12043
 unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
-unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=
+unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=Jx2EUqchJDqfPsyw4Ks-HaLSq2rIwXc1l1YFqjh_BbM,7240
 unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
 unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
 unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=QzcHNelUbnubsDtanFIgDCRzmYTuP-GjJ_g9y8fButE,19623
 unstructured_ingest/v2/processes/connectors/jira.py,sha256=-f_vIWNw6Xr8rMNdAcfCC2cmhB-QndnZk5XymHo60FU,17094
-unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=
+unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=1dXfNb3qaV669-_BjCQdznmfuWLPGjmdkv2ybmkAHjQ,5099
 unstructured_ingest/v2/processes/connectors/local.py,sha256=FWPRjjUsnQjyZMChuZGuMU04AB5X0sFEOcAXhx1r9sk,7381
 unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
 unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
@@ -464,9 +464,9 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=Uss
 unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=g1qYnIrML4TjN7rmC0MGrD5JzAprb6SymBHlEdOumz0,3113
 unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=FZhjrMYBr_je6mWYp7MUUvyKR9YwGD2HiNljeT7U5ws,5044
 unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
-unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=
-unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=
-unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=
+unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=o3J81DnSwt3lmAh19jXVPAYRZLJ3VyGhaEVO2SIjksQ,2926
+unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=NIo2CCiPiuTFotNC891Mbelzg01knItryYGUtOM96xg,4393
+unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=RW-Cw94Hs3ZsN8Kb4ciSh_N-Qkp0cqkw_xkJbt8CDNU,4656
 unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
 unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=MEKU64OsiQmbLPb3ken-WWCIV6-pnFbs_6kjJweG-SY,18813
 unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
@@ -490,7 +490,7 @@ unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur
 unstructured_ingest/v2/processes/connectors/lancedb/azure.py,sha256=Ms5vQVRIpTF1Q2qBl_bET9wbgaf4diPaH-iR8kJlr4E,1461
 unstructured_ingest/v2/processes/connectors/lancedb/cloud.py,sha256=BFy0gW2OZ_qaZJM97m-tNsFaJPi9zOKrrd2y4thcNP0,1341
 unstructured_ingest/v2/processes/connectors/lancedb/gcp.py,sha256=p5BPaFtS3y3Yh8PIr3tUqsAXrUYu4QYYAWQNh5W2ucE,1361
-unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=
+unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=Y4waCOrtlz5Eyf3Me6rInzt_Ory0woseLe_hfSD1nDM,5926
 unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gAWwJUUrmlsRzYMFIBeZgu_QT3mhw5L0I,1272
 unstructured_ingest/v2/processes/connectors/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/processes/connectors/notion/client.py,sha256=8_K6x1Z4bkvSer1NicQeqpX8Y275OUS65kfqTWRU09g,13120
@@ -564,13 +564,13 @@ unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ
 unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
 unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
 unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=NSEZwJDHh_9kFc31LnG14iRtYF3meK2UfUlQfYnwYEQ,2059
-unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=
+unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=Ys-pRLiYtdvNRdDnWYwhMqteLQPekRFHrqsrr9jQVpo,9049
 unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
-unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=
-unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=
-unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=
-unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=
-unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=
+unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=am2d87kDkpTTB0VbPSX3ce9o6oM9KUQu5y9T_p1kgJw,5711
+unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=r2qgoEF3bUugzgSr3hMJyIm8DKmxsO53ZHXJSNxOsvE,9379
+unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=G28VUR0zaMVmQtbdZG6TRpkWFHvXJqFrr7SBuyM-fME,15608
+unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=6RoBUxMbeuhduvTFlBKMgEH1NKJg7doQjXF_R5cUuX0,5319
+unstructured_ingest/v2/processes/connectors/sql/vastdb.py,sha256=wklJ8p3eMb81FTjS6ukPoILuWN0_KQBfuYGXfE0XrqY,9644
 unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
 unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
 unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
@@ -581,9 +581,9 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
 unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
 unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
+unstructured_ingest-0.5.21.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.5.21.dist-info/METADATA,sha256=c1bUHvgG6X9QOiAD669sVHAFkGfI2tBTRBM-eRJBLiU,14999
+unstructured_ingest-0.5.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.5.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.5.21.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.5.21.dist-info/RECORD,,
{unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/LICENSE.md
RENAMED
File without changes
{unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/WHEEL
RENAMED
File without changes
{unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/entry_points.txt
RENAMED
File without changes
{unstructured_ingest-0.5.20.dist-info → unstructured_ingest-0.5.21.dist-info}/top_level.txt
RENAMED
File without changes
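
Each changed line in the RECORD diff above is a manifest entry of the form path,sha256=<digest>,<size>, where the digest is the urlsafe-base64-encoded (unpadded) SHA-256 of the file and the size is in bytes; the RECORD file itself is listed with both fields empty. As a minimal sketch of how such an entry can be recomputed for verification, assuming an unpacked copy of the wheel in the working directory and using a hypothetical helper name `record_entry` (not part of unstructured-ingest):

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    """Build a RECORD-style line: <path>,sha256=<urlsafe-b64 digest without padding>,<size in bytes>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode("ascii")
    return f"{path},sha256={digest},{len(data)}"


# Example: recompute the entry for the bumped version module and compare it
# against the corresponding line in the wheel's RECORD manifest.
print(record_entry("unstructured_ingest/__version__.py"))
```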